Class: Dor::TechnicalMetadataService

Inherits:
Object
  • Object
show all
Defined in:
lib/dor/services/technical_metadata_service.rb

Class Method Summary collapse

Class Method Details

.add_update_technical_metadata(dor_item) ⇒ Boolean

Returns True if technical metadata is correctly added or updated.

Parameters:

  • dor_item (Dor::Item)

    The DOR item being processed by the technical metadata robot

Returns:

  • (Boolean)

    True if technical metadata is correctly added or updated



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/dor/services/technical_metadata_service.rb', line 11

# Adds or refreshes the technicalMetadata datastream of a DOR item.
#
# New metadata is generated for the added/modified content files and, when an
# earlier technicalMetadata document exists, merged with it.
#
# @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
# @return [Boolean] true if technical metadata is correctly added or updated
def self.add_update_technical_metadata(dor_item)
  test_jhove_service
  druid = dor_item.pid
  content_group_diff = get_content_group_diff(dor_item)
  deltas = get_file_deltas(content_group_diff)
  new_files = get_new_files(deltas)
  old_techmd = get_old_technical_metadata(dor_item)
  new_techmd = get_new_technical_metadata(druid, new_files)
  if old_techmd.nil?
    # this is version 1 or previous technical metadata was not saved
    final_techmd = new_techmd
  elsif content_group_diff.difference_count == 0
    # there have been no changes to content files from previous version,
    # so the datastream is left untouched (nothing is saved)
    return true
  else
    merged_nodes = merge_file_nodes(old_techmd, new_techmd, deltas)
    final_techmd = build_technical_metadata(druid, merged_nodes)
  end
  ds = dor_item.datastreams['technicalMetadata']
  ds.dsLabel = 'Technical Metadata'
  ds.content = final_techmd
  ds.save
  true
end

.build_technical_metadata(druid, merged_nodes) ⇒ String

Returns The finalized technicalMetadata datastream contents for the new object version.

Parameters:

  • druid (String)

    The identifier of the digital object being processed by the technical metadata robot

  • merged_nodes (Hash<String,Nokogiri::XML::Node>)

    The complete set of technicalMetadata nodes for the digital object, indexed by filename

Returns:

  • (String)

    The finalized technicalMetadata datastream contents for the new object version



188
189
190
191
192
193
194
195
196
197
198
# File 'lib/dor/services/technical_metadata_service.rb', line 188

# Assembles the final technicalMetadata document: a root element carrying the
# object id, a UTC timestamp and the namespace declarations, followed by the
# per-file fragments in filename order, then the closing tag.
#
# @param [String] druid The identifier of the digital object being processed by the technical metadata robot
# @param [Hash<String,String>] merged_nodes The complete set of per-file technicalMetadata fragments, indexed by filename
# @return [String] The finalized technicalMetadata datastream contents for the new object version
def self.build_technical_metadata(druid, merged_nodes)
  # The heredoc body is flush-left on purpose: its text becomes the opening tag verbatim.
  techmd_root = <<-EOF
<technicalMetadata objectId='#{druid}' datetime='#{Time.now.utc.iso8601}'
xmlns:jhove='http://hul.harvard.edu/ois/xml/ns/jhove'
xmlns:mix='http://www.loc.gov/mix/v10'
xmlns:textmd='info:lc/xmlns/textMD-v3'>
  EOF
  doc = techmd_root
  # Sorting the keys makes the output order deterministic (by file path).
  merged_nodes.keys.sort.each {|path| doc << merged_nodes[path] }
  doc + '</technicalMetadata>'
end

.get_content_group_diff(dor_item) ⇒ FileGroupDifference

Returns The differences between two versions of a group of files.

Parameters:

  • dor_item (Dor::Item)

    The DOR item being processed by the technical metadata robot

Returns:

  • (FileGroupDifference)

    The differences between two versions of a group of files



50
51
52
53
# File 'lib/dor/services/technical_metadata_service.rb', line 50

# Asks the item for its content diff over the 'all' subset and narrows the
# result to the 'content' file group.
#
# @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
# @return [FileGroupDifference] The differences between two versions of a group of files
def self.get_content_group_diff(dor_item)
  dor_item.get_content_diff('all').group_difference('content')
end

.get_dor_technical_metadata(dor_item) ⇒ String

Returns The technicalMetadata datastream from the previous version of the digital object (fetched from DOR fedora). The data is updated to the latest format.

Parameters:

  • dor_item (Dor::Item)

    The DOR item being processed by the technical metadata robot

Returns:

  • (String)

    The technicalMetadata datastream from the previous version of the digital object (fetched from DOR fedora). The data is updated to the latest format.



88
89
90
91
92
93
94
95
# File 'lib/dor/services/technical_metadata_service.rb', line 88

# Reads the technicalMetadata datastream already stored on the item.
#
# Returns nil when the item has no such datastream, when the datastream
# reports itself as new, or when its content is in neither recognised format.
#
# @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
# @return [String, nil] The technicalMetadata datastream from the previous version of the digital object
def self.get_dor_technical_metadata(dor_item)
  ds = 'technicalMetadata'
  return nil unless dor_item.datastreams.keys.include?(ds) && !dor_item.datastreams[ds].new?
  dor_techmd = dor_item.datastreams[ds].content
  # Already in the current format: hand it back unchanged.
  return dor_techmd if dor_techmd =~ /<technicalMetadata/
  # Older jhove output is converted by the jhove-service gem.
  # NOTE(review): the method name was lost in extraction; restored as
  # JhoveService#upgrade_technical_metadata -- confirm against the gem.
  return ::JhoveService.new.upgrade_technical_metadata(dor_techmd) if dor_techmd =~ /<jhove/
  nil
end

.get_file_deltas(content_group_diff) ⇒ Hash<Symbol,Array>

Returns Sets of filenames grouped by change type for use in performing file or metadata operations.

Parameters:

  • content_group_diff (FileGroupDifference)

Returns:

  • (Hash<Symbol,Array>)

    Sets of filenames grouped by change type for use in performing file or metadata operations



57
58
59
# File 'lib/dor/services/technical_metadata_service.rb', line 57

# Thin accessor: returns whatever the group difference exposes as `file_deltas`.
#
# @param [FileGroupDifference] content_group_diff The differences between two versions of a group of files
# @return [Hash<Symbol,Array>] Sets of filenames grouped by change type for use in performing file or metadata operations
def self.get_file_deltas(content_group_diff)
  content_group_diff.file_deltas
end

.get_file_nodes(technical_metadata) ⇒ Hash<String,Nokogiri::XML::Node>

Returns The set of nodes from a technicalMetadata datastream, indexed by filename.

Parameters:

  • technical_metadata (String)

    A technicalMetadata datastream contents

Returns:

  • (Hash<String,Nokogiri::XML::Node>)

    The set of nodes from a technicalMetadata datastream, indexed by filename



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/dor/services/technical_metadata_service.rb', line 161

# Splits a technicalMetadata document into its <file> elements using a
# line-by-line scan (no XML parser is involved).
#
# Each value is the raw text of one element, from the opening <file ...> line
# through the matching </file> line, keyed by the quoted value captured from
# the opening line.
#
# @param [String, nil] technical_metadata A technicalMetadata datastream contents
# @return [Hash<String,String>] The set of file elements from a technicalMetadata datastream, indexed by filename
def self.get_file_nodes(technical_metadata)
  file_hash = {}
  return file_hash if technical_metadata.nil?
  current_file = []
  path = nil
  in_file = false
  technical_metadata.each_line do |line|
    # NOTE: the leading `.*` is greedy, so the LAST quoted value on the opening
    # line is captured; this presumes `id` is the only quoted attribute there.
    if line =~ /^\s*<file.*["'](.*?)["']/
      current_file << line
      path = Regexp.last_match(1)
      in_file = true
    elsif line =~ /^\s*<\/file>/
      current_file << line
      file_hash[path] = current_file.join
      current_file = []
      path = nil
      in_file = false
    elsif in_file
      current_file << line
    end
  end
  file_hash
end

.get_new_files(deltas) ⇒ Array<String>

Returns The list of filenames for files that are either added or modified since the previous version.

Parameters:

  • deltas (Hash<Symbol,Array>)

    Sets of filenames grouped by change type for use in performing file or metadata operations

Returns:

  • (Array<String>)

    The list of filenames for files that are either added or modified since the previous version



63
64
65
# File 'lib/dor/services/technical_metadata_service.rb', line 63

# Concatenates the :added list and the :modified list, in that order.
#
# @param [Hash<Symbol,Array>] deltas Sets of filenames grouped by change type for use in performing file or metadata operations
# @return [Array<String>] The list of filenames for files that are either added or modified since the previous version
def self.get_new_files(deltas)
  deltas.values_at(:added, :modified).reduce(:+)
end

.get_new_technical_metadata(druid, new_files) ⇒ String

Returns The technicalMetadata datastream for the new files of the new digital object version.

Parameters:

  • druid (DruidTools::Druid)

    A wrapper class for the druid identifier. Used to generate paths

  • new_files (Array<String>)

    The list of filenames for files that are either added or modified since the previous version

Returns:

  • (String)

    The technicalMetadata datastream for the new files of the new digital object version



107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/dor/services/technical_metadata_service.rb', line 107

# Runs jhove over the added/modified files in the object's workspace and
# returns the resulting technical metadata document.
#
# @param [String] druid The identifier of the digital object; passed to DruidTools::Druid.new to locate the workspace
# @param [Array<String>] new_files The list of filenames for files that are either added or modified since the previous version
# @return [String, nil] The technicalMetadata datastream for the new files, or nil when there are no new files
def self.get_new_technical_metadata(druid, new_files)
  # Nothing to analyse: skip the workspace lookup and the jhove run entirely.
  return nil if new_files.nil? || new_files.empty?
  workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
  content_dir = workspace.find_filelist_parent('content', new_files)
  temp_dir = workspace.temp_dir
  jhove_service = ::JhoveService.new(temp_dir)
  jhove_service.digital_object_id = druid
  fileset_file = write_fileset(temp_dir, new_files)
  jhove_output_file = jhove_service.run_jhove(content_dir, fileset_file)
  # NOTE(review): the method name was lost in extraction; restored as
  # JhoveService#create_technical_metadata -- confirm against the gem.
  tech_md_file = jhove_service.create_technical_metadata(jhove_output_file)
  IO.read(tech_md_file)
end

.get_old_technical_metadata(dor_item) ⇒ String

Returns The technicalMetadata datastream from the previous version of the digital object.

Parameters:

  • dor_item (Dor::Item)

    The DOR item being processed by the technical metadata robot

Returns:

  • (String)

    The technicalMetadata datastream from the previous version of the digital object



69
70
71
72
73
# File 'lib/dor/services/technical_metadata_service.rb', line 69

# Looks up the previous version's technicalMetadata, trying SDR storage first
# and falling back to the item's own datastream when SDR yields nothing.
#
# @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
# @return [String, nil] The technicalMetadata datastream from the previous version of the digital object
def self.get_old_technical_metadata(dor_item)
  sdr_techmd = get_sdr_technical_metadata(dor_item.pid)
  return sdr_techmd unless sdr_techmd.nil?
  get_dor_technical_metadata(dor_item)
end

.get_sdr_metadata(druid, dsname) ⇒ String

Returns The datastream contents from the previous version of the digital object (fetched from SDR storage).

Parameters:

  • druid (String)

    The identifier of the digital object being processed by the technical metadata robot

  • dsname (String)

    The identifier of the metadata datastream

Returns:

  • (String)

    The datastream contents from the previous version of the digital object (fetched from SDR storage)



100
101
102
# File 'lib/dor/services/technical_metadata_service.rb', line 100

# Delegates the lookup of a stored metadata datastream to Sdr::Client.
#
# @param [String] druid The identifier of the digital object being processed by the technical metadata robot
# @param [String] dsname The identifier of the metadata datastream
# @return [String] The datastream contents from the previous version of the digital object (fetched from SDR storage)
def self.get_sdr_metadata(druid, dsname)
  # NOTE(review): the client method name was lost in extraction; restored as
  # Sdr::Client.get_sdr_metadata -- confirm against the Sdr::Client API.
  Sdr::Client.get_sdr_metadata(druid, dsname)
end

.get_sdr_technical_metadata(druid) ⇒ String

Returns The technicalMetadata datastream from the previous version of the digital object (fetched from SDR storage) The data is updated to the latest format.

Parameters:

  • druid (String)

    The identifier of the digital object being processed by the technical metadata robot

Returns:

  • (String)

    The technicalMetadata datastream from the previous version of the digital object (fetched from SDR storage) The data is updated to the latest format.



78
79
80
81
82
83
# File 'lib/dor/services/technical_metadata_service.rb', line 78

# Fetches the previous technicalMetadata from SDR storage.
#
# Content already in the current format is returned unchanged, older jhove
# output is converted, and anything else (including no content) gives nil.
#
# @param [String] druid The identifier of the digital object being processed by the technical metadata robot
# @return [String, nil] The technicalMetadata datastream from the previous version of the digital object
def self.get_sdr_technical_metadata(druid)
  sdr_techmd = get_sdr_metadata(druid, 'technicalMetadata')
  return sdr_techmd if sdr_techmd =~ /<technicalMetadata/
  # NOTE(review): the method name was lost in extraction; restored as
  # JhoveService#upgrade_technical_metadata -- confirm against the gem.
  return ::JhoveService.new.upgrade_technical_metadata(sdr_techmd) if sdr_techmd =~ /<jhove/
  nil
end

.merge_file_nodes(old_techmd, new_techmd, deltas) ⇒ Hash<String,Nokogiri::XML::Node>

Returns The complete set of technicalMetadata nodes for the digital object, indexed by filename.

Parameters:

  • old_techmd (String)

    The technicalMetadata datastream from the previous version of the digital object

  • new_techmd (String)

    The technicalMetadata datastream for the new files of the new digital object version

  • deltas (Hash<Symbol,Array>)

    Sets of filenames grouped by change type (:identical, :modified, :added, :renamed, :copyadded) for use in performing file or metadata operations

Returns:

  • (Hash<String,Nokogiri::XML::Node>)

    The complete set of technicalMetadata nodes for the digital object, indexed by filename



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/dor/services/technical_metadata_service.rb', line 133

# Builds the per-file metadata set for the new object version.
#
# Identical files keep their old entry; modified and added files take the
# newly generated entry; renamed and copy-added files reuse a copy of the old
# entry with its opening <file> tag rewritten to carry the new path.
#
# @param [String] old_techmd The technicalMetadata datastream from the previous version of the digital object
# @param [String] new_techmd The technicalMetadata datastream for the new files of the new digital object version
# @param [Hash<Symbol,Array>] deltas Sets of filenames grouped by change type
# @return [Hash<String,String>] The complete set of per-file technicalMetadata fragments, indexed by filename
def self.merge_file_nodes(old_techmd, new_techmd, deltas)
  previous_nodes = get_file_nodes(old_techmd)
  fresh_nodes = get_file_nodes(new_techmd)
  combined = {}

  deltas[:identical].each { |path| combined[path] = previous_nodes[path] }

  [:modified, :added].each do |change|
    deltas[change].each { |path| combined[path] = fresh_nodes[path] }
  end

  # Both change types carry (oldpath, newpath) pairs and are handled alike.
  [:renamed, :copyadded].each do |change|
    deltas[change].each do |oldpath, newpath|
      # Work on a copy so the entry stored under the old path is not mutated.
      relabeled = previous_nodes[oldpath].clone
      relabeled.sub!(/<file\s*id.*?["'].*?["'].*?>/, "<file id='#{newpath}'>")
      combined[newpath] = relabeled
    end
  end

  combined
end

.test_jhove_service ⇒ Boolean

Returns Make sure that the jhove-service gem is loaded.

Returns:

  • (Boolean)

    Make sure that the jhove-service gem is loaded



37
38
39
40
41
42
43
44
45
46
# File 'lib/dor/services/technical_metadata_service.rb', line 37

# Ensures the jhove-service gem is available, loading it lazily on first use.
#
# Does nothing when ::JhoveService is already defined. Otherwise requires
# 'jhove_service'; if that fails the LoadError is printed and a RuntimeError
# with installation advice is raised instead.
#
# @raise [RuntimeError] if the jhove-service gem cannot be loaded
def self.test_jhove_service
  return if defined?(::JhoveService)
  require 'jhove_service'
rescue LoadError => e
  puts e.inspect
  raise 'jhove-service dependency gem was not found.  Please add it to your Gemfile and run bundle install'
end

.write_fileset(temp_dir, new_files) ⇒ Pathname

Returns Save the new_files list to a text file and return that file’s name.

Parameters:

  • temp_dir (Pathname)

    The pathname of the temp folder in the object’s workspace area

  • new_files (Array<String>)

    The list of filenames for files that are either added or modified since the previous version

Returns:

  • (Pathname)

    Save the new_files list to a text file and return that file’s name



123
124
125
126
127
# File 'lib/dor/services/technical_metadata_service.rb', line 123

# Writes the file list, one entry per line, to 'jhove_fileset.txt' inside
# temp_dir (overwriting any existing file) and returns that file's path.
#
# @param [Pathname] temp_dir The pathname of the temp folder in the object's workspace area
# @param [Array<String>] new_files The list of filenames for files that are either added or modified since the previous version
# @return [Pathname] The pathname of the text file holding the new_files list
def self.write_fileset(temp_dir, new_files)
  Pathname(temp_dir).join('jhove_fileset.txt').tap do |listing|
    listing.open('w') { |io| io.puts(new_files) }
  end
end