Class: Dor::TechnicalMetadataService

Inherits:
Object
  • Object
show all
Defined in:
lib/dor/services/technical_metadata_service.rb

Class Method Summary collapse

Class Method Details

.add_update_technical_metadata(dor_item) ⇒ Boolean

Returns True if technical metadata is correctly added or updated.

Parameters:

  • dor_item (Dor::Item)

    The DOR item being processed by the technical metadata robot

Returns:

  • (Boolean)

    True if technical metadata is correctly added or updated



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/dor/services/technical_metadata_service.rb', line 11

# Create or refresh the technicalMetadata datastream of a DOR item.
#
# The method name and the three helper calls below had been stripped from this
# listing; they are restored from the method signatures documented on this page.
#
# @param dor_item [Dor::Item] the DOR item being processed by the technical metadata robot
# @return [Boolean] true after the datastream is saved, or immediately when
#   earlier technical metadata exists and no content file has changed
def self.add_update_technical_metadata(dor_item)
  test_jhove_service
  druid = dor_item.pid
  content_group_diff = get_content_group_diff(dor_item)
  deltas = get_file_deltas(content_group_diff)
  new_files = get_new_files(deltas)
  old_techmd = get_old_technical_metadata(dor_item)
  new_techmd = get_new_technical_metadata(druid, new_files)
  if old_techmd.nil?
    # this is version 1 or previous technical metadata was not saved
    final_techmd = new_techmd
  elsif content_group_diff.difference_count == 0
    # there have been no changes to content files from previous version
    return true
  else
    # combine the retained old file nodes with the freshly generated ones
    merged_nodes = merge_file_nodes(old_techmd, new_techmd, deltas)
    final_techmd = build_technical_metadata(druid, merged_nodes)
  end
  ds = dor_item.datastreams['technicalMetadata']
  ds.dsLabel = 'Technical Metadata'
  ds.content = final_techmd
  ds.save
  true
end

.build_technical_metadata(druid, merged_nodes) ⇒ String

Returns The finalized technicalMetadata datastream contents for the new object version.

Parameters:

  • druid (String)

    The identifier of the digital object being processed by the technical metadata robot

  • merged_nodes (Hash<String,Nokogiri::XML::Node>)

    The complete set of technicalMetadata nodes for the digital object, indexed by filename

Returns:

  • (String)

    The finalized technicalMetadata datastream contents for the new object version



190
191
192
193
194
195
196
197
198
199
200
# File 'lib/dor/services/technical_metadata_service.rb', line 190

# Assemble the final technicalMetadata document from the merged file nodes.
#
# The method name had been stripped from this listing; it is restored from the
# method signature documented on this page.
#
# @param druid [String] identifier written into the root element's objectId attribute
# @param merged_nodes [Hash<String,String>] file node markup indexed by filename;
#   every value is appended verbatim, in sorted-key order
# @return [String] the root start tag, the file nodes, and the closing tag
def self.build_technical_metadata(druid, merged_nodes)
  # The heredoc body is kept flush-left so no indentation ends up in the output.
  techmd_root = <<-EOF
<technicalMetadata objectId='#{druid}' datetime='#{Time.now.utc.iso8601}'
xmlns:jhove='http://hul.harvard.edu/ois/xml/ns/jhove'
xmlns:mix='http://www.loc.gov/mix/v10'
xmlns:textmd='info:lc/xmlns/textMD-v3'>
  EOF
  doc = techmd_root
  # Sorting the keys makes the output order independent of hash insertion order.
  merged_nodes.keys.sort.each { |path| doc << merged_nodes[path] }
  doc + '</technicalMetadata>'
end

.get_content_group_diff(dor_item) ⇒ FileGroupDifference

Returns The differences between two versions of a group of files.

Parameters:

  • dor_item (Dor::Item)

    The DOR item being processed by the technical metadata robot

Returns:

  • (FileGroupDifference)

    The differences between two versions of a group of files



50
51
52
53
54
55
# File 'lib/dor/services/technical_metadata_service.rb', line 50

# Look up the differences for the 'content' file group of a DOR item.
#
# @param dor_item [Dor::Item] the DOR item being processed by the technical metadata robot
# @return [FileGroupDifference] the 'content' group difference taken from the
#   item's 'all' content diff, or a new Moab::FileGroupDifference when
#   Dor::Exception is raised
def self.get_content_group_diff(dor_item)
  begin
    dor_item.get_content_diff('all').group_difference('content')
  rescue Dor::Exception
    # no contentMetadata
    Moab::FileGroupDifference.new
  end
end

.get_dor_technical_metadata(dor_item) ⇒ String

Returns The technicalMetadata datastream from the previous version of the digital object (fetched from DOR fedora). The data is updated to the latest format.

Parameters:

  • dor_item (Dor::Item)

    The DOR item being processed by the technical metadata robot

Returns:

  • (String)

    The technicalMetadata datastream from the previous version of the digital object (fetched from DOR fedora). The data is updated to the latest format.



90
91
92
93
94
95
96
97
# File 'lib/dor/services/technical_metadata_service.rb', line 90

# Read the existing technicalMetadata datastream from the DOR item, if one exists.
#
# The method name had been stripped from this listing; it is restored from the
# method signature documented on this page.
#
# @param dor_item [Dor::Item] the DOR item being processed by the technical metadata robot
# @return [String, nil] the datastream content when it contains
#   <technicalMetadata, the upgraded content when it contains <jhove, and nil
#   when the datastream is absent, new, or in neither format
def self.get_dor_technical_metadata(dor_item)
  ds = 'technicalMetadata'
  return nil unless dor_item.datastreams.keys.include?(ds) && !dor_item.datastreams[ds].new?
  dor_techmd = dor_item.datastreams[ds].content
  return dor_techmd if dor_techmd =~ /<technicalMetadata/
  # NOTE(review): the callee name was also stripped from this listing;
  # `upgrade_technical_metadata` is presumed from the jhove-service gem - confirm.
  return ::JhoveService.new.upgrade_technical_metadata(dor_techmd) if dor_techmd =~ /<jhove/
  nil
end

.get_file_deltas(content_group_diff) ⇒ Hash<Symbol,Array>

Returns Sets of filenames grouped by change type for use in performing file or metadata operations.

Parameters:

  • content_group_diff (FileGroupDifference)

Returns:

  • (Hash<Symbol,Array>)

    Sets of filenames grouped by change type for use in performing file or metadata operations



59
60
61
# File 'lib/dor/services/technical_metadata_service.rb', line 59

# Expose the file deltas carried by a content group difference.
#
# @param content_group_diff [FileGroupDifference] the difference object to query
# @return [Hash<Symbol,Array>] whatever `content_group_diff.file_deltas` returns
def self.get_file_deltas(content_group_diff)
  deltas_by_change_type = content_group_diff.file_deltas
  deltas_by_change_type
end

.get_file_nodes(technical_metadata) ⇒ Hash<String,Nokogiri::XML::Node>

Returns The set of nodes from a technicalMetadata datastream, indexed by filename.

Parameters:

  • technical_metadata (String)

    A technicalMetadata datastream contents

Returns:

  • (Hash<String,Nokogiri::XML::Node>)

    The set of nodes from a technicalMetadata datastream, indexed by filename



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/dor/services/technical_metadata_service.rb', line 163

# Split a technicalMetadata document into its <file> nodes, indexed by filename.
#
# The parameter name had been stripped from this listing; it is restored from
# the method signature documented on this page.
#
# @param technical_metadata [String, nil] the technicalMetadata datastream contents
# @return [Hash<String,String>] the text of each <file>...</file> node keyed by
#   the last quoted value on its opening line; an empty hash when the input is
#   nil (the values are plain strings, not parsed XML nodes)
def self.get_file_nodes(technical_metadata)
  file_hash = {}
  return file_hash if technical_metadata.nil?
  current_file = []
  path = nil
  in_file = false
  technical_metadata.each_line do |line|
    if line =~ /^\s*<file.*["'](.*?)["']/
      # Opening tag: the greedy `.*` makes the capture the last quoted value on the line.
      current_file << line
      path = Regexp.last_match(1)
      in_file = true
    elsif line =~ /^\s*<\/file>/
      # Closing tag: store the accumulated lines and reset for the next node.
      current_file << line
      file_hash[path] = current_file.join
      current_file = []
      path = nil
      in_file = false
    elsif in_file
      current_file << line
    end
  end
  file_hash
end

.get_new_files(deltas) ⇒ Array<String>

Returns The list of filenames for files that are either added or modified since the previous version.

Parameters:

  • deltas (Hash<Symbol,Array>)

    Sets of filenames grouped by change type for use in performing file or metadata operations

Returns:

  • (Array<String>)

    The list of filenames for files that are either added or modified since the previous version



65
66
67
# File 'lib/dor/services/technical_metadata_service.rb', line 65

# List the files that need fresh technical metadata.
#
# @param deltas [Hash<Symbol,Array>] sets of filenames grouped by change type
# @return [Array<String>] the :added entries followed by the :modified entries
def self.get_new_files(deltas)
  added_paths = deltas[:added]
  modified_paths = deltas[:modified]
  added_paths + modified_paths
end

.get_new_technical_metadata(druid, new_files) ⇒ String

Returns The technicalMetadata datastream for the new files of the new digital object version.

Parameters:

  • druid (DruidTools::Druid)

    A wrapper class for the druid identifier. Used to generate paths

  • new_files (Array<String>)

    The list of filenames for files that are either added or modified since the previous version

Returns:

  • (String)

    The technicalMetadata datastream for the new files of the new digital object version



109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/dor/services/technical_metadata_service.rb', line 109

# Run JHOVE over the new files and return the resulting technical metadata.
#
# The method name had been stripped from this listing; it is restored from the
# method signature documented on this page.
#
# @param druid [String] the identifier used to locate the object's workspace
# @param new_files [Array<String>, nil] filenames added or modified since the previous version
# @return [String, nil] the contents of the generated technical metadata file,
#   or nil when new_files is nil or empty
def self.get_new_technical_metadata(druid, new_files)
  return nil if new_files.nil? || new_files.empty?
  workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
  content_dir = workspace.find_filelist_parent('content', new_files)
  temp_dir = workspace.temp_dir
  jhove_service = ::JhoveService.new(temp_dir)
  jhove_service.digital_object_id = druid
  fileset_file = write_fileset(temp_dir, new_files)
  jhove_output_file = jhove_service.run_jhove(content_dir, fileset_file)
  # NOTE(review): the callee name was also stripped from this listing;
  # `create_technical_metadata` is presumed from the jhove-service gem - confirm.
  tech_md_file = jhove_service.create_technical_metadata(jhove_output_file)
  # File.read rather than IO.read: IO.read treats a leading "|" as a command to run.
  File.read(tech_md_file)
end

.get_old_technical_metadata(dor_item) ⇒ String

Returns The technicalMetadata datastream from the previous version of the digital object.

Parameters:

  • dor_item (Dor::Item)

    The DOR item being processed by the technical metadata robot

Returns:

  • (String)

    The technicalMetadata datastream from the previous version of the digital object



71
72
73
74
75
# File 'lib/dor/services/technical_metadata_service.rb', line 71

# Fetch the previous version's technical metadata, trying SDR first and DOR second.
#
# The method name and both helper calls had been stripped from this listing;
# they are restored from the method signatures documented on this page.
#
# @param dor_item [Dor::Item] the DOR item being processed by the technical metadata robot
# @return [String, nil] the SDR copy when it is not nil, otherwise the result
#   of the DOR lookup
def self.get_old_technical_metadata(dor_item)
  sdr_techmd = get_sdr_technical_metadata(dor_item.pid)
  return sdr_techmd unless sdr_techmd.nil?
  get_dor_technical_metadata(dor_item)
end

.get_sdr_metadata(druid, dsname) ⇒ String

Returns The datastream contents from the previous version of the digital object (fetched from SDR storage).

Parameters:

  • druid (String)

    The identifier of the digital object being processed by the technical metadata robot

  • dsname (String)

    The identifier of the metadata datastream

Returns:

  • (String)

    The datastream contents from the previous version of the digital object (fetched from SDR storage)



102
103
104
# File 'lib/dor/services/technical_metadata_service.rb', line 102

# Fetch a metadata datastream of the previous object version via Sdr::Client.
#
# The method name had been stripped from this listing; it is restored from the
# method signature documented on this page.
#
# @param druid [String] the identifier of the digital object
# @param dsname [String] the identifier of the metadata datastream
# @return [String] whatever the Sdr::Client call returns
def self.get_sdr_metadata(druid, dsname)
  # NOTE(review): the callee name was also stripped from this listing;
  # `Sdr::Client.get_sdr_metadata` is presumed - confirm against Sdr::Client.
  Sdr::Client.get_sdr_metadata(druid, dsname)
end

.get_sdr_technical_metadata(druid) ⇒ String

Returns The technicalMetadata datastream from the previous version of the digital object (fetched from SDR storage). The data is updated to the latest format.

Parameters:

  • druid (String)

    The identifier of the digital object being processed by the technical metadata robot

Returns:

  • (String)

    The technicalMetadata datastream from the previous version of the digital object (fetched from SDR storage). The data is updated to the latest format.



80
81
82
83
84
85
# File 'lib/dor/services/technical_metadata_service.rb', line 80

# Fetch the previous version's technicalMetadata from SDR, upgrading the old format.
#
# The method name and the helper call had been stripped from this listing;
# they are restored from the method signatures documented on this page.
#
# @param druid [String] the identifier of the digital object
# @return [String, nil] the SDR content when it contains <technicalMetadata,
#   the upgraded content when it contains <jhove, otherwise nil
def self.get_sdr_technical_metadata(druid)
  sdr_techmd = get_sdr_metadata(druid, 'technicalMetadata')
  return sdr_techmd if sdr_techmd =~ /<technicalMetadata/
  # NOTE(review): the callee name was also stripped from this listing;
  # `upgrade_technical_metadata` is presumed from the jhove-service gem - confirm.
  return ::JhoveService.new.upgrade_technical_metadata(sdr_techmd) if sdr_techmd =~ /<jhove/
  nil
end

.merge_file_nodes(old_techmd, new_techmd, deltas) ⇒ Hash<String,Nokogiri::XML::Node>

Returns The complete set of technicalMetadata nodes for the digital object, indexed by filename.

Parameters:

  • old_techmd (String)

    The technicalMetadata datastream from the previous version of the digital object

  • new_techmd (String)

    The technicalMetadata datastream for the new files of the new digital object version

  • deltas (Hash<Symbol,Array>)

    Sets of filenames grouped by change type for use in performing file or metadata operations

Returns:

  • (Hash<String,Nokogiri::XML::Node>)

    The complete set of technicalMetadata nodes for the digital object, indexed by filename



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/dor/services/technical_metadata_service.rb', line 135

# Combine old and new technical metadata into one set of file nodes.
#
# Identical files keep their old node, modified and added files take the new
# node, and renamed or copy-added files reuse the old node of the source path
# with its opening <file ...> tag rewritten to carry the new path.
#
# @param old_techmd [String] technicalMetadata of the previous object version
# @param new_techmd [String] technicalMetadata generated for the new files
# @param deltas [Hash<Symbol,Array>] filenames grouped by change type; the
#   :renamed and :copyadded entries are [oldpath, newpath] pairs
# @return [Hash<String,String>] the merged file nodes indexed by filename
def self.merge_file_nodes(old_techmd, new_techmd, deltas)
  old_file_nodes = get_file_nodes(old_techmd)
  new_file_nodes = get_file_nodes(new_techmd)
  merged_nodes = {}
  deltas[:identical].each do |path|
    merged_nodes[path] = old_file_nodes[path]
  end
  [:modified, :added].each do |change|
    deltas[change].each do |path|
      merged_nodes[path] = new_file_nodes[path]
    end
  end
  [:renamed, :copyadded].each do |change|
    deltas[change].each do |oldpath, newpath|
      # The block form of `sub` inserts newpath literally; a replacement string
      # would treat backslash sequences in the path (e.g. "\1") as backreferences.
      # `sub` also returns a fresh string, so the old node is never mutated.
      merged_nodes[newpath] = old_file_nodes[oldpath].sub(/<file\s*id.*?["'].*?["'].*?>/) do
        "<file id='#{newpath}'>"
      end
    end
  end
  merged_nodes
end

.test_jhove_service ⇒ Boolean

Makes sure that the jhove-service gem is loaded; raises an error if it cannot be required.

Returns:

  • (Boolean)

    Make sure that the jhove-service gem is loaded



37
38
39
40
41
42
43
44
45
46
# File 'lib/dor/services/technical_metadata_service.rb', line 37

# Ensure the jhove-service gem is available, loading it on demand.
#
# Does nothing when ::JhoveService is already defined. Otherwise requires
# 'jhove_service'; on LoadError the error is printed and a RuntimeError with
# installation advice is raised.
#
# @return [Boolean, nil] nil when ::JhoveService was already defined,
#   otherwise the result of the require
# @raise [RuntimeError] when the gem cannot be loaded
def self.test_jhove_service
  return if defined?(::JhoveService)
  require 'jhove_service'
rescue LoadError => load_error
  puts load_error.inspect
  raise 'jhove-service dependency gem was not found.  Please add it to your Gemfile and run bundle install'
end

.write_fileset(temp_dir, new_files) ⇒ Pathname

Returns Save the new_files list to a text file and return that file’s name.

Parameters:

  • temp_dir (Pathname)

    The pathname of the temp folder in the object’s workspace area

  • new_files (Array<String>)

    The list of filenames for files that are either added or modified since the previous version

Returns:

  • (Pathname)

    Save the new_files list to a text file and return that file’s name



125
126
127
128
129
# File 'lib/dor/services/technical_metadata_service.rb', line 125

# Write the list of new files to 'jhove_fileset.txt' inside temp_dir.
#
# @param temp_dir [Pathname, String] the folder that receives the list file
# @param new_files [Array<String>] the filenames to record, written via `puts`
# @return [Pathname] the path of the written list file
def self.write_fileset(temp_dir, new_files)
  list_path = Pathname(temp_dir) + 'jhove_fileset.txt'
  File.open(list_path.to_s, 'w') do |io|
    io.puts(new_files)
  end
  list_path
end