Class: Dor::TechnicalMetadataService

Inherits:
Object
  • Object
show all
Defined in:
lib/dor/services/technical_metadata_service.rb

Class Method Summary collapse

Class Method Details

.add_update_technical_metadata(dor_item) ⇒ Boolean

Returns True if technical metadata is correctly added or updated.

Parameters:

  • dor_item (Dor::Item)

    The DOR item being processed by the technical metadata robot

Returns:

  • (Boolean)

    True if technical metadata is correctly added or updated



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/dor/services/technical_metadata_service.rb', line 12

# Creates or refreshes the "technicalMetadata" datastream of a DOR item.
#
# Technical metadata is generated for the added/modified content files and,
# when a previous version's technical metadata exists, merged with it.
#
# @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
# @return [Boolean] True if technical metadata is correctly added or updated
def self.add_update_technical_metadata(dor_item)
  test_jhove_service
  druid = dor_item.pid
  content_group_diff = get_content_group_diff(dor_item)
  deltas = get_file_deltas(content_group_diff)
  new_files = get_new_files(deltas)
  old_techmd = get_old_technical_metadata(dor_item)
  new_techmd = get_new_technical_metadata(druid, new_files)
  if old_techmd.nil?
    # this is version 1 or previous technical metadata was not saved
    final_techmd = new_techmd
  elsif content_group_diff.difference_count == 0
    # there have been no changes to content files from previous version,
    # so the existing datastream is left untouched
    return true
  else
    merged_nodes = merge_file_nodes(old_techmd, new_techmd, deltas)
    final_techmd = build_technical_metadata(druid, merged_nodes)
  end
  ds = dor_item.datastreams["technicalMetadata"]
  ds.dsLabel = 'Technical Metadata'
  ds.content = final_techmd
  ds.save
  true
end

.build_technical_metadata(druid, merged_nodes) ⇒ String

Returns The finalized technicalMetadata datastream contents for the new object version.

Parameters:

  • druid (String)

    The identifier of the digital object being processed by the technical metadata robot

  • merged_nodes (Hash<String,Nokogiri::XML::Node>)

    The complete set of technicalMetadata nodes for the digital object, indexed by filename

Returns:

  • (String)

    The finalized technicalMetadata datastream contents for the new object version



210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/dor/services/technical_metadata_service.rb', line 210

# Wraps the per-file nodes in a <technicalMetadata> root element.
#
# The nodes are appended in ascending order of their hash key (the file path),
# and the root element is stamped with the current UTC time.
#
# @param [String] druid The identifier of the digital object being processed by the technical metadata robot
# @param [Hash<String,String>] merged_nodes The complete set of technicalMetadata nodes
#   for the digital object, indexed by filename; each value is appended to a String with <<
# @return [String] The finalized technicalMetadata datastream contents for the new object version
def self.build_technical_metadata(druid, merged_nodes)
  doc = <<-EOF
<technicalMetadata objectId='#{druid}' datetime='#{Time.now.utc.iso8601}'
xmlns:jhove='http://hul.harvard.edu/ois/xml/ns/jhove'
xmlns:mix='http://www.loc.gov/mix/v10'
xmlns:textmd='info:lc/xmlns/textMD-v3'>
  EOF
  merged_nodes.keys.sort.each { |path| doc << merged_nodes[path] }
  doc << "</technicalMetadata>"
  doc
end

.get_content_group_diff(dor_item) ⇒ FileGroupDifference

Returns The differences between two versions of a group of files.

Parameters:

  • dor_item (Dor::Item)

    The DOR item being processed by the technical metadata robot

Returns:

  • (FileGroupDifference)

    The differences between two versions of a group of files



51
52
53
54
55
56
# File 'lib/dor/services/technical_metadata_service.rb', line 51

# Fetches the item's content diff for the 'all' subset, parses it as a file
# inventory difference and picks out the "content" file group.
#
# @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
# @return [FileGroupDifference] The differences between two versions of a group of files
def self.get_content_group_diff(dor_item)
  diff_xml = dor_item.get_content_diff('all')
  Moab::FileInventoryDifference.parse(diff_xml).group_difference("content")
end

.get_dor_technical_metadata(dor_item) ⇒ String

Returns The technicalMetadata datastream from the previous version of the digital object (fetched from DOR fedora). The data is updated to the latest format.

Parameters:

  • dor_item (Dor::Item)

    The DOR item being processed by the technical metadata robot

Returns:

  • (String)

    The technicalMetadata datastream from the previous version of the digital object (fetched from DOR fedora). The data is updated to the latest format.



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/dor/services/technical_metadata_service.rb', line 100

# Reads the existing "technicalMetadata" datastream content from the DOR item.
#
# Content already in <technicalMetadata> form is returned unchanged; content
# containing a <jhove> element is passed through JhoveService to be upgraded.
#
# @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
# @return [String, nil] The technicalMetadata datastream from the previous version of the
#   digital object (fetched from DOR fedora), or nil when the datastream is absent,
#   is new (unsaved), or holds content in neither recognized format
def self.get_dor_technical_metadata(dor_item)
  ds = "technicalMetadata"
  datastreams = dor_item.datastreams
  return nil unless datastreams.key?(ds) && !datastreams[ds].new?
  dor_techmd = datastreams[ds].content
  if dor_techmd =~ /<technicalMetadata/
    dor_techmd
  elsif dor_techmd =~ /<jhove/
    # NOTE(review): method name restored from the jhove-service gem API - confirm
    ::JhoveService.new.upgrade_technical_metadata(dor_techmd)
  end
end

.get_file_deltas(content_group_diff) ⇒ Hash<Symbol,Array>

Returns Sets of filenames grouped by change type for use in performing file or metadata operations.

Parameters:

  • content_group_diff (FileGroupDifference)

Returns:

  • (Hash<Symbol,Array>)

    Sets of filenames grouped by change type for use in performing file or metadata operations



60
61
62
63
# File 'lib/dor/services/technical_metadata_service.rb', line 60

# Exposes the file_deltas of a content group difference.
#
# @param [FileGroupDifference] content_group_diff
# @return [Hash<Symbol,Array>] Sets of filenames grouped by change type for use in
#   performing file or metadata operations
def self.get_file_deltas(content_group_diff)
  content_group_diff.file_deltas
end

.get_file_nodes(technical_metadata) ⇒ Hash<String,Nokogiri::XML::Node>

Returns The set of nodes from a technicalMetadata datastream, indexed by filename.

Parameters:

  • technical_metadata (String)

    A technicalMetadata datastream contents

Returns:

  • (Hash<String,Nokogiri::XML::Node>)

    The set of nodes from a technicalMetadata datastream, indexed by filename



183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/dor/services/technical_metadata_service.rb', line 183

# Splits a technicalMetadata document into its <file> nodes using a line scan.
#
# A node starts on a line whose first element is <file id="..."> (either quote
# style) and ends on a line starting with </file>. Lines outside of a node,
# such as the root element, are ignored.
#
# @param [String, nil] technical_metadata A technicalMetadata datastream contents
# @return [Hash<String,String>] The text of each file node of the technicalMetadata
#   datastream, indexed by filename (empty when technical_metadata is nil)
def self.get_file_nodes(technical_metadata)
  file_hash = Hash.new
  return file_hash if technical_metadata.nil?
  current_file = Array.new
  path = nil
  in_file = false
  technical_metadata.each_line do |line|
    # Capture the id attribute up to its matching closing quote, so that ids
    # containing the other quote character are kept intact and child elements
    # whose names merely begin with "file" do not start a new node.
    if line =~ /^\s*<file\s[^>]*?\bid\s*=\s*(["'])(.*?)\1/
      current_file << line
      path = Regexp.last_match(2)
      in_file = true
    elsif line =~ /^\s*<\/file>/
      current_file << line
      file_hash[path] = current_file.join
      current_file = Array.new
      path = nil
      in_file = false
    elsif in_file
      current_file << line
    end
  end
  file_hash
end

.get_new_files(deltas) ⇒ Array<String>

Returns The list of filenames for files that are either added or modified since the previous version.

Parameters:

  • deltas (Hash<Symbol,Array>)

    Sets of filenames grouped by change type for use in performing file or metadata operations

Returns:

  • (Array<String>)

    The list of filenames for files that are either added or modified since the previous version



67
68
69
# File 'lib/dor/services/technical_metadata_service.rb', line 67

# Concatenates the :added and :modified entries of the deltas, in that order.
#
# @param [Hash<Symbol,Array>] deltas Sets of filenames grouped by change type for use in
#   performing file or metadata operations
# @return [Array<String>] The list of filenames for files that are either added or
#   modified since the previous version
def self.get_new_files(deltas)
  added = deltas[:added]
  modified = deltas[:modified]
  added + modified
end

.get_new_technical_metadata(druid, new_files) ⇒ String

Returns The technicalMetadata datastream for the new files of the new digital object version.

Parameters:

  • druid (DruidTools::Druid)

    A wrapper class for the druid identifier. Used to generate paths

  • new_files (Array<String>)

    The list of filenames for files that are either added or modified since the previous version

Returns:

  • (String)

    The technicalMetadata datastream for the new files of the new digital object version



129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/dor/services/technical_metadata_service.rb', line 129

# Runs JHOVE over the new files in the object's workspace and returns the
# resulting technical metadata document.
#
# @param [String] druid The object identifier; it is wrapped in a DruidTools::Druid
#   rooted at Dor::Config.sdr.local_workspace_root to locate the workspace paths
# @param [Array<String>] new_files The list of filenames for files that are either
#   added or modified since the previous version
# @return [String, nil] The technicalMetadata datastream for the new files of the new
#   digital object version, or nil when new_files is nil or empty
def self.get_new_technical_metadata(druid, new_files)
  return nil if new_files.nil? || new_files.empty?
  workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
  content_dir = workspace.find_filelist_parent('content', new_files)
  temp_dir = workspace.temp_dir
  jhove_service = ::JhoveService.new(temp_dir)
  jhove_service.digital_object_id = druid
  fileset_file = write_fileset(temp_dir, new_files)
  jhove_output_file = jhove_service.run_jhove(content_dir, fileset_file)
  # NOTE(review): method name restored from the jhove-service gem API - confirm
  tech_md_file = jhove_service.create_technical_metadata(jhove_output_file)
  IO.read(tech_md_file)
end

.get_old_technical_metadata(dor_item) ⇒ String

Returns The technicalMetadata datastream from the previous version of the digital object.

Parameters:

  • dor_item (Dor::Item)

    The DOR item being processed by the technical metadata robot

Returns:

  • (String)

    The technicalMetadata datastream from the previous version of the digital object



73
74
75
76
77
# File 'lib/dor/services/technical_metadata_service.rb', line 73

# Looks up the previous version's technical metadata, preferring the copy in
# SDR storage and falling back to the item's own DOR datastream.
#
# @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
# @return [String, nil] The technicalMetadata datastream from the previous version of
#   the digital object, or whatever the DOR lookup returns when SDR has none
def self.get_old_technical_metadata(dor_item)
  sdr_techmd = get_sdr_technical_metadata(dor_item.pid)
  return sdr_techmd unless sdr_techmd.nil?
  get_dor_technical_metadata(dor_item)
end

.get_sdr_metadata(druid, dsname) ⇒ String

Returns The datastream contents from the previous version of the digital object (fetched from SDR storage).

Parameters:

  • druid (String)

    The identifier of the digital object being processed by the technical metadata robot

  • dsname (String)

    The identifier of the metadata datastream

Returns:

  • (String)

    The datastream contents from the previous version of the digital object (fetched from SDR storage)



119
120
121
122
123
124
# File 'lib/dor/services/technical_metadata_service.rb', line 119

# Issues a GET for "objects/<druid>/metadata/<dsname>.xml" through the REST
# client configured at Dor::Config.sdr.rest_client.
#
# @param [String] druid The identifier of the digital object being processed by the technical metadata robot
# @param [String] dsname The identifier of the metadata datastream
# @return [String] The datastream contents from the previous version of the digital
#   object (fetched from SDR storage); this is the client's response object
def self.get_sdr_metadata(druid, dsname)
  sdr_client = Dor::Config.sdr.rest_client
  url = "objects/#{druid}/metadata/#{dsname}.xml"
  sdr_client[url].get
end

.get_sdr_technical_metadata(druid) ⇒ String

Returns The technicalMetadata datastream from the previous version of the digital object (fetched from SDR storage). The data is updated to the latest format.

Parameters:

  • druid (String)

    The identifier of the digital object being processed by the technical metadata robot

Returns:

  • (String)

    The technicalMetadata datastream from the previous version of the digital object (fetched from SDR storage). The data is updated to the latest format.



82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/dor/services/technical_metadata_service.rb', line 82

# Fetches the "technicalMetadata" datastream of the previous object version
# from SDR storage.
#
# Content already in <technicalMetadata> form is returned unchanged; content
# containing a <jhove> element is passed through JhoveService to be upgraded.
#
# @param [String] druid The identifier of the digital object being processed by the technical metadata robot
# @return [String, nil] The technicalMetadata datastream from the previous version of the
#   digital object, or nil when SDR reports the resource as not found or the
#   content is in neither recognized format
def self.get_sdr_technical_metadata(druid)
  begin
    sdr_techmd = get_sdr_metadata(druid, "technicalMetadata")
  rescue RestClient::ResourceNotFound
    # no technical metadata stored in SDR for this object
    return nil
  end
  if sdr_techmd =~ /<technicalMetadata/
    sdr_techmd
  elsif sdr_techmd =~ /<jhove/
    # NOTE(review): method name restored from the jhove-service gem API - confirm
    ::JhoveService.new.upgrade_technical_metadata(sdr_techmd)
  end
end

.merge_file_nodes(old_techmd, new_techmd, deltas) ⇒ Hash<String,Nokogiri::XML::Node>

Returns The complete set of technicalMetadata nodes for the digital object, indexed by filename.

Parameters:

  • old_techmd (String)

    The technicalMetadata datastream from the previous version of the digital object

  • new_techmd (String)

    The technicalMetadata datastream for the new files of the new digital object version

  • deltas (Hash<Symbol,Array>)

    Sets of filenames grouped by change type for use in performing file or metadata operations

Returns:

  • (Hash<String,Nokogiri::XML::Node>)

    The complete set of technicalMetadata nodes for the digital object, indexed by filename



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/dor/services/technical_metadata_service.rb', line 155

# Assembles the file nodes for the new object version from the previous
# version's nodes and the freshly generated ones.
#
# Identical files keep their old node, modified and added files take the new
# node, and renamed or copy-added files reuse the old node with its <file id>
# start tag rewritten to the new path.
#
# @param [String] old_techmd The technicalMetadata datastream from the previous version of the digital object
# @param [String] new_techmd The technicalMetadata datastream for the new files of the new digital object version
# @param [Hash<Symbol,Array>] deltas Sets of filenames grouped by change type; the
#   :renamed and :copyadded entries are iterated as [oldpath, newpath] pairs
# @return [Hash<String,String>] The complete set of technicalMetadata nodes for the
#   digital object, indexed by filename
def self.merge_file_nodes(old_techmd, new_techmd, deltas)
  old_file_nodes = get_file_nodes(old_techmd)
  new_file_nodes = get_file_nodes(new_techmd)
  merged_nodes = Hash.new
  deltas[:identical].each do |path|
    merged_nodes[path] = old_file_nodes[path]
  end
  [:modified, :added].each do |change|
    deltas[change].each do |path|
      merged_nodes[path] = new_file_nodes[path]
    end
  end
  [:renamed, :copyadded].each do |change|
    deltas[change].each do |oldpath, newpath|
      # Block form of sub: the replacement is inserted literally, so a
      # backslash sequence in newpath is not treated as a backreference.
      merged_nodes[newpath] = old_file_nodes[oldpath].sub(/<file\s*id.*?["'].*?["'].*?>/) do
        "<file id='#{newpath}'>"
      end
    end
  end
  merged_nodes
end

.test_jhove_service ⇒ Boolean

Returns Make sure that the jhove-service gem is loaded.

Returns:

  • (Boolean)

    Make sure that the jhove-service gem is loaded



38
39
40
41
42
43
44
45
46
47
# File 'lib/dor/services/technical_metadata_service.rb', line 38

# Lazily loads the jhove-service gem unless ::JhoveService is already defined.
#
# @return [nil, Boolean] nil when ::JhoveService is already defined, otherwise
#   the result of the require call
# @raise [RuntimeError] when the gem cannot be loaded (the LoadError is printed first)
def self.test_jhove_service
  return if defined?(::JhoveService)
  require 'jhove_service'
rescue LoadError => load_error
  puts load_error.inspect
  raise "jhove-service dependency gem was not found.  Please add it to your Gemfile and run bundle install"
end

.write_fileset(temp_dir, new_files) ⇒ Pathname

Returns Save the new_files list to a text file and return that file’s name.

Parameters:

  • temp_dir (Pathname)

    The pathname of the temp folder in the object’s workspace area

  • new_files (Array<String>)

    The list of filenames for files that are either added or modified since the previous version

Returns:

  • (Pathname)

    Save the new_files list to a text file and return that file’s name



145
146
147
148
149
# File 'lib/dor/services/technical_metadata_service.rb', line 145

# Writes the new_files list to 'jhove_fileset.txt' inside temp_dir (via puts)
# and hands back the path of that file.
#
# @param [Pathname] temp_dir The pathname of the temp folder in the object's workspace area
# @param [Array<String>] new_files The list of filenames for files that are either added
#   or modified since the previous version
# @return [Pathname] The pathname of the text file holding the new_files list
def self.write_fileset(temp_dir, new_files)
  Pathname(temp_dir).join('jhove_fileset.txt').tap do |list_path|
    list_path.open('w') { |out| out.puts(new_files) }
  end
end