Class: Dor::TechnicalMetadataService

Inherits:
Object
  • Object
show all
Defined in:
lib/dor/services/technical_metadata_service.rb

Class Method Summary collapse

Class Method Details

.add_update_technical_metadata(dor_item) ⇒ Boolean



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/dor/services/technical_metadata_service.rb', line 11

# Create or refresh the 'technicalMetadata' datastream of a DOR item.
#
# New technical metadata is generated for added/modified content files and
# merged with whatever technical metadata already exists for the object.
# When prior metadata exists and the content diff reports no differences,
# the datastream is left untouched.
#
# @param dor_item [Object] item exposing #pid and #datastreams (and whatever
#   get_content_group_diff requires of it)
# @return [Boolean] true, both when the datastream was saved and when no
#   update was necessary
def self.add_update_technical_metadata(dor_item)
  test_jhove_service
  druid = dor_item.pid
  content_group_diff = get_content_group_diff(dor_item)
  deltas = get_file_deltas(content_group_diff)
  new_files = get_new_files(deltas)
  old_techmd = get_old_technical_metadata(dor_item)
  new_techmd = get_new_technical_metadata(druid, new_files)
  if old_techmd.nil?
    # this is version 1 or previous technical metadata was not saved
    final_techmd = new_techmd
  elsif content_group_diff.difference_count == 0
    # there have been no changes to content files from previous version
    return true
  else
    # carry unchanged file nodes forward and splice in the regenerated ones
    merged_nodes = merge_file_nodes(old_techmd, new_techmd, deltas)
    final_techmd = build_technical_metadata(druid, merged_nodes)
  end
  ds = dor_item.datastreams['technicalMetadata']
  ds.dsLabel = 'Technical Metadata'
  ds.content = final_techmd
  ds.save
  true
end

.build_technical_metadata(druid, merged_nodes) ⇒ String



195
196
197
198
199
200
201
202
203
204
205
# File 'lib/dor/services/technical_metadata_service.rb', line 195

# Assemble a complete technicalMetadata XML document from per-file fragments.
#
# The root element carries the object id, the current UTC timestamp and the
# jhove/mix/textmd namespace declarations; file fragments are appended in
# sorted path order so the output is stable for a given input.
#
# @param druid [String] identifier written to the objectId attribute
# @param merged_nodes [Hash<String,String>] file path => XML text of that
#   file's node (each value is appended verbatim)
# @return [String] the serialized technicalMetadata document
def self.build_technical_metadata(druid, merged_nodes)
  doc = <<-EOF
<technicalMetadata objectId='#{druid}' datetime='#{Time.now.utc.iso8601}'
    xmlns:jhove='http://hul.harvard.edu/ois/xml/ns/jhove'
    xmlns:mix='http://www.loc.gov/mix/v10'
    xmlns:textmd='info:lc/xmlns/textMD-v3'>
  EOF
  merged_nodes.keys.sort.each { |path| doc << merged_nodes[path] }
  doc + '</technicalMetadata>'
end

.get_content_group_diff(dor_item) ⇒ FileGroupDifference



50
51
52
53
54
# File 'lib/dor/services/technical_metadata_service.rb', line 50

# Fetch the item's content diff and narrow it to the 'content' file group.
#
# @param dor_item [Object] item responding to #get_content_diff
# @return [Object] whatever Moab::FileInventoryDifference#group_difference
#   returns for the 'content' group
def self.get_content_group_diff(dor_item)
  Moab::FileInventoryDifference
    .parse(dor_item.get_content_diff('all'))
    .group_difference('content')
end

.get_dor_technical_metadata(dor_item) ⇒ String



93
94
95
96
97
98
99
100
# File 'lib/dor/services/technical_metadata_service.rb', line 93

# Read technical metadata from the item's own 'technicalMetadata' datastream.
#
# @param dor_item [Object] item exposing #datastreams
# @return [String, nil] the datastream content when it already is a
#   technicalMetadata document; the JhoveService-upgraded form when the
#   content is raw jhove output; nil when the datastream is absent, new,
#   or holds anything else
def self.get_dor_technical_metadata(dor_item)
  ds = 'technicalMetadata'
  return nil unless dor_item.datastreams.keys.include?(ds) && !dor_item.datastreams[ds].new?
  dor_techmd = dor_item.datastreams[ds].content
  return dor_techmd if dor_techmd =~ /<technicalMetadata/
  # older objects stored raw jhove output; convert it to the current format
  return ::JhoveService.new.upgrade_technical_metadata(dor_techmd) if dor_techmd =~ /<jhove/
  nil
end

.get_file_deltas(content_group_diff) ⇒ Hash<Symbol,Array>



58
59
60
# File 'lib/dor/services/technical_metadata_service.rb', line 58

# Expose the per-file change sets recorded on a content group diff.
#
# @param content_group_diff [Object] object responding to #file_deltas
# @return [Object] the value of content_group_diff.file_deltas, unchanged
#   (other methods here index it with :identical, :modified, :added,
#   :renamed and :copyadded)
def self.get_file_deltas(content_group_diff)
  content_group_diff.file_deltas
end

.get_file_nodes(technical_metadata) ⇒ Hash<String,Nokogiri::XML::Node>



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/dor/services/technical_metadata_service.rb', line 168

# Split a technicalMetadata document into its individual <file> elements.
#
# The document is scanned line by line rather than parsed as XML: a line
# opening a <file ...> element starts a fragment, a line holding </file>
# ends it, and everything in between is collected verbatim.
#
# @param technical_metadata [String, nil] technicalMetadata XML text
# @return [Hash<String,String>] file path (the last quoted value found on
#   the opening <file> line) => raw text of that element, including its
#   opening and closing lines; empty when the input is nil
def self.get_file_nodes(technical_metadata)
  file_hash = {}
  return file_hash if technical_metadata.nil?
  current_file = []
  path = nil
  in_file = false
  technical_metadata.each_line do |line|
    if line =~ /^\s*<file.*["'](.*?)["']/
      current_file << line
      path = Regexp.last_match(1)
      in_file = true
    elsif line =~ /^\s*<\/file>/
      current_file << line
      file_hash[path] = current_file.join
      # reset state so text between file elements is not captured
      current_file = []
      path = nil
      in_file = false
    elsif in_file
      current_file << line
    end
  end
  file_hash
end

.get_new_files(deltas) ⇒ Array<String>



64
65
66
# File 'lib/dor/services/technical_metadata_service.rb', line 64

# List the files that need fresh technical metadata: those added plus those
# modified, in that order.
#
# @param deltas [Hash<Symbol,Array>] change sets keyed by :added and :modified
# @return [Array] the :added entries followed by the :modified entries
def self.get_new_files(deltas)
  added = deltas[:added]
  modified = deltas[:modified]
  added + modified
end

.get_new_technical_metadata(druid, new_files) ⇒ String



114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/dor/services/technical_metadata_service.rb', line 114

# Run JHOVE over the given files and return the resulting technical metadata.
#
# @param druid [String] object identifier, used to locate the workspace and
#   set as the JhoveService digital object id
# @param new_files [Array<String>, nil] files to characterize
# @return [String, nil] contents of the technical metadata file produced by
#   JhoveService, or nil when there are no files to process
def self.get_new_technical_metadata(druid, new_files)
  return nil if new_files.nil? || new_files.empty?
  workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
  content_dir = workspace.find_filelist_parent('content', new_files)
  temp_dir = workspace.temp_dir
  jhove_service = ::JhoveService.new(temp_dir)
  jhove_service.digital_object_id = druid
  # JHOVE is pointed at a list file naming the new files
  fileset_file = write_fileset(temp_dir, new_files)
  jhove_output_file = jhove_service.run_jhove(content_dir, fileset_file)
  tech_md_file = jhove_service.create_technical_metadata(jhove_output_file)
  IO.read(tech_md_file)
end

.get_old_technical_metadata(dor_item) ⇒ String



70
71
72
73
74
# File 'lib/dor/services/technical_metadata_service.rb', line 70

# Locate the technical metadata of the previous version of an item.
#
# SDR is consulted first (by the item's pid); the item's own datastream is
# only used when SDR has nothing.
#
# @param dor_item [Object] item exposing #pid
# @return [String, nil] prior technical metadata, or nil when neither source
#   provides any
def self.get_old_technical_metadata(dor_item)
  sdr_techmd = get_sdr_technical_metadata(dor_item.pid)
  return sdr_techmd unless sdr_techmd.nil?
  get_dor_technical_metadata(dor_item)
end

.get_sdr_metadata(druid, dsname) ⇒ String



105
106
107
108
109
# File 'lib/dor/services/technical_metadata_service.rb', line 105

# Fetch one metadata datastream of an object through the configured SDR
# REST client.
#
# @param druid [String] object identifier, interpolated into the request path
# @param dsname [String] datastream name, requested as "<dsname>.xml"
# @return [Object] the result of the client's GET on
#   "objects/<druid>/metadata/<dsname>.xml"
def self.get_sdr_metadata(druid, dsname)
  sdr_client = Dor::Config.sdr.rest_client
  url = "objects/#{druid}/metadata/#{dsname}.xml"
  sdr_client[url].get
end

.get_sdr_technical_metadata(druid) ⇒ String



79
80
81
82
83
84
85
86
87
88
# File 'lib/dor/services/technical_metadata_service.rb', line 79

# Retrieve an object's technical metadata from SDR.
#
# @param druid [String] object identifier
# @return [String, nil] the SDR content when it already is a
#   technicalMetadata document; the JhoveService-upgraded form when it is
#   raw jhove output; nil when SDR reports the resource as not found or
#   returns anything else
def self.get_sdr_technical_metadata(druid)
  begin
    sdr_techmd = get_sdr_metadata(druid, 'technicalMetadata')
  rescue RestClient::ResourceNotFound
    # nothing stored in SDR for this object
    return nil
  end
  return sdr_techmd if sdr_techmd =~ /<technicalMetadata/
  return ::JhoveService.new.upgrade_technical_metadata(sdr_techmd) if sdr_techmd =~ /<jhove/
  nil
end

.merge_file_nodes(old_techmd, new_techmd, deltas) ⇒ Hash<String,Nokogiri::XML::Node>



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/dor/services/technical_metadata_service.rb', line 140

# Combine file nodes from old and new technical metadata according to the
# file deltas.
#
# Identical files keep their old node; modified and added files take the
# new node; renamed and copy-added files reuse a copy of the old node with
# its opening <file> tag rewritten to carry the new path.
#
# @param old_techmd [String, nil] previous technical metadata
# @param new_techmd [String, nil] freshly generated technical metadata
# @param deltas [Hash<Symbol,Array>] change sets keyed by :identical,
#   :modified, :added, :renamed and :copyadded (the last two holding
#   [oldpath, newpath] pairs)
# @return [Hash<String,String>] file path => node text for the merged document
def self.merge_file_nodes(old_techmd, new_techmd, deltas)
  previous = get_file_nodes(old_techmd)
  current = get_file_nodes(new_techmd)
  merged = {}

  deltas[:identical].each { |path| merged[path] = previous[path] }

  [:modified, :added].each do |kind|
    deltas[kind].each { |path| merged[path] = current[path] }
  end

  [:renamed, :copyadded].each do |kind|
    deltas[kind].each do |oldpath, newpath|
      # work on a copy so the node stored under the old path is left intact
      relabeled = previous[oldpath].clone
      relabeled.sub!(/<file\s*id.*?["'].*?["'].*?>/, "<file id='#{newpath}'>")
      merged[newpath] = relabeled
    end
  end

  merged
end

.test_jhove_service ⇒ Boolean



37
38
39
40
41
42
43
44
45
46
# File 'lib/dor/services/technical_metadata_service.rb', line 37

# Make sure the JhoveService class is available, loading the jhove_service
# library on demand.
#
# @return [nil, Boolean] nil when ::JhoveService is already defined,
#   otherwise the result of the require call
# @raise [RuntimeError] when the jhove_service library cannot be loaded
def self.test_jhove_service
  return if defined?(::JhoveService)
  require 'jhove_service'
rescue LoadError => e
  puts e.inspect
  raise 'jhove-service dependency gem was not found.  Please add it to your Gemfile and run bundle install'
end

.write_fileset(temp_dir, new_files) ⇒ Pathname



130
131
132
133
134
# File 'lib/dor/services/technical_metadata_service.rb', line 130

# Write the list of files to be characterized into 'jhove_fileset.txt'
# inside the given directory, one entry per line.
#
# @param temp_dir [String, Pathname] directory that receives the list file
# @param new_files [Array<String>] entries to write
# @return [Pathname] location of the written list file
def self.write_fileset(temp_dir, new_files)
  Pathname(temp_dir).join('jhove_fileset.txt').tap do |fileset|
    fileset.open('w') { |io| io.puts(new_files) }
  end
end