Class: Dor::TechnicalMetadataService

Inherits:
Object
  • Object
show all
Defined in:
lib/dor/services/technical_metadata_service.rb

Class Method Summary collapse

Class Method Details

.add_update_technical_metadata(dor_item) ⇒ Boolean



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/dor/services/technical_metadata_service.rb', line 11

# Generates technical metadata for the item's added/modified content files,
# combines it with any previously stored technical metadata, and saves the
# result in the item's 'technicalMetadata' datastream.
#
# @param dor_item [Object] item exposing #pid and #datastreams
# @return [Boolean] true when the datastream was saved, and also when old
#   metadata exists and the content diff reports zero differences (nothing is
#   saved in that case)
def self.add_update_technical_metadata(dor_item)
  test_jhove_service
  druid = dor_item.pid
  content_group_diff = get_content_group_diff(dor_item)
  deltas = get_file_deltas(content_group_diff)
  new_files = get_new_files(deltas)
  old_techmd = get_old_technical_metadata(dor_item)
  new_techmd = get_new_technical_metadata(druid, new_files)
  if old_techmd.nil?
    # this is version 1 or previous technical metadata was not saved
    final_techmd = new_techmd
  elsif content_group_diff.difference_count == 0
    # there have been no changes to content files from previous version
    return true
  else
    # keep nodes for unchanged files, take fresh nodes for new/changed ones
    merged_nodes = merge_file_nodes(old_techmd, new_techmd, deltas)
    final_techmd = build_technical_metadata(druid, merged_nodes)
  end
  ds = dor_item.datastreams['technicalMetadata']
  ds.dsLabel = 'Technical Metadata'
  ds.content = final_techmd
  ds.save
  true
end

.build_technical_metadata(druid, merged_nodes) ⇒ String



190
191
192
193
194
195
196
197
198
199
200
# File 'lib/dor/services/technical_metadata_service.rb', line 190

# Assembles a complete technicalMetadata document: a root element carrying the
# druid, the current UTC timestamp and the namespace declarations, followed by
# every file node in sorted path order, followed by the closing root tag.
#
# @param druid [String] identifier written to the root element's objectId
# @param merged_nodes [Hash<String,String>] file path => serialized <file> node
# @return [String] the assembled technicalMetadata XML
def self.build_technical_metadata(druid, merged_nodes)
  techmd_root = <<~EOF
    <technicalMetadata objectId='#{druid}' datetime='#{Time.now.utc.iso8601}'
    xmlns:jhove='http://hul.harvard.edu/ois/xml/ns/jhove'
    xmlns:mix='http://www.loc.gov/mix/v10'
    xmlns:textmd='info:lc/xmlns/textMD-v3'>
  EOF
  doc = techmd_root
  # sort by path so the output order is stable
  merged_nodes.keys.sort.each { |path| doc << merged_nodes[path] }
  doc + '</technicalMetadata>'
end

.get_content_group_diff(dor_item) ⇒ FileGroupDifference



50
51
52
53
54
55
# File 'lib/dor/services/technical_metadata_service.rb', line 50

# Asks the item for its full content diff and returns the difference report
# for the 'content' file group.
#
# @param dor_item [Object] item responding to #get_content_diff
# @return [Object] result of #group_difference('content'), or a new
#   Moab::FileGroupDifference when Dor::Exception is raised
def self.get_content_group_diff(dor_item)
  begin
    dor_item.get_content_diff('all').group_difference('content')
  rescue Dor::Exception
    # no contentMetadata
    Moab::FileGroupDifference.new
  end
end

.get_dor_technical_metadata(dor_item) ⇒ String



90
91
92
93
94
95
96
97
# File 'lib/dor/services/technical_metadata_service.rb', line 90

# Reads technical metadata from the item's own 'technicalMetadata' datastream.
#
# @param dor_item [Object] item exposing #datastreams
# @return [String, nil] the datastream content when it contains
#   '<technicalMetadata'; the upgraded form when it contains '<jhove'; nil when
#   the datastream is absent, reports new?, or matches neither pattern
def self.get_dor_technical_metadata(dor_item)
  ds = 'technicalMetadata'
  return nil unless dor_item.datastreams.keys.include?(ds) && !dor_item.datastreams[ds].new?
  dor_techmd = dor_item.datastreams[ds].content
  return dor_techmd if dor_techmd =~ /<technicalMetadata/
  # NOTE(review): method name restored from the jhove-service API
  # (JhoveService#upgrade_technical_metadata) -- confirm against the gem.
  return ::JhoveService.new.upgrade_technical_metadata(dor_techmd) if dor_techmd =~ /<jhove/
  nil
end

.get_file_deltas(content_group_diff) ⇒ Hash<Symbol,Array>



59
60
61
# File 'lib/dor/services/technical_metadata_service.rb', line 59

# Extracts the per-file change lists from a content group difference report.
#
# @param content_group_diff [Object] object responding to #file_deltas
# @return [Object] whatever #file_deltas returns (documented on the page as a
#   Hash of Symbol => Array)
def self.get_file_deltas(content_group_diff)
  file_deltas = content_group_diff.file_deltas
  file_deltas
end

.get_file_nodes(technical_metadata) ⇒ Hash<String,Nokogiri::XML::Node>



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/dor/services/technical_metadata_service.rb', line 163

# Splits a technicalMetadata document into its <file> elements using a
# line-by-line scan (no XML parser is involved).
#
# @param technical_metadata [String, nil] technicalMetadata XML text
# @return [Hash<String,String>] quoted value captured from each opening <file
#   line => the text of that element, from its opening line through its
#   </file> line; empty when the input is nil
def self.get_file_nodes(technical_metadata)
  file_hash = {}
  return file_hash if technical_metadata.nil?
  current_file = []
  path = nil
  in_file = false
  technical_metadata.each_line do |line|
    if line =~ /^\s*<file.*["'](.*?)["']/
      # opening tag: the greedy .* means the LAST quoted value on the line is captured
      current_file << line
      path = Regexp.last_match(1)
      in_file = true
    elsif line =~ /^\s*<\/file>/
      # closing tag: store the accumulated lines and reset for the next element
      current_file << line
      file_hash[path] = current_file.join
      current_file = []
      path = nil
      in_file = false
    elsif in_file
      current_file << line
    end
  end
  file_hash
end

.get_new_files(deltas) ⇒ Array<String>



65
66
67
# File 'lib/dor/services/technical_metadata_service.rb', line 65

# Lists the files that need fresh technical metadata: added files first, then
# modified files.
#
# @param deltas [Hash<Symbol,Array>] must provide :added and :modified
# @return [Array<String>] the :added entries followed by the :modified entries
def self.get_new_files(deltas)
  added = deltas[:added]
  modified = deltas[:modified]
  added + modified
end

.get_new_technical_metadata(druid, new_files) ⇒ String



109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/dor/services/technical_metadata_service.rb', line 109

# Runs JHOVE over the given content files in the druid's local workspace and
# returns the technical metadata produced from its output.
#
# @param druid [String] identifier of the object being processed
# @param new_files [Array<String>, nil] content files to characterize
# @return [String, nil] contents of the generated technical metadata file, or
#   nil when new_files is nil or empty
def self.get_new_technical_metadata(druid, new_files)
  return nil if new_files.nil? || new_files.empty?
  workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
  content_dir = workspace.find_filelist_parent('content', new_files)
  temp_dir = workspace.temp_dir
  jhove_service = ::JhoveService.new(temp_dir)
  jhove_service.digital_object_id = druid
  # the list of files to examine is handed to JHOVE through a fileset file
  fileset_file = write_fileset(temp_dir, new_files)
  jhove_output_file = jhove_service.run_jhove(content_dir, fileset_file)
  # NOTE(review): method name restored from the jhove-service API
  # (JhoveService#create_technical_metadata) -- confirm against the gem.
  tech_md_file = jhove_service.create_technical_metadata(jhove_output_file)
  IO.read(tech_md_file)
end

.get_old_technical_metadata(dor_item) ⇒ String



71
72
73
74
75
# File 'lib/dor/services/technical_metadata_service.rb', line 71

# Finds the previously saved technical metadata for an item, trying the SDR
# copy first and falling back to the item's own datastream.
#
# @param dor_item [Object] item exposing #pid (and whatever
#   get_dor_technical_metadata reads from it)
# @return [String, nil] the SDR technical metadata when it is not nil,
#   otherwise the result of get_dor_technical_metadata
def self.get_old_technical_metadata(dor_item)
  sdr_techmd = get_sdr_technical_metadata(dor_item.pid)
  return sdr_techmd unless sdr_techmd.nil?
  get_dor_technical_metadata(dor_item)
end

.get_sdr_metadata(druid, dsname) ⇒ String



102
103
104
# File 'lib/dor/services/technical_metadata_service.rb', line 102

# Fetches one named metadata datastream for a druid by delegating to
# Sdr::Client.
#
# @param druid [String] identifier of the object
# @param dsname [String] name of the metadata datastream to fetch
# @return [String] whatever the client returns (documented on the page as String)
def self.get_sdr_metadata(druid, dsname)
  # NOTE(review): client method name restored as Sdr::Client.get_sdr_metadata
  # -- confirm against the Sdr::Client definition.
  Sdr::Client.get_sdr_metadata(druid, dsname)
end

.get_sdr_technical_metadata(druid) ⇒ String



80
81
82
83
84
85
# File 'lib/dor/services/technical_metadata_service.rb', line 80

# Retrieves the 'technicalMetadata' datastream for a druid via get_sdr_metadata.
#
# @param druid [String] identifier of the object
# @return [String, nil] the retrieved text when it contains
#   '<technicalMetadata'; the upgraded form when it contains '<jhove'; nil
#   otherwise
def self.get_sdr_technical_metadata(druid)
  sdr_techmd = get_sdr_metadata(druid, 'technicalMetadata')
  return sdr_techmd if sdr_techmd =~ /<technicalMetadata/
  # NOTE(review): method name restored from the jhove-service API
  # (JhoveService#upgrade_technical_metadata) -- confirm against the gem.
  return ::JhoveService.new.upgrade_technical_metadata(sdr_techmd) if sdr_techmd =~ /<jhove/
  nil
end

.merge_file_nodes(old_techmd, new_techmd, deltas) ⇒ Hash<String,Nokogiri::XML::Node>



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/dor/services/technical_metadata_service.rb', line 135

# Builds the set of <file> nodes for the new version by combining nodes from
# the old and new technical metadata according to the file deltas.
#
# @param old_techmd [String] technical metadata from the previous version
# @param new_techmd [String] technical metadata generated for new/changed files
# @param deltas [Hash<Symbol,Array>] must provide :identical, :modified,
#   :added, :renamed and :copyadded; the last two yield [oldpath, newpath] pairs
# @return [Hash<String,String>] file path => serialized <file> node
def self.merge_file_nodes(old_techmd, new_techmd, deltas)
  old_file_nodes = get_file_nodes(old_techmd)
  new_file_nodes = get_file_nodes(new_techmd)
  merged_nodes = {}
  # unchanged files keep their old node
  deltas[:identical].each { |path| merged_nodes[path] = old_file_nodes[path] }
  # modified and added files take the freshly generated node
  %i[modified added].each do |change|
    deltas[change].each { |path| merged_nodes[path] = new_file_nodes[path] }
  end
  # renamed and copied files reuse the old node under the new path
  %i[renamed copyadded].each do |change|
    deltas[change].each do |oldpath, newpath|
      clone = old_file_nodes[oldpath].clone
      # Block form of sub!: a string replacement would expand backslash
      # sequences such as \1 found in newpath and corrupt the id.
      clone.sub!(/<file\s*id.*?["'].*?["'].*?>/) { "<file id='#{newpath}'>" }
      merged_nodes[newpath] = clone
    end
  end
  merged_nodes
end

.test_jhove_service ⇒ Boolean



37
38
39
40
41
42
43
44
45
46
# File 'lib/dor/services/technical_metadata_service.rb', line 37

# Ensures the JhoveService constant is available, requiring 'jhove_service'
# when it has not been defined yet.
#
# @return [nil, Boolean] nil when ::JhoveService is already defined, otherwise
#   the result of the require call
# @raise [RuntimeError] when 'jhove_service' cannot be loaded
def self.test_jhove_service
  return if defined?(::JhoveService)

  require 'jhove_service'
rescue LoadError => e
  puts e.inspect
  raise 'jhove-service dependency gem was not found.  Please add it to your Gemfile and run bundle install'
end

.write_fileset(temp_dir, new_files) ⇒ Pathname



125
126
127
128
129
# File 'lib/dor/services/technical_metadata_service.rb', line 125

# Writes the given file names, one per line, to 'jhove_fileset.txt' inside
# temp_dir, replacing any existing file of that name.
#
# @param temp_dir [String, Pathname] directory that receives the fileset file
# @param new_files [Array<String>] file names to list
# @return [Pathname] path of the fileset file that was written
def self.write_fileset(temp_dir, new_files)
  Pathname(temp_dir).join('jhove_fileset.txt').tap do |fileset|
    fileset.open('w') { |io| io.puts(new_files) }
  end
end