Class: Dor::TechnicalMetadataService

Inherits:
Object
  • Object
show all
Defined in:
lib/dor/services/technical_metadata_service.rb

Class Method Summary collapse

Class Method Details

.add_update_technical_metadata(dor_item) ⇒ Boolean


12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/dor/services/technical_metadata_service.rb', line 12

# Create or refresh the technicalMetadata datastream of a digital object.
#
# New technical metadata is generated only for content files reported as
# added or modified; it is then merged with the technical metadata of the
# previous version (when one can be found).
#
# @param dor_item [Object] item responding to +pid+, +get_content_diff+ and +datastreams+
# @return [Boolean] true when the datastream was saved, or when nothing needed saving
def self.add_update_technical_metadata(dor_item)
  test_jhove_service
  druid = dor_item.pid
  content_group_diff = get_content_group_diff(dor_item)
  deltas = get_file_deltas(content_group_diff)
  new_files = get_new_files(deltas)
  old_techmd = get_old_technical_metadata(dor_item)
  new_techmd = get_new_technical_metadata(druid, new_files)
  if old_techmd.nil?
    # this is version 1 or previous technical metadata was not saved
    final_techmd = new_techmd
  elsif content_group_diff.difference_count == 0
    # there have been no changes to content files from previous version
    return true
  else
    merged_nodes = merge_file_nodes(old_techmd, new_techmd, deltas)
    final_techmd = build_technical_metadata(druid, merged_nodes)
  end
  ds = dor_item.datastreams["technicalMetadata"]
  ds.dsLabel = 'Technical Metadata'
  ds.content = final_techmd
  ds.save
  true
end

.build_technical_metadata(druid, merged_nodes) ⇒ String


210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/dor/services/technical_metadata_service.rb', line 210

# Assemble the complete technicalMetadata document from per-file nodes.
#
# The root element carries the object id and the current UTC time; the file
# nodes are appended in ascending path order, followed by the closing tag.
#
# @param druid [String] identifier written to the objectId attribute
# @param merged_nodes [Hash<String,String>] file path => XML text of that file's node
# @return [String] the technicalMetadata XML text
def self.build_technical_metadata(druid, merged_nodes)
  # Core strftime yields the UTC ISO 8601 form (e.g. 2012-01-31T08:15:00Z)
  # without depending on the 'time' stdlib extension being loaded.
  timestamp = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
  techmd_root = "<technicalMetadata objectId='#{druid}' datetime='#{timestamp}'\n" \
                "xmlns:jhove='http://hul.harvard.edu/ois/xml/ns/jhove'\n" \
                "xmlns:mix='http://www.loc.gov/mix/v10'\n" \
                "xmlns:textmd='info:lc/xmlns/textMD-v3'>\n"
  # Work on a copy so appending never mutates a (possibly frozen) literal.
  doc = techmd_root.dup
  merged_nodes.keys.sort.each { |path| doc << merged_nodes[path] }
  doc << "</technicalMetadata>"
  doc
end

.get_content_group_diff(dor_item) ⇒ FileGroupDifference


51
52
53
54
55
56
# File 'lib/dor/services/technical_metadata_service.rb', line 51

# Fetch the item's inventory difference report and extract its "content" group.
#
# @param dor_item [Object] item responding to +get_content_diff+
# @return [Object] whatever +group_difference("content")+ yields on the parsed
#   Moab::FileInventoryDifference (presumably a FileGroupDifference)
def self.get_content_group_diff(dor_item)
  diff_xml = dor_item.get_content_diff('all')
  Moab::FileInventoryDifference.parse(diff_xml).group_difference("content")
end

.get_dor_technical_metadata(dor_item) ⇒ String


100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/dor/services/technical_metadata_service.rb', line 100

# Read the technical metadata currently stored on the item itself.
#
# @param dor_item [Object] item whose +datastreams+ may hold "technicalMetadata"
# @return [String, nil] the stored technicalMetadata XML; the upgraded form when
#   the stored content is older JHOVE output; nil when the datastream is absent,
#   still new, or holds neither format
def self.get_dor_technical_metadata(dor_item)
  ds = "technicalMetadata"
  return nil unless dor_item.datastreams.keys.include?(ds) && !dor_item.datastreams[ds].new?

  dor_techmd = dor_item.datastreams[ds].content
  if dor_techmd =~ /<technicalMetadata/
    dor_techmd
  elsif dor_techmd =~ /<jhove/
    # older-format content is converted by the jhove-service gem
    ::JhoveService.new.upgrade_technical_metadata(dor_techmd)
  end
end

.get_file_deltas(content_group_diff) ⇒ Hash<Symbol,Array>


60
61
62
63
# File 'lib/dor/services/technical_metadata_service.rb', line 60

# Expose the per-category file lists recorded on a content group difference.
#
# @param content_group_diff [Object] object responding to +file_deltas+
# @return [Object] the value of +file_deltas+ (documented as Hash<Symbol,Array>)
def self.get_file_deltas(content_group_diff)
  content_group_diff.file_deltas
end

.get_file_nodes(technical_metadata) ⇒ Hash<String,String>


183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/dor/services/technical_metadata_service.rb', line 183

# Split a technicalMetadata document into the text of its individual file nodes.
#
# The document is scanned line by line: a line opening a <file> element starts
# a node, every following line is collected, and a line beginning with </file>
# completes the node. Each opening and closing tag must be on its own line.
#
# @param technical_metadata [String, nil] technicalMetadata XML text
# @return [Hash<String,String>] value of the file's id attribute => XML text of the node
def self.get_file_nodes(technical_metadata)
  file_hash = {}
  return file_hash if technical_metadata.nil?
  current_file = []
  path = nil
  in_file = false
  technical_metadata.each_line do |line|
    # Anchor on the id attribute of a real <file> tag: take the value between
    # the matching pair of quotes, ignore any other attributes on the line, and
    # do not mistake nested elements such as <filesize> for a new file.
    if line =~ /^\s*<file\b[^>]*?\sid\s*=\s*(["'])(.*?)\1/
      current_file << line
      path = $2
      in_file = true
    elsif line =~ /^\s*<\/file>/
      current_file << line
      file_hash[path] = current_file.join
      current_file = []
      path = nil
      in_file = false
    elsif in_file
      current_file << line
    end
  end
  file_hash
end

.get_new_files(deltas) ⇒ Array<String>


67
68
69
# File 'lib/dor/services/technical_metadata_service.rb', line 67

# List the paths that need fresh technical metadata.
#
# @param deltas [Hash<Symbol,Array>] file deltas holding +:added+ and +:modified+ lists
# @return [Array<String>] the added paths followed by the modified paths
def self.get_new_files(deltas)
  added_paths = deltas[:added]
  modified_paths = deltas[:modified]
  added_paths + modified_paths
end

.get_new_technical_metadata(druid, new_files) ⇒ String


129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/dor/services/technical_metadata_service.rb', line 129

# Run JHOVE over the given content files and return the resulting technical metadata.
#
# @param druid [String] identifier of the digital object
# @param new_files [Array<String>, nil] content file paths to characterize
# @return [String, nil] contents of the technical metadata file produced by the
#   jhove-service gem, or nil when there are no files to process
def self.get_new_technical_metadata(druid, new_files)
  return nil if new_files.nil? || new_files.empty?
  workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
  content_dir = workspace.find_filelist_parent('content', new_files)
  temp_dir = workspace.temp_dir
  jhove_service = ::JhoveService.new(temp_dir)
  jhove_service.digital_object_id = druid
  # the list of files handed to JHOVE is written into the temp dir
  fileset_file = write_fileset(temp_dir, new_files)
  jhove_output_file = jhove_service.run_jhove(content_dir, fileset_file)
  tech_md_file = jhove_service.create_technical_metadata(jhove_output_file)
  IO.read(tech_md_file)
end

.get_old_technical_metadata(dor_item) ⇒ String


73
74
75
76
77
# File 'lib/dor/services/technical_metadata_service.rb', line 73

# Locate the technical metadata of the item's previous version.
#
# The copy retrieved through get_sdr_technical_metadata wins; only when that
# lookup yields nil is the item's own datastream consulted.
#
# @param dor_item [Object] item responding to +pid+
# @return [String, nil] technical metadata XML, or nil when neither source has any
def self.get_old_technical_metadata(dor_item)
  sdr_techmd = get_sdr_technical_metadata(dor_item.pid)
  return sdr_techmd unless sdr_techmd.nil?
  get_dor_technical_metadata(dor_item)
end

.get_sdr_metadata(druid, dsname) ⇒ String


119
120
121
122
123
124
# File 'lib/dor/services/technical_metadata_service.rb', line 119

# Fetch one metadata datastream of an object through the configured SDR REST client.
#
# @param druid [String] identifier of the digital object
# @param dsname [String] name of the metadata datastream, without the .xml suffix
# @return [Object] the result of the GET request (documented as a String);
#   errors raised by the client are not rescued here
def self.get_sdr_metadata(druid, dsname)
  sdr_client = Dor::Config.sdr.rest_client
  url = "objects/#{druid}/metadata/#{dsname}.xml"
  sdr_client[url].get
end

.get_sdr_technical_metadata(druid) ⇒ String


82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/dor/services/technical_metadata_service.rb', line 82

# Retrieve the technicalMetadata datastream of an object via get_sdr_metadata.
#
# @param druid [String] identifier of the digital object
# @return [String, nil] the technicalMetadata XML; the upgraded form when the
#   stored content is older JHOVE output; nil when the resource is not found or
#   the content is in neither format
def self.get_sdr_technical_metadata(druid)
  begin
    sdr_techmd = get_sdr_metadata(druid, "technicalMetadata")
  rescue RestClient::ResourceNotFound
    # a missing resource just means no earlier metadata is available
    return nil
  end
  if sdr_techmd =~ /<technicalMetadata/
    sdr_techmd
  elsif sdr_techmd =~ /<jhove/
    # older-format content is converted by the jhove-service gem
    ::JhoveService.new.upgrade_technical_metadata(sdr_techmd)
  end
end

.merge_file_nodes(old_techmd, new_techmd, deltas) ⇒ Hash<String,String>


155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/dor/services/technical_metadata_service.rb', line 155

# Combine previous and freshly generated file nodes according to the file deltas.
#
# Identical files keep their old node; modified and added files take the new
# node; renamed and copy-added files reuse the old node with its opening
# <file> tag rewritten to carry the new path. Other delta categories are not
# consulted, so they contribute no entry.
#
# @param old_techmd [String, nil] technical metadata of the previous version
# @param new_techmd [String, nil] technical metadata generated for new/changed files
# @param deltas [Hash<Symbol,Array>] file deltas; +:renamed+ and +:copyadded+
#   hold [oldpath, newpath] pairs, the other lists hold paths
# @return [Hash<String,String>] file path => XML text of the node to keep
def self.merge_file_nodes(old_techmd, new_techmd, deltas)
  old_file_nodes = get_file_nodes(old_techmd)
  new_file_nodes = get_file_nodes(new_techmd)
  merged_nodes = {}
  deltas[:identical].each do |path|
    merged_nodes[path] = old_file_nodes[path]
  end
  [:modified, :added].each do |category|
    deltas[category].each do |path|
      merged_nodes[path] = new_file_nodes[path]
    end
  end
  [:renamed, :copyadded].each do |category|
    deltas[category].each do |oldpath, newpath|
      # Block form inserts the replacement verbatim; a plain replacement string
      # would expand backslash sequences (\0, \1, ...) occurring in the path.
      # Non-bang sub returns a new string, leaving the old node untouched.
      merged_nodes[newpath] = old_file_nodes[oldpath].sub(/<file\s*id.*?["'].*?["'].*?>/) do
        "<file id='#{newpath}'>"
      end
    end
  end
  merged_nodes
end

.test_jhove_service ⇒ Boolean


38
39
40
41
42
43
44
45
46
47
# File 'lib/dor/services/technical_metadata_service.rb', line 38

# Ensure the jhove-service gem is loaded, requiring it on demand.
#
# @return [nil, Boolean] nil when ::JhoveService is already defined, otherwise
#   the result of +require+
# @raise [RuntimeError] when the gem cannot be loaded (the LoadError is printed first)
def self.test_jhove_service
  return if defined?(::JhoveService)

  require 'jhove_service'
rescue LoadError => load_error
  puts load_error.inspect
  raise "jhove-service dependency gem was not found.  Please add it to your Gemfile and run bundle install"
end

.write_fileset(temp_dir, new_files) ⇒ Pathname


145
146
147
148
149
# File 'lib/dor/services/technical_metadata_service.rb', line 145

# Write the list of files to process into jhove_fileset.txt inside temp_dir.
#
# @param temp_dir [String, Pathname] directory that receives the fileset file
# @param new_files [Array<String>] file paths, written one per line
# @return [Pathname] location of the fileset file that was written
def self.write_fileset(temp_dir, new_files)
  target = Pathname(temp_dir) + 'jhove_fileset.txt'
  File.open(target.to_s, 'w') do |io|
    io.puts(new_files)
  end
  target
end