Class: Dor::ContentMetadataDS

Inherits:
ActiveFedora::OmDatastream
  • Object
show all
Includes:
Upgradable
Defined in:
lib/dor/datastreams/content_metadata_ds.rb

Instance Method Summary collapse

Methods included from Upgradable

add_upgrade_callback, included, run_upgrade_callbacks, #upgrade!

Instance Method Details

#add_file(file, resource_name) ⇒ Nokogiri::XML::Node

Returns the added XML node.

Parameters:

  • file (Object)
  • resource_name (String)

Returns:

  • (Nokogiri::XML::Node)

    the added XML node



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 158

# Appends a new <file> element, plus any supplied checksums, to an existing
# resource and then re-serializes the document into the datastream content.
#
# @param [Object] file hash-like; read via :name, :shelve, :publish, :preserve, :md5, :sha1, :size and :mime_type
# @param [String] resource_name id attribute of the resource that receives the file
# @return [Nokogiri::XML::Node] the added XML node
# @raise [RuntimeError] if no resource carries the given id
def add_file(file, resource_name)
  parent = ng_xml.search("//resource[@id='" + resource_name + "']").first
  raise 'resource doesnt exist.' if parent.nil?

  file_node = Nokogiri::XML::Node.new('file', ng_xml)
  file_node['id'] = file[:name]
  # the three flags are always written, as empty strings when the caller left them unset
  %w(shelve publish preserve).each { |flag| file_node[flag] = file[flag.to_sym] || '' }
  parent.add_child(file_node)

  [:md5, :sha1].each do |algo|
    digest = file[algo]
    next unless digest
    checksum = Nokogiri::XML::Node.new('checksum', ng_xml)
    checksum['type'] = algo.to_s
    checksum.content = digest
    file_node.add_child(checksum)
  end

  # optional attributes are only written when a value was supplied
  { 'size' => :size, 'mimetype' => :mime_type }.each do |attr_name, key|
    file_node[attr_name] = file[key] if file[key]
  end

  self.content = ng_xml.to_s
  file_node
end

#add_resource(files, resource_name, position, type = 'file') ⇒ Nokogiri::XML::Node

Returns the new resource that was added to the contentMetadata.

Parameters:

  • files (Array)
  • resource_name (String)

    ID of the resource

  • position (Integer)
  • type (String) (defaults to: 'file')

    Resource type

Returns:

  • (Nokogiri::XML::Node)

    the new resource that was added to the contentMetadata



212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 212

# Builds a new <resource> element (with one <file> child per entry in +files+),
# makes room for it in the sequence numbering and appends it to <contentMetadata>.
#
# Existing resources whose sequence is at or after +position+ are shifted up by
# one so the new resource does not share a sequence number with an existing one.
#
# @param [Array] files hash-like entries; read via :name, :shelve, :publish, :preserve, :md5, :sha1 and :size
# @param [String] resource_name ID of the resource
# @param [Integer] position sequence number the new resource should take
# @param [String] type Resource type
# @return [Nokogiri::XML::Node] the new resource that was added to the contentMetadata
# @raise [RuntimeError] if a resource with the same id already exists
def add_resource(files, resource_name, position, type = 'file')
  raise "resource #{resource_name} already exists" unless ng_xml.search("//resource[@id='#{resource_name}']").empty?

  # an empty document has no sequence numbers yet, so treat the maximum as 0
  highest = ng_xml.search('//resource').map { |res| res['sequence'].to_i }.max || 0

  # Renumber from the highest sequence downwards: each resource moves into a
  # slot that has already been vacated, so a lookup by sequence never matches
  # a resource that was shifted a moment ago.
  highest.downto(position) do |seq|
    ng_xml.search("//resource[@sequence='#{seq}']").each do |res|
      res['sequence'] = (seq + 1).to_s
    end
  end

  node = Nokogiri::XML::Node.new('resource', ng_xml)
  node['sequence'] = position.to_s
  node['id']       = resource_name
  node['type']     = type

  files.each do |file|
    file_node = Nokogiri::XML::Node.new('file', ng_xml)
    # the three flags are always written, as empty strings when unset
    %w(shelve publish preserve).each { |flag| file_node[flag] = file[flag.to_sym] || '' }
    file_node['id'] = file[:name]
    node.add_child(file_node)

    [:md5, :sha1].each do |algo|
      next if file[algo].nil?
      checksum_node = Nokogiri::XML::Node.new('checksum', ng_xml)
      checksum_node['type'] = algo.to_s
      checksum_node.content = file[algo]
      file_node.add_child(checksum_node)
    end
    file_node['size'] = file[:size] if file[:size]
  end

  ng_xml.search('//contentMetadata').first.add_child(node)
  self.content = ng_xml.to_s
  node
end

#add_virtual_resource(child_druid, child_resource) ⇒ Nokogiri::XML::Element

Copies the child’s resource into the parent (self) as a virtual resource. Assumes the resource isn’t a duplicate of an existing virtual or real resource.

Parameters:

  • child_druid (String)

    druid

  • child_resource (Nokogiri::XML::Element)

Returns:

  • (Nokogiri::XML::Element)

    the new resource that was added to the contentMetadata



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 187

# Copies the child's resource into the parent (self) as a virtual resource.
# Assumes the resource isn't a duplicate of an existing virtual or real resource.
#
# The new resource takes the next free sequence number (1 when the parent has
# no resources yet), gets an id built from this object's pid (minus the
# "druid:" prefix) and that sequence number, and copies the child's type.
#
# @param [String] child_druid druid
# @param [Nokogiri::XML::Element] child_resource
# @return [Nokogiri::XML::Element] the new resource that was added to the contentMetadata
def add_virtual_resource(child_druid, child_resource)
  # create a virtual resource element with attributes linked to the child and omit label;
  # `max` is nil for a parent without resources, so fall back to 0
  sequence_max = ng_xml.search('//resource').map { |node| node[:sequence].to_i }.max || 0
  resource = Nokogiri::XML::Element.new('resource', ng_xml)
  resource[:sequence] = (sequence_max + 1).to_s
  resource[:id] = "#{pid.gsub(/^druid:/, '')}_#{resource[:sequence]}"
  resource[:type] = child_resource[:type]

  # iterate over all the published files and link to them
  child_resource.search('file[@publish=\'yes\']').each do |file|
    resource << generate_external_file_node(child_druid, child_resource[:id], file[:id], file[:mimetype])
  end
  resource << generate_also_available_as_node(child_druid)

  # attach the virtual resource as a sibling and return
  ng_xml.root << resource
  self.content = ng_xml.to_s
  resource
end

#generate_also_available_as_node(objectId) ⇒ Nokogiri::XML::Element

Generates the XML tree for virtual resource relationship reference. For example:

<relationship type="alsoAvailableAs" objectId="druid:mn123pq4567" />

Parameters:

  • objectId (String)

    the linked druid

Returns:

  • (Nokogiri::XML::Element)


108
109
110
111
112
113
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 108

# Generates the XML element for a virtual resource relationship reference, e.g.
#   <relationship type="alsoAvailableAs" objectId="druid:mn123pq4567" />
#
# @param [String] objectId the linked druid
# @return [Nokogiri::XML::Element] the element created via ng_xml.create_element (not attached by this method)
def generate_also_available_as_node(objectId)
  ng_xml.create_element('relationship').tap do |rel|
    rel[:type] = 'alsoAvailableAs'
    rel[:objectId] = objectId
  end
end

#generate_external_file_node(objectId, resourceId, fileId, mimetype) ⇒ Nokogiri::XML::Element

Generates the XML tree for externalFile references. For example:

<externalFile objectId="druid:mn123pq4567" resourceId="Image01" fileId="image_01.jp2000" mimetype="image/jp2" />

Parameters:

  • objectId (String)

    the linked druid

  • resourceId (String)

    the linked druid’s resource identifier

  • fileId (String)

    the linked druid’s resource’s file identifier

  • mimetype (String)

    the file’s MIME type

Returns:

  • (Nokogiri::XML::Element)


95
96
97
98
99
100
101
102
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 95

# Generates the XML element for an externalFile reference, e.g.
#   <externalFile objectId="druid:mn123pq4567" resourceId="Image01" fileId="image_01.jp2000" mimetype="image/jp2" />
#
# @param [String] objectId the linked druid
# @param [String] resourceId the linked druid's resource identifier
# @param [String] fileId the linked druid's resource's file identifier
# @param [String] mimetype the file's MIME type
# @return [Nokogiri::XML::Element] the element created via ng_xml.create_element (not attached by this method)
def generate_external_file_node(objectId, resourceId, fileId, mimetype)
  link = ng_xml.create_element('externalFile')
  # written in this fixed order: objectId, resourceId, fileId, mimetype
  { objectId: objectId, resourceId: resourceId, fileId: fileId, mimetype: mimetype }.each do |attr_name, value|
    link[attr_name] = value
  end
  link
end

#move_resource(resource_name, new_position) ⇒ Nokogiri::XML::Element

You just had to have ordered lists in XML, didn’t you? Re-enumerates the sequence numbers affected by moving the resource to its new position.

Parameters:

  • resource_name (String)

    unique id attribute of the resource

  • new_position (Integer, String)

    new sequence number of the resource, or a string that looks like one

Returns:

  • (Nokogiri::XML::Element)

    the resource node



339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 339

# Moves a resource to a new sequence number and re-enumerates the sequence
# numbers of the resources in between. Only the sequence attributes change;
# the elements are not reordered within the document.
#
# @param [String] resource_name unique id attribute of the resource
# @param [Integer, String] new_position new sequence number of the resource, or a string that looks like one
# @return [Nokogiri::XML::Element] the resource node
def move_resource(resource_name, new_position)
  node = singular_node("//resource[@id='#{resource_name}']")
  position = node['sequence'].to_i
  new_position = new_position.to_i # tolerate strings as a Legacy behavior
  return node if position == new_position

  # Moving to a higher number: everything after the old slot up to the new slot comes down by one.
  # Moving to a lower number: everything from the new slot up to just before the old slot goes up by one.
  moving_later = new_position > position
  affected = moving_later ? ((position + 1)..new_position) : (new_position..(position - 1))
  shift = moving_later ? -1 : 1

  # Look every affected node up BEFORE changing any sequence number, so a
  # lookup can never match a node that was renumbered a moment ago.
  targets = affected.map { |seq| [ng_xml.at_xpath("//resource[@sequence='#{seq}']"), seq] }
  targets.each do |item, seq|
    next if item.nil? # tolerate gaps in the numbering
    item['sequence'] = (seq + shift).to_s
  end

  node['sequence'] = new_position.to_s # set the node we already had last, so we don't hit it twice!
  self.content = ng_xml.to_s
  node
end

#public_xml ⇒ Nokogiri::XML::Document

Returns a copy of the contentMetadata XML, sanitized for public consumption.

Returns:

  • (Nokogiri::XML::Document)

    sanitized for public consumption



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 38

# Builds the public version of the contentMetadata from ng_xml.clone:
# resources and files that are neither deliverable nor published are dropped,
# the preserve/shelve/publish/deliver attributes and checksums are stripped,
# and every externalFile link is enriched with a label (the linked item's DC
# title) and, when present, the linked file's imageData.
#
# @return [Nokogiri::XML::Document] sanitized for public consumption
# @raise [ArgumentError] if an externalFile lacks resourceId, objectId or fileId
def public_xml
  public_doc = ng_xml.clone

  # remove any resources or attributes that are not destined for the public XML
  # (the four removals run in this order)
  public_doc.xpath('/contentMetadata/resource[not(file[(@deliver="yes" or @publish="yes")]|externalFile)]').each { |hidden| hidden.remove }
  public_doc.xpath('/contentMetadata/resource/file[not(@deliver="yes" or @publish="yes")]').each { |hidden| hidden.remove }
  public_doc.xpath('/contentMetadata/resource/file').xpath('@preserve|@shelve|@publish|@deliver').each { |hidden| hidden.remove }
  public_doc.xpath('/contentMetadata/resource/file/checksum').each { |hidden| hidden.remove }

  # support for dereferencing links via externalFile element(s) to the source (child) item - see JUMBO-19
  public_doc.xpath('/contentMetadata/resource/externalFile').each do |externalFile|
    # enforce pre-conditions that resourceId, objectId, fileId are required
    linked_resource_id = externalFile['resourceId']
    linked_druid = externalFile['objectId']
    linked_file_id = externalFile['fileId']
    if [linked_resource_id, linked_file_id, linked_druid].any?(&:blank?)
      raise ArgumentError, "Malformed externalFile data: #{externalFile.inspect}"
    end

    # grab source item
    child_item = Dor::Item.find(linked_druid)

    # locate and extract the resourceId/fileId elements
    child_doc = child_item.datastreams['contentMetadata'].ng_xml
    child_resource = child_doc.at_xpath("//resource[@id=\"#{linked_resource_id}\"]")
    child_file = child_resource.at_xpath("file[@id=\"#{linked_file_id}\"]")
    image_data = child_file.at_xpath('imageData')

    # always use title regardless of whether a child label is present
    label = child_doc.create_element('label')
    label.content = child_item.datastreams['DC'].title.first

    # add the extracted label and imageData
    externalFile.add_previous_sibling(label)
    externalFile << image_data unless image_data.nil?
  end

  public_doc
end

#remove_file(file_name) ⇒ Object

Parameters:

  • file_name (String)

    ID of the file element



260
261
262
263
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 260

# Removes every <file> element whose id matches, then re-serializes the
# document into the datastream content.
#
# @param [String] file_name ID of the file element
def remove_file(file_name)
  doomed = ng_xml.search("//file[@id='" + file_name + "']")
  doomed.each { |file_node| file_node.remove }
  self.content = ng_xml.to_s
end

#remove_resource(resource_name) ⇒ Object

Parameters:

  • resource_name (String)

    ID of the resource



246
247
248
249
250
251
252
253
254
255
256
257
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 246

# Removes the resource with the given id and closes the gap it leaves: each
# consecutively numbered resource after it has its sequence lowered by one.
# Renumbering stops at the first missing sequence number.
#
# @param [String] resource_name ID of the resource
def remove_resource(resource_name)
  node = singular_node("//resource[@id='#{resource_name}']")
  position = node['sequence'].to_i + 1
  node.remove
  loop do
    followers = ng_xml.search("//resource[@sequence='#{position}']")
    break if followers.empty?
    # Walking upwards, the slot below was vacated in the previous step (or by
    # the removal itself), so the resource can simply drop down into it.
    followers.each { |res| res['sequence'] = (position - 1).to_s }
    position += 1
  end
  self.content = ng_xml.to_s
end

#rename_file(old_name, new_name) ⇒ Nokogiri::XML::Element

Returns the file node.

Parameters:

  • old_name (String)

    unique id attribute of the file element

  • new_name (String)

    new unique id value being assigned

Returns:

  • (Nokogiri::XML::Element)

    the file node



302
303
304
305
306
307
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 302

# Gives the first <file> element matching old_name a new id and re-serializes
# the document into the datastream content.
#
# @param [String] old_name unique id attribute of the file element
# @param [String] new_name new unique id value being assigned
# @return [Nokogiri::XML::Element] the file node
# @raise [NoMethodError] if no file matches (the first match is nil)
def rename_file(old_name, new_name)
  ng_xml.search("//file[@id='" + old_name + "']").first.tap do |file_node|
    file_node['id'] = new_name
    self.content = ng_xml.to_s
  end
end

#set_content_type(old_type, old_resource_type, new_type, new_resource_type) ⇒ Object

Set the content type (e.g. “book”) and the resource type (e.g. “book”) for all resources

Parameters:

  • old_type (String)

    the old content type

  • old_resource_type (String)

    the old type for all resources

  • new_type (String)

    the new content type

  • new_resource_type (String)

    the new type for all resources



361
362
363
364
365
366
367
368
369
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 361

# Sets the content type (e.g. "book") on <contentMetadata> and the resource
# type on all matching resources. Resources are only touched when the
# <contentMetadata> element currently has the old content type.
#
# @param [String] old_type the old content type
# @param [String] old_resource_type the old type for all resources
# @param [String] new_type the new content type
# @param [String] new_resource_type the new type for all resources
def set_content_type(old_type, old_resource_type, new_type, new_resource_type)
  matching_roots = ng_xml.search("/contentMetadata[@type='" + old_type + "']")
  matching_roots.each do |content_metadata|
    content_metadata['type'] = new_type
    stale = ng_xml.search("//resource[@type='" + old_resource_type + "']")
    stale.each { |resource| resource['type'] = new_resource_type }
  end
  self.content = ng_xml.to_s
end

#singular_node(xpath) ⇒ Nokogiri::XML::Element

Only use this when you want the behavior of raising an exception if anything besides exactly one matching node is found. Otherwise just use .xpath, .at_xpath or .search.

Parameters:

  • xpath (String)

    accessor invocation for Nokogiri xpath

Returns:

  • (Nokogiri::XML::Element)

    the matched element



80
81
82
83
84
85
86
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 80

# Returns the single node matching the xpath, raising if anything besides
# exactly one matching node is found. Use .xpath, .at_xpath or .search when
# that strictness is not wanted.
#
# @param [String] xpath accessor invocation for Nokogiri xpath
# @return [Nokogiri::XML::Element] the matched element
# @raise [RuntimeError] when nothing matches, or when more than one node matches
def singular_node(xpath)
  matches = ng_xml.search(xpath)
  count = matches.length
  return matches.first if count == 1
  raise "#{xpath} not found" if count < 1
  raise "#{xpath} duplicated: #{count} found"
end

#to_solr(solr_doc = {}, *args) ⇒ Object

Terminology-based solrization is going to be painfully slow for large contentMetadata streams. Just select the relevant elements instead. TODO: Call super()?



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 118

# Fills solr_doc with summary fields computed straight from the XML: content
# type, resource / file / shelved-file counts, total size of preserved files,
# per-resource-type counts and the id of the first shelved file whose id ends
# in "jp2" (resources are visited in ascending sequence order).
# Terminology-based solrization is going to be painfully slow for large
# contentMetadata streams, so just the relevant elements are selected.
# TODO: Call super()?
#
# @param [Hash] solr_doc document to add the fields to
# @param [Array] args accepted but not used here
# @return [Hash] solr_doc, returned untouched when the root element has no type attribute
def to_solr(solr_doc = {}, *args)
  doc = ng_xml
  content_type = doc.root['type']
  return solr_doc unless content_type

  preserved_bytes = 0
  resource_total = 0
  file_total = 0
  shelved_total = 0
  type_tally = Hash.new(0) # default count is zero
  first_image = nil

  ordered = doc.xpath('contentMetadata/resource').sort { |a, b| a['sequence'].to_i <=> b['sequence'].to_i }
  ordered.each do |resource|
    resource_total += 1
    resource_type = resource['type']
    type_tally[resource_type] += 1 if resource_type
    resource.xpath('file').each do |file|
      file_total += 1
      preserved_bytes += file['size'].to_i if file['preserve'] == 'yes'
      if file['shelve'] == 'yes'
        shelved_total += 1
        first_image ||= file['id'] if file['id'] =~ /jp2$/
      end
    end
  end

  solr_doc['content_type_ssim'] = content_type
  solr_doc['content_file_count_itsi'] = file_total
  solr_doc['shelved_content_file_count_itsi'] = shelved_total
  solr_doc['resource_count_itsi'] = resource_total
  solr_doc['preserved_size_dbtsi'] = preserved_bytes # double (trie) to support very large sizes
  solr_doc['resource_types_ssim'] = type_tally.keys unless type_tally.empty?
  type_tally.each { |resource_type, count| solr_doc["#{resource_type}_resource_count_itsi"] = count }
  # first_shelved_image is neither indexed nor multiple
  solr_doc['first_shelved_image_ss'] = first_image if first_image
  solr_doc
end

#update_attributes(file_name, publish, shelve, preserve) ⇒ Object

Parameters:

  • file_name (String)

    ID of the file element

  • publish (String)
  • shelve (String)
  • preserve (String)


269
270
271
272
273
274
275
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 269

# Overwrites the publish, shelve and preserve attributes of the first <file>
# element with the given id, then re-serializes the document into the
# datastream content.
#
# @param [String] file_name ID of the file element
# @param [String] publish
# @param [String] shelve
# @param [String] preserve
# @raise [NoMethodError] if no file matches (the first match is nil)
def update_attributes(file_name, publish, shelve, preserve)
  target = ng_xml.search("//file[@id='" + file_name + "']").first
  # written in this fixed order: publish, shelve, preserve
  { 'publish' => publish, 'shelve' => shelve, 'preserve' => preserve }.each do |attr_name, value|
    target[attr_name] = value
  end
  self.content = ng_xml.to_s
end

#update_file(file, old_file_id) ⇒ Object

Parameters:

  • file (Object)

    some hash-like file

  • old_file_id (String)

    unique id attribute of the file element



279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 279

# Updates the first <file> element matching old_file_id: assigns the new id,
# updates (or adds) the md5/sha1 checksums that were supplied, and overwrites
# the size, shelve, preserve and publish attributes that were supplied.
#
# @param [Object] file some hash-like file; read via :name, :md5, :sha1, :size, :shelve, :preserve and :publish
# @param [String] old_file_id unique id attribute of the file element
# @raise [NoMethodError] if no file matches (the first match is nil)
def update_file(file, old_file_id)
  file_node = ng_xml.search("//file[@id='#{old_file_id}']").first
  file_node['id'] = file[:name]

  [:md5, :sha1].each do |algo|
    next if file[algo].nil?
    # Look the checksum up underneath file_node itself: its id was changed
    # just above, so a document-wide search by the old id would miss the
    # existing checksum and a duplicate would be appended.
    checksum_node = file_node.at_xpath("checksum[@type='#{algo}']")
    if checksum_node.nil?
      checksum_node = Nokogiri::XML::Node.new('checksum', ng_xml)
      file_node.add_child(checksum_node)
    end
    checksum_node['type'] = algo.to_s
    checksum_node.content = file[algo]
  end

  # attributes are only overwritten when a value was supplied
  [:size, :shelve, :preserve, :publish].each do |key|
    file_node[key.to_s] = file[key] if file[key]
  end
  self.content = ng_xml.to_s
end

#update_resource_label(resource_name, new_label) ⇒ Nokogiri::XML::Element

Updates old label OR creates a new one if necessary

Parameters:

  • resource_name (String)

    unique id attribute of the resource

  • new_label (String)

    label value being assigned

Returns:

  • (Nokogiri::XML::Element)

    the resource node



313
314
315
316
317
318
319
320
321
322
323
324
325
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 313

# Updates the resource's first <label> child, or creates a label when the
# resource has none, then re-serializes the document into the datastream content.
#
# @param [String] resource_name unique id attribute of the resource
# @param [String] new_label label value being assigned
# @return [Nokogiri::XML::Element] the resource node
def update_resource_label(resource_name, new_label)
  node = singular_node("//resource[@id='" + resource_name + "']")
  existing = node.xpath('./label').first
  if existing
    existing.content = new_label
  else
    fresh = Nokogiri::XML::Node.new('label', ng_xml) # create a label
    fresh.content = new_label
    node.add_child(fresh)
  end
  self.content = ng_xml.to_s
  node
end

#update_resource_type(resource_name, new_type) ⇒ Object

Parameters:

  • resource_name (String)

    unique id attribute of the resource

  • new_type (String)

    type value being assigned



329
330
331
332
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 329

# Assigns a new type attribute to the single resource with the given id and
# re-serializes the document into the datastream content.
#
# @param [String] resource_name unique id attribute of the resource
# @param [String] new_type type value being assigned
def update_resource_type(resource_name, new_type)
  resource = singular_node("//resource[@id='" + resource_name + "']")
  resource['type'] = new_type
  self.content = ng_xml.to_s
end