Class: Dor::ContentMetadataDS

Inherits:
ActiveFedora::OmDatastream
  • Object
show all
Defined in:
lib/dor/datastreams/content_metadata_ds.rb

Instance Method Summary collapse

Instance Method Details

#add_virtual_resource(child_druid, child_resource) ⇒ Nokogiri::XML::Element

Copies the child’s resource into the parent (self) as a virtual resource. Assumes the resource isn’t a duplicate of an existing virtual or real resource.

Parameters:

  • child_druid (String)

    druid

  • child_resource (Nokogiri::XML::Element)

Returns:

  • (Nokogiri::XML::Element)

    the new resource that was added to the contentMetadata



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 143

# Adds a virtual resource to this contentMetadata that references a child object's resource.
#
# The new element receives a sequence one greater than the highest existing
# resource sequence (1 when there are none), an id built from this object's
# pid with a leading "druid:" removed plus that sequence, and the child's
# resource type. An externalFile reference is appended for each child file
# whose publish attribute is 'yes', followed by one alsoAvailableAs
# relationship. No label is copied and no duplicate detection is performed.
#
# @param [String] child_druid druid of the child object
# @param [Nokogiri::XML::Element] child_resource the child's resource element
# @return [Nokogiri::XML::Element] the resource element appended to the document root
def add_virtual_resource(child_druid, child_resource)
  ng_xml_will_change!

  # highest sequence currently in use; nil (-> 0) when the document has no resources
  used_sequences = ng_xml.search('//resource').map { |existing| existing[:sequence].to_i }

  virtual = Nokogiri::XML::Element.new('resource', ng_xml)
  virtual[:sequence] = (used_sequences.max || 0) + 1
  bare_druid = pid.gsub(/^druid:/, '')
  virtual[:id] = "#{bare_druid}_#{virtual[:sequence]}"
  virtual[:type] = child_resource[:type]

  # one external reference per published file of the child resource
  published_files = child_resource.search('file[@publish=\'yes\']')
  published_files.each do |published|
    virtual << generate_external_file_node(child_druid, child_resource[:id], published[:id], published[:mimetype])
  end
  virtual << generate_also_available_as_node(child_druid)

  # append under the document root and hand the new element back
  ng_xml.root << virtual
  virtual
end

#generate_also_available_as_node(objectId) ⇒ Nokogiri::XML::Element

Generates the XML tree for virtual resource relationship reference. For example:

<relationship type="alsoAvailableAs" objectId="druid:mn123pq4567" />

Parameters:

  • objectId (String)

    the linked druid

Returns:

  • (Nokogiri::XML::Element)


73
74
75
76
77
78
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 73

# Builds a relationship element that marks a virtual resource as also
# available in another object, e.g.
#   <relationship type="alsoAvailableAs" objectId="druid:mn123pq4567" />
# The element is created through ng_xml.create_element and is not attached anywhere.
#
# @param [String] objectId the linked druid
# @return [Nokogiri::XML::Element] the new relationship element
def generate_also_available_as_node(objectId)
  ng_xml.create_element('relationship').tap do |link|
    link[:type] = 'alsoAvailableAs'
    link[:objectId] = objectId
  end
end

#generate_external_file_node(objectId, resourceId, fileId, mimetype) ⇒ Nokogiri::XML::Element

Generates the XML tree for externalFile references. For example:

<externalFile objectId="druid:mn123pq4567" resourceId="Image01" fileId="image_01.jp2000" mimetype="image/jp2" />

Parameters:

  • objectId (String)

    the linked druid

  • resourceId (String)

    the linked druid’s resource identifier

  • fileId (String)

    the linked druid’s resource’s file identifier

  • mimetype (String)

    the file’s MIME type

Returns:

  • (Nokogiri::XML::Element)


60
61
62
63
64
65
66
67
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 60

# Builds an externalFile element referencing a file held by another object, e.g.
#   <externalFile objectId="druid:mn123pq4567" resourceId="Image01" fileId="image_01.jp2000" mimetype="image/jp2" />
# The element is created through ng_xml.create_element and is not attached anywhere.
#
# @param [String] objectId the linked druid
# @param [String] resourceId the linked druid's resource identifier
# @param [String] fileId the linked druid's resource's file identifier
# @param [String] mimetype the file's MIME type
# @return [Nokogiri::XML::Element] the new externalFile element
def generate_external_file_node(objectId, resourceId, fileId, mimetype)
  reference = ng_xml.create_element('externalFile')
  # assigned in this order: objectId, resourceId, fileId, mimetype
  { objectId: objectId, resourceId: resourceId, fileId: fileId, mimetype: mimetype }.each do |attr_name, attr_value|
    reference[attr_name] = attr_value
  end
  reference
end

#move_resource(resource_name, new_position) ⇒ Nokogiri::XML::Element

You just had to have ordered lists in XML, didn’t you? Re-enumerate the sequence numbers affected

Parameters:

  • resource_name (String)

    unique id attribute of the resource

  • new_position (Integer, String)

    new sequence number of the resource, or a string that looks like one

Returns:

  • (Nokogiri::XML::Element)

    the resource node



241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 241

# Moves a resource to a new sequence position and renumbers the resources it displaces.
#
# Resources whose sequence lies between the old and the new position are
# shifted by one towards the vacated slot; all other resources are untouched.
# new_position is not clamped to the range of existing sequence numbers.
#
# @param [String] resource_name unique id attribute of the resource
# @param [Integer, String] new_position new sequence number of the resource, or a string that looks like one
# @return [Nokogiri::XML::Element] the resource node
# @raise [RuntimeError] from singular_node unless exactly one resource has the given id
def move_resource(resource_name, new_position)
  ng_xml_will_change!
  node = singular_node('//resource[@id=\'' + resource_name + '\']')
  position = node['sequence'].to_i
  new_position = new_position.to_i # tolerate strings as a legacy behavior
  return node if position == new_position

  # Moving later: everything in (position, new_position] slides down by one.
  # Moving earlier: everything in [new_position, position) slides up by one.
  # Neither range contains `position`, so the moved node itself is never shifted here.
  moving_later = new_position > position
  displaced = moving_later ? (position + 1)..new_position : new_position..(position - 1)
  shift = moving_later ? -1 : 1

  # Walk the resource elements once instead of looking each sequence number up by
  # XPath: a node that has already been renumbered can then never be matched a
  # second time, and gaps in the numbering are simply skipped.
  ng_xml.search('//resource').each do |other|
    sequence = other['sequence'].to_i
    other['sequence'] = (sequence + shift).to_s if displaced.cover?(sequence)
  end

  node['sequence'] = new_position.to_s # set the moved node last, after its slot has been vacated
  node
end

#prefix ⇒ Object

maintain AF < 8 indexing behavior



275
276
277
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 275

# Returns an empty string. The accompanying documentation says this maintains
# the ActiveFedora (AF) < 8 indexing behavior; how the value is consumed is
# not visible in this file section.
#
# @return [String] always ''
def prefix
  ''
end

#rename_file(old_name, new_name) ⇒ Nokogiri::XML::Element

Returns the file node.

Parameters:

  • old_name (String)

    unique id attribute of the file element

  • new_name (String)

    new unique id value being assigned

Returns:

  • (Nokogiri::XML::Element)

    the file node



204
205
206
207
208
209
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 204

# Changes the id attribute of the first file element whose id equals old_name.
#
# When no file element matches, the lookup yields nil and the attribute
# assignment raises NoMethodError.
#
# @param [String] old_name unique id attribute of the file element
# @param [String] new_name new unique id value being assigned
# @return [Nokogiri::XML::Element] the file node
def rename_file(old_name, new_name)
  ng_xml_will_change!
  ng_xml.search("//file[@id='" + old_name + "']").first.tap do |matched|
    matched['id'] = new_name
  end
end

#set_content_type(old_type, old_resource_type, new_type, new_resource_type) ⇒ Object

Set the content type (e.g. “book”) and the resource type (e.g. “book”) for all resources

Parameters:

  • old_type (String)

    the old content type

  • old_resource_type (String)

    the old type for all resources

  • new_type (String)

    the new content type

  • new_resource_type (String)

    the new type for all resources



264
265
266
267
268
269
270
271
272
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 264

# Switches the content type on the root contentMetadata element and, when that
# root matched, the type of every resource that still carries old_resource_type.
#
# Nothing is modified when the root element's type is not old_type.
#
# @param [String] old_type the old content type
# @param [String] old_resource_type the old type for all resources
# @param [String] new_type the new content type
# @param [String] new_resource_type the new type for all resources
# @return [Object] the result of iterating the root-level search
def set_content_type(old_type, old_resource_type, new_type, new_resource_type)
  ng_xml_will_change!
  matching_roots = ng_xml.search("/contentMetadata[@type='" + old_type + "']")
  matching_roots.each do |content_metadata|
    content_metadata['type'] = new_type
    # resources are only retyped once the content type itself has matched
    stale_resources = ng_xml.search("//resource[@type='" + old_resource_type + "']")
    stale_resources.each { |stale| stale['type'] = new_resource_type }
  end
end

#singular_node(xpath) ⇒ Nokogiri::XML::Element

Only use this when you want the behavior of raising an exception if anything besides exactly one matching node is found. Otherwise just use .xpath, .at_xpath or .search.

Parameters:

  • xpath (String)

    accessor invocation for Nokogiri xpath

Returns:

  • (Nokogiri::XML::Element)

    the matched element



44
45
46
47
48
49
50
51
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 44

# Fetches the one and only node matching an XPath expression.
#
# Use this only when anything other than exactly one match should be an
# error; otherwise call .xpath, .at_xpath or .search directly.
#
# @param [String] xpath accessor invocation for Nokogiri xpath
# @return [Nokogiri::XML::Element] the matched element
# @raise [RuntimeError] "<xpath> not found" when nothing matches,
#   "<xpath> duplicated: <n> found" when more than one node matches
def singular_node(xpath)
  matches = ng_xml.search(xpath)
  count = matches.length
  return matches.first if count == 1

  raise "#{xpath} not found" if count < 1

  raise "#{xpath} duplicated: #{count} found"
end

#to_solr(solr_doc = {}, *_args) ⇒ Object

Terminology-based solrization is going to be painfully slow for large contentMetadata streams. Just select the relevant elements instead. TODO: Call super()?



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 83

# Adds contentMetadata summary fields to a Solr document by walking the
# resource and file elements directly (terminology-based solrization is
# painfully slow for large contentMetadata streams). TODO: Call super()?
#
# Returns solr_doc untouched when the root element has no type attribute.
# Otherwise resources are visited in ascending sequence order and the
# following are written: content type, file MIME types, file/resource counts,
# preserved and shelved byte totals, and - only when present - resource
# types, per-type resource counts, file roles and the id of the first shelved
# file whose id ends in "jp2".
#
# @param [Hash] solr_doc document to add fields to
# @return [Hash] the same solr_doc
def to_solr(solr_doc = {}, *_args)
  xml = ng_xml
  content_type = xml.root['type']
  return solr_doc unless content_type

  resource_total = 0
  file_total = 0
  shelved_total = 0
  preserved_bytes = 0
  shelved_bytes = 0
  per_type_totals = Hash.new(0) # missing resource types start at zero
  roles = ::Set.new
  mimetypes = ::Set.new
  first_shelved_jp2 = nil

  ordered_resources = xml.xpath('contentMetadata/resource').sort do |left, right|
    left['sequence'].to_i <=> right['sequence'].to_i
  end

  ordered_resources.each do |resource|
    resource_total += 1
    resource_type = resource['type']
    per_type_totals[resource_type] += 1 if resource_type

    resource.xpath('file').each do |file|
      file_total += 1
      bytes = file['size'].to_i
      preserved_bytes += bytes if file['preserve'] == 'yes'
      if file['shelve'] == 'yes'
        shelved_bytes += bytes
        shelved_total += 1
        # keep only the first shelved file whose id ends in jp2
        first_shelved_jp2 ||= file['id'] if file['id'] =~ /jp2$/
      end
      mimetypes << file['mimetype']
      role = file['role']
      roles << role if role
    end
  end

  solr_doc['content_type_ssim'] = content_type
  solr_doc['content_file_mimetypes_ssim'] = mimetypes.to_a
  solr_doc['content_file_count_itsi'] = file_total
  solr_doc['shelved_content_file_count_itsi'] = shelved_total
  solr_doc['resource_count_itsi'] = resource_total
  solr_doc['preserved_size_dbtsi'] = preserved_bytes # double (trie) to support very large sizes
  solr_doc['shelved_size_dbtsi'] = shelved_bytes # double (trie) to support very large sizes
  solr_doc['resource_types_ssim'] = per_type_totals.keys unless per_type_totals.empty?
  solr_doc['content_file_roles_ssim'] = roles.to_a unless roles.empty?
  per_type_totals.each do |type_name, total|
    solr_doc["#{type_name}_resource_count_itsi"] = total
  end
  # first_shelved_image is neither indexed nor multiple
  solr_doc['first_shelved_image_ss'] = first_shelved_jp2 unless first_shelved_jp2.nil?
  solr_doc
end

#unshelve_and_unpublish ⇒ Object

END: READ ONLY METHODS. DATASTREAM WRITING METHODS



130
131
132
133
134
135
136
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 130

# Sets publish="no" and shelve="no" on every file element found under
# /contentMetadata/resource. ng_xml_will_change! is called once, right before
# the first file is modified, and not at all when there are no files.
#
# @return [Object] the result of iterating the matched file nodes
def unshelve_and_unpublish
  file_nodes = ng_xml.xpath('/contentMetadata/resource//file')
  file_nodes.each_with_index do |file_node, position|
    ng_xml_will_change! if position.zero?
    { 'publish' => 'no', 'shelve' => 'no' }.each do |attr_name, attr_value|
      file_node[attr_name] = attr_value
    end
  end
end

#update_attributes(file_name, publish, shelve, preserve, attributes = {}) ⇒ Object

Parameters:

  • file_name (String)

    ID of the file element

  • publish (String)
  • shelve (String)
  • preserve (String)


167
168
169
170
171
172
173
174
175
176
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 167

# Sets the publish, shelve and preserve attributes - plus any extra
# attributes supplied - on the first file element whose id equals file_name.
#
# When no file element matches, the lookup yields nil and the first
# attribute assignment raises NoMethodError.
#
# @param [String] file_name ID of the file element
# @param [String] publish
# @param [String] shelve
# @param [String] preserve
# @param [Hash] attributes additional attribute name/value pairs to set
# @return [Hash] the attributes argument
def update_attributes(file_name, publish, shelve, preserve, attributes = {})
  ng_xml_will_change!
  target = ng_xml.search("//file[@id='" + file_name + "']").first
  # the three fixed attributes are written first, in this order
  { 'publish' => publish, 'shelve' => shelve, 'preserve' => preserve }.each do |attr_name, attr_value|
    target[attr_name] = attr_value
  end
  attributes.each { |attr_name, attr_value| target[attr_name] = attr_value }
end

#update_file(file, old_file_id) ⇒ Object

Parameters:

  • file (Object)

    some hash-like file

  • old_file_id (String)

    unique id attribute of the file element



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 180

# Updates a file element from a hash-like description: assigns the new id,
# writes md5/sha1 checksums and copies size, shelve, preserve, publish and
# role when the description provides them.
#
# The file is the first file element whose id equals old_file_id. When none
# matches, the lookup yields nil and the id assignment raises NoMethodError.
#
# @param [Object] file some hash-like file; read with the keys :name, :md5,
#   :sha1, :size, :shelve, :preserve, :publish and :role
# @param [String] old_file_id unique id attribute of the file element
# @return [Array<Symbol>] the list of optional attribute names that was iterated
def update_file(file, old_file_id)
  ng_xml_will_change!
  file_node = ng_xml.search('//file[@id=\'' + old_file_id + '\']').first
  file_node['id'] = file[:name]
  %i[md5 sha1].each do |algo|
    next if file[algo].nil?

    # Look for the checksum underneath the file node we already hold. A
    # document-wide query keyed on old_file_id stops matching as soon as the
    # id above has been changed, which made every rename append a duplicate
    # checksum element instead of updating the existing one.
    checksum_node = file_node.at_xpath("./checksum[@type='#{algo}']")
    if checksum_node.nil?
      checksum_node = Nokogiri::XML::Node.new('checksum', ng_xml)
      file_node.add_child(checksum_node)
    end
    checksum_node['type'] = algo.to_s
    checksum_node.content = file[algo]
  end

  # optional attributes are only written when the description supplies a value
  %i[size shelve preserve publish role].each do |x|
    file_node[x.to_s] = file[x] if file[x]
  end
end

#update_resource_label(resource_name, new_label) ⇒ Nokogiri::XML::Element

Updates old label OR creates a new one if necessary

Parameters:

  • resource_name (String)

    unique id attribute of the resource

  • new_label (String)

    label value being assigned

Returns:

  • (Nokogiri::XML::Element)

    the resource node



215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 215

# Sets the label of a resource: the first existing label child has its
# content replaced, otherwise a new label element is created and appended.
#
# @param [String] resource_name unique id attribute of the resource
# @param [String] new_label label value being assigned
# @return [Nokogiri::XML::Element] the resource node
# @raise [RuntimeError] from singular_node unless exactly one resource has the given id
def update_resource_label(resource_name, new_label)
  ng_xml_will_change!
  resource = singular_node("//resource[@id='" + resource_name + "']")
  current_label = resource.xpath('./label').first
  if current_label
    current_label.content = new_label
  else
    # no label yet: build one and attach it to the resource
    fresh_label = Nokogiri::XML::Node.new('label', ng_xml)
    fresh_label.content = new_label
    resource.add_child(fresh_label)
  end
  resource
end

#update_resource_type(resource_name, new_type) ⇒ Object

Parameters:

  • resource_name (String)

    unique id attribute of the resource

  • new_type (String)

    type value being assigned



231
232
233
234
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 231

# Assigns a new type attribute to the resource with the given id.
#
# @param [String] resource_name unique id attribute of the resource
# @param [String] new_type type value being assigned
# @return [String] new_type
# @raise [RuntimeError] from singular_node unless exactly one resource has the given id
def update_resource_type(resource_name, new_type)
  ng_xml_will_change!
  resource = singular_node("//resource[@id='" + resource_name + "']")
  resource['type'] = new_type
end