Class: Dor::ContentMetadataDS

Inherits:

ActiveFedora::OmDatastream

Object
ActiveFedora::OmDatastream
Dor::ContentMetadataDS

show all

Includes: Upgradable, SolrDocHelper

Defined in: lib/dor/datastreams/content_metadata_ds.rb

Instance Method Summary collapse

#add_file(file, resource_name) ⇒ Object
#add_resource(files, resource_name, position, type = "file") ⇒ Object
#move_resource(resource_name, new_position) ⇒ Nokogiri::XML::Element

You just had to have ordered lists in XML, didn’t you? Re-enumerate the sequence numbers affected.
#public_xml ⇒ Object
#remove_file(file_name) ⇒ Object
#remove_resource(resource_name) ⇒ Object
#rename_file(old_name, new_name) ⇒ Nokogiri::XML::Element

The file node.
#set_content_type(old_type, old_resource_type, new_type, new_resource_type) ⇒ Object

Set the content type and the resource types for all resources.
#singular_node(xpath) ⇒ Nokogiri::XML::Element

Only use this when you want the behavior of raising an exception if anything besides exactly one matching node is found.
#to_solr(solr_doc = Hash.new, *args) ⇒ Object

Terminology-based solrization is going to be painfully slow for large contentMetadata streams.
#update_attributes(file_name, publish, shelve, preserve) ⇒ Object
#update_file(file, old_file_id) ⇒ Object
#update_resource_label(resource_name, new_label) ⇒ Nokogiri::XML::Element

Updates old label OR creates a new one if necessary.
#update_resource_type(resource_name, new_type) ⇒ Object

Methods included from SolrDocHelper

#add_solr_value

Methods included from Upgradable

add_upgrade_callback, included, run_upgrade_callbacks, #upgrade!

Instance Method Details

#add_file(file, resource_name) ⇒ `Object`

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 44

# Appends a <file> element (plus optional <checksum> children) to an existing
# resource, then serializes the document into the datastream content and saves.
#
# @param file [Hash] reads :name, :shelve, :publish, :preserve, :md5, :sha1, :size and :mime_type
# @param resource_name [String] id attribute of the resource receiving the file
# @return [Object] whatever self.save returns
# @raise [RuntimeError] when no resource carries the given id
def add_file(file, resource_name)
  doc = self.ng_xml
  parent = doc.search('//resource[@id=\''+resource_name+'\']').first
  raise 'resource doesnt exist.' if parent.nil?

  entry = Nokogiri::XML::Node.new('file', doc)
  entry['id'] = file[:name]
  # The three flags are always written; a missing value becomes an empty string.
  %w[shelve publish preserve].each do |flag|
    entry[flag] = file[flag.to_sym] || ''
  end
  parent.add_child(entry)

  # One <checksum type="..."> child per digest that was supplied.
  %w[md5 sha1].each do |digest|
    value = file[digest.to_sym]
    next unless value
    checksum = Nokogiri::XML::Node.new('checksum', doc)
    checksum['type'] = digest
    checksum.content = value
    entry.add_child(checksum)
  end

  # Optional attributes are only written when present.
  entry['size']     = file[:size]      if file[:size]
  entry['mimetype'] = file[:mime_type] if file[:mime_type]

  self.content = doc.to_s
  self.save
end

#add_resource(files, resource_name, position, type = "file") ⇒ `Object`

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 69

# Creates a new <resource> (with its <file> children) at the requested sequence
# position, then serializes the document into the datastream content and saves.
# Existing resources whose sequence is at or after that position are shifted up
# by one so that sequence numbers stay unique.
#
# @param files [Array<Hash>] one hash per file; reads :name, :shelve, :publish, :preserve, :md5, :sha1 and :size
# @param resource_name [String] id attribute for the new resource
# @param position [Integer, String] sequence number for the new resource (coerced with to_i)
# @param type [String] type attribute for the new resource
# @return [Object] whatever self.save returns
# @raise [RuntimeError] when a resource with the given id already exists
def add_resource(files, resource_name, position, type="file")
  xml = self.ng_xml
  if xml.search('//resource[@id=\''+resource_name+'\']').length > 0
    raise 'resource '+resource_name+' already exists'
  end

  # Tolerate numeric strings, as move_resource does.
  position = position.to_i

  # Make room: every resource at or after the insertion point moves up by one.
  # The nodes are updated in hand (no re-querying by sequence), so the order in
  # which they are visited cannot cause collisions. An empty document simply
  # has nothing to shift.
  xml.search('//resource').each do |existing|
    seq = existing['sequence'].to_i
    existing['sequence'] = (seq + 1).to_s if seq >= position
  end

  node = Nokogiri::XML::Node.new('resource', xml)
  node['sequence'] = position.to_s
  node['id'] = resource_name
  node['type'] = type
  files.each do |file|
    file_node = Nokogiri::XML::Node.new('file', xml)
    # The three flags are always written; a missing value becomes an empty string.
    %w[shelve publish preserve].each { |x| file_node[x] = file[x.to_sym] ? file[x.to_sym] : '' }
    file_node['id'] = file[:name]
    node.add_child(file_node)

    [:md5, :sha1].each do |algo|
      next if file[algo].nil?
      checksum_node = Nokogiri::XML::Node.new('checksum', xml)
      checksum_node['type'] = algo.to_s
      checksum_node.content = file[algo]
      file_node.add_child(checksum_node)
    end
    file_node['size'] = file[:size] if file[:size]
  end

  # The new node is appended as the last child; ordering is carried by @sequence.
  xml.search('//contentMetadata').first.add_child(node)
  self.content = xml.to_s
  self.save
end

#move_resource(resource_name, new_position) ⇒ `Nokogiri::XML::Element`

You just had to have ordered lists in XML, didn’t you? Re-enumerate the sequence numbers affected

Parameters:

resource_name (String) —

unique id attribute of the resource
new_position (Integer, String) —

new sequence number of the resource, or a string that looks like one

Returns:

(Nokogiri::XML::Element) —

the resource node

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 241

# Moves a resource to a new sequence position and re-enumerates the sequence
# numbers of the resources that sit between the old and the new position.
# Only @sequence attributes change; nodes are not reordered within the document,
# and nothing is serialized or saved here.
#
# @param resource_name [String] unique id attribute of the resource
# @param new_position [Integer, String] new sequence number of the resource, or a string that looks like one
# @return [Nokogiri::XML::Element] the resource node
# @raise [RuntimeError] from singular_node unless exactly one resource has that id
def move_resource resource_name, new_position
  node = singular_node('//resource[@id=\''+resource_name+'\']')
  position = node['sequence'].to_i
  new_position = new_position.to_i              # tolerate strings as a Legacy behavior
  return node if position == new_position

  # Moving later: everything in (position, new_position] comes down by one.
  # Moving earlier: everything in [new_position, position) goes up by one.
  moving_later = new_position > position
  affected = moving_later ? (position + 1)..new_position : new_position..(position - 1)
  delta = moving_later ? -1 : 1

  # Adjust the nodes in hand instead of re-querying by sequence number, so a
  # freshly renumbered node can never be picked up and shifted a second time.
  # The moved node's own sequence lies outside the affected range.
  self.ng_xml.search('//resource').each do |item|
    seq = item['sequence'].to_i
    item['sequence'] = (seq + delta).to_s if affected.include?(seq)
  end

  node['sequence'] = new_position.to_s          # set the node we already had last, so we don't hit it twice!
  node
end

#public_xml ⇒ `Object`

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 36

# Builds a copy of the document trimmed down for public consumption.
# On the copy, in order:
#   1. resources without any file marked deliver="yes" or publish="yes" are removed,
#   2. files not marked deliver="yes" or publish="yes" are removed,
#   3. the preserve/shelve/publish/deliver attributes are removed from the remaining files,
#   4. checksum elements are removed.
#
# @return [Object] the trimmed clone of ng_xml
def public_xml
  doc = self.ng_xml.clone

  # Element-level pruning: whole resources first, then individual files.
  [
    '/contentMetadata/resource[not(file[(@deliver="yes" or @publish="yes")])]',
    '/contentMetadata/resource/file[not(@deliver="yes" or @publish="yes")]'
  ].each do |query|
    doc.xpath(query).each(&:remove)
  end

  # Strip the workflow flags from whatever files are left.
  remaining_files = doc.xpath('/contentMetadata/resource/file')
  remaining_files.xpath('@preserve|@shelve|@publish|@deliver').each(&:remove)

  doc.xpath('/contentMetadata/resource/file/checksum').each(&:remove)
  doc
end

#remove_file(file_name) ⇒ `Object`

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 122

# Removes every <file> element whose id attribute equals file_name, then
# serializes the document into the datastream content and saves.
#
# @param file_name [String] id attribute of the file element(s) to drop
# @return [Object] whatever self.save returns
def remove_file file_name
  doc = self.ng_xml
  doc.search('//file[@id=\''+file_name+'\']').each(&:remove)
  self.content = doc.to_s
  self.save
end

#remove_resource(resource_name) ⇒ `Object`

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 107

# Removes a resource and closes the gap it leaves in the sequence numbering,
# then serializes the document into the datastream content and saves.
#
# @param resource_name [String] unique id attribute of the resource
# @return [Object] whatever self.save returns
# @raise [RuntimeError] from singular_node unless exactly one resource has that id
def remove_resource resource_name
  xml = self.ng_xml
  node = singular_node('//resource[@id=\''+resource_name+'\']')
  removed_position = node['sequence'].to_i
  node.remove

  # Every resource that came after the removed one moves down by one.
  # Individual nodes are updated (a NodeSet has no attribute setter), and they
  # are adjusted in hand rather than re-queried by sequence number.
  xml.search('//resource').each do |res|
    seq = res['sequence'].to_i
    res['sequence'] = (seq - 1).to_s if seq > removed_position
  end

  self.content = xml.to_s
  self.save
end

#rename_file(old_name, new_name) ⇒ `Nokogiri::XML::Element`

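Documented as returning the file node. Note that the method body shown below ends with self.save, so the value actually returned is the result of save.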

Parameters:

old_name (String) —

unique id attribute of the file element
new_name (String) —

new unique id value being assigned

Returns:

(Nokogiri::XML::Element) —

the file node

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 204

# Assigns a new id to the first <file> element whose id equals old_name, then
# serializes the document into the datastream content and saves.
#
# NOTE(review): the published docs declare the file node as the return value,
# but the last expression evaluated here is self.save.
#
# @param old_name [String] unique id attribute of the file element
# @param new_name [String] new unique id value being assigned
# @return [Object] whatever self.save returns
def rename_file old_name, new_name
  self.ng_xml.tap do |doc|
    # Only the first match is renamed; a missing file makes this raise on nil.
    doc.search('//file[@id=\''+old_name+'\']').first['id'] = new_name
    self.content = doc.to_s
  end
  self.save
end

#set_content_type(old_type, old_resource_type, new_type, new_resource_type) ⇒ `Object`

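Set the content type and the resource types. Nothing changes unless the contentMetadata type currently equals old_type; when it does, the type becomes new_type and every resource whose type is old_resource_type is set to new_resource_type.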

Parameters:

new_type (String) —

the new content type, e.g. book
new_resource_type (String) —

the new type for all resources, e.g. book

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 260

# Switches the contentMetadata type from old_type to new_type and, when that
# root matched, switches every resource of old_resource_type to
# new_resource_type. The document is then serialized into the datastream
# content; this method does not call save.
#
# @param old_type [String] content type the root must currently have
# @param old_resource_type [String] resource type to be replaced
# @param new_type [String] the new content type
# @param new_resource_type [String] the new type for the matching resources
# @return [String] the serialized document that was assigned to content
def set_content_type old_type, old_resource_type, new_type, new_resource_type
  doc = self.ng_xml
  matching_roots = doc.search('/contentMetadata[@type=\''+old_type+'\']')
  matching_roots.each do |root|
    root['type'] = new_type
    # Resources are only touched when the root itself matched old_type.
    stale = doc.search('//resource[@type=\''+old_resource_type+'\']')
    stale.each { |res| res['type'] = new_resource_type }
  end
  self.content = doc.to_s
end

#singular_node(xpath) ⇒ `Nokogiri::XML::Element`

Only use this when you want the behavior of raising an exception if anything besides exactly one matching node is found. Otherwise just use .xpath, .at_xpath or .search.

Parameters:

xpath (String) —

accessor invocation for Nokogiri xpath

Returns:

(Nokogiri::XML::Element) —

the matched element

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 275

# Looks up an XPath that must match exactly one node. Only use this when you
# want an exception for anything other than a single match; otherwise use
# .xpath, .at_xpath or .search directly.
#
# @param xpath [String] accessor invocation for Nokogiri xpath
# @return [Nokogiri::XML::Element] the matched element
# @raise [RuntimeError] when nothing matches, or when more than one node matches
def singular_node xpath
  matches = self.ng_xml.search(xpath)
  count = matches.length
  if count < 1
    raise "#{xpath} not found"
  elsif count > 1
    raise "#{xpath} duplicated: #{count} found"
  end
  matches.first
end

#to_solr(solr_doc = Hash.new, *args) ⇒ `Object`

Terminology-based solrization is going to be painfully slow for large contentMetadata streams. Just select the relevant elements instead. TODO: Call super()?

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 164

# Fills solr_doc with summary fields computed directly from the XML:
# content type, file/resource counts, total size of preserved files, the
# resource types present (with a per-type count) and the id of the first
# shelved file whose id ends in "jp2". Resources are visited in @sequence order.
#
# Terminology-based solrization is going to be painfully slow for large
# contentMetadata streams, so only the relevant elements are selected.
#
# @param solr_doc [Hash] document to add fields to
# @param args [Array] accepted for signature compatibility; not used here
# @return [Hash] solr_doc, unchanged when the document has no root element or
#   the root has no type attribute
def to_solr(solr_doc=Hash.new, *args)
  doc = self.ng_xml
  root = doc.root
  # An empty document has no root at all; treat it like a root without a type.
  return solr_doc unless root && root['type']

  preserved_size = 0
  counts = Hash.new(0)                # default count is zero
  resource_type_counts = Hash.new(0)  # default count is zero
  first_shelved_image = nil

  doc.xpath('contentMetadata/resource').sort { |a,b| a['sequence'].to_i <=> b['sequence'].to_i }.each do |resource|
    counts['resource'] += 1
    resource_type_counts[resource['type']] += 1 if resource['type']
    resource.xpath('file').each do |file|
      counts['content_file'] += 1
      preserved_size += file['size'].to_i if file['preserve'] == 'yes'
      next unless file['shelve'] == 'yes'
      counts['shelved_file'] += 1
      # to_s guards against a file element that has no id attribute.
      if first_shelved_image.nil? && file['id'].to_s.match(/jp2$/)
        first_shelved_image = file['id']
      end
    end
  end

  solr_doc["content_type_ssim"              ] = root['type']
  solr_doc["content_file_count_itsi"        ] = counts['content_file']
  solr_doc["shelved_content_file_count_itsi"] = counts['shelved_file']
  solr_doc["resource_count_itsi"            ] = counts['resource']
  solr_doc["preserved_size_dbtsi"           ] = preserved_size        # double (trie) to support very large sizes
  solr_doc["resource_types_ssim"            ] = resource_type_counts.keys if resource_type_counts.size > 0
  resource_type_counts.each do |key, count|
    solr_doc["#{key}_resource_count_itsi"] = count
  end
  # first_shelved_image is neither indexed nor multiple
  solr_doc["first_shelved_image_ss"] = first_shelved_image unless first_shelved_image.nil?
  solr_doc
end

#update_attributes(file_name, publish, shelve, preserve) ⇒ `Object`

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 130

# Overwrites the shelve, publish and preserve attributes on the first <file>
# element whose id equals file_name, then serializes the document into the
# datastream content and saves.
#
# @param file_name [String] id attribute of the file element
# @param publish [String] value for the publish attribute
# @param shelve [String] value for the shelve attribute
# @param preserve [String] value for the preserve attribute
# @return [Object] whatever self.save returns
def update_attributes file_name, publish, shelve, preserve
  doc = self.ng_xml
  target = doc.search('//file[@id=\''+file_name+'\']').first
  # Written in this order: shelve, publish, preserve.
  { 'shelve' => shelve, 'publish' => publish, 'preserve' => preserve }.each do |attribute, value|
    target[attribute] = value
  end
  self.content = doc.to_s
  self.save
end

#update_file(file, old_file_id) ⇒ `Object`

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 139

# Updates the first <file> element whose id equals old_file_id: assigns the new
# id, updates (or creates) its md5/sha1 checksum children, copies any supplied
# size/shelve/preserve/publish values, then serializes the document into the
# datastream content and saves.
#
# @param file [Hash] reads :name, :md5, :sha1, :size, :shelve, :preserve and :publish
# @param old_file_id [String] current id attribute of the file element
# @return [Object] whatever self.save returns
def update_file file, old_file_id
  xml = self.ng_xml
  file_node = xml.search('//file[@id=\''+old_file_id+'\']').first
  file_node['id'] = file[:name]

  [:md5, :sha1].each do |algo|
    next if file[algo].nil?
    # Look the checksum up relative to the node already in hand. The id was
    # just rewritten, so a document-wide query by the old id would miss it and
    # a duplicate checksum would be appended instead of updating the existing one.
    checksum_node = file_node.xpath('./checksum[@type=\'' + algo.to_s + '\']').first
    if checksum_node.nil?
      checksum_node = Nokogiri::XML::Node.new('checksum', xml)
      file_node.add_child(checksum_node)
    end
    checksum_node['type'] = algo.to_s
    checksum_node.content = file[algo]
  end

  # Optional attributes are only overwritten when a value was supplied.
  [:size, :shelve, :preserve, :publish].each do |x|
    file_node[x.to_s] = file[x] if file[x]
  end
  self.content = xml.to_s
  self.save
end

#update_resource_label(resource_name, new_label) ⇒ `Nokogiri::XML::Element`

Updates old label OR creates a new one if necessary

Parameters:

resource_name (String) —

unique id attribute of the resource
new_label (String) —

label value being assigned

Returns:

(Nokogiri::XML::Element) —

the resource node

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 216

# Sets the label of a resource: rewrites the content of its first <label>
# child, or appends a new <label> child when it has none. Nothing is
# serialized or saved here.
#
# @param resource_name [String] unique id attribute of the resource
# @param new_label [String] label value being assigned
# @return [Nokogiri::XML::Element] the resource node
# @raise [RuntimeError] from singular_node unless exactly one resource has that id
def update_resource_label resource_name, new_label
  resource = singular_node('//resource[@id=\''+resource_name+'\']')
  existing = resource.xpath('./label').first
  if existing
    existing.content = new_label
  else
    # No label yet: build one and attach it to the resource.
    fresh = Nokogiri::XML::Node.new('label', self.ng_xml)
    fresh.content = new_label
    resource.add_child(fresh)
  end
  resource
end

#update_resource_type(resource_name, new_type) ⇒ `Object`

Parameters:

resource_name (String) —

unique id attribute of the resource
new_type (String) —

type value being assigned



232
233
234

# File 'lib/dor/datastreams/content_metadata_ds.rb', line 232

# Assigns a new type attribute to the resource with the given id. Nothing is
# serialized or saved here.
#
# @param resource_name [String] unique id attribute of the resource
# @param new_type [String] type value being assigned
# @return [String] new_type (the value of the assignment)
# @raise [RuntimeError] from singular_node unless exactly one resource has that id
def update_resource_type resource_name, new_type
  resource = singular_node('//resource[@id=\''+resource_name+'\']')
  resource['type'] = new_type
end

Class: Dor::ContentMetadataDS

Instance Method Summary collapse

Methods included from SolrDocHelper

Methods included from Upgradable

Instance Method Details

#add_file(file, resource_name) ⇒ Object

#add_resource(files, resource_name, position, type = "file") ⇒ Object

#move_resource(resource_name, new_position) ⇒ Nokogiri::XML::Element

#public_xml ⇒ Object

#remove_file(file_name) ⇒ Object

#remove_resource(resource_name) ⇒ Object

#rename_file(old_name, new_name) ⇒ Nokogiri::XML::Element

#set_content_type(old_type, old_resource_type, new_type, new_resource_type) ⇒ Object

#singular_node(xpath) ⇒ Nokogiri::XML::Element

#to_solr(solr_doc = Hash.new, *args) ⇒ Object

#update_attributes(file_name, publish, shelve, preserve) ⇒ Object

#update_file(file, old_file_id) ⇒ Object

#update_resource_label(resource_name, new_label) ⇒ Nokogiri::XML::Element

#update_resource_type(resource_name, new_type) ⇒ Object

#add_file(file, resource_name) ⇒ `Object`

#add_resource(files, resource_name, position, type = "file") ⇒ `Object`

#move_resource(resource_name, new_position) ⇒ `Nokogiri::XML::Element`

#public_xml ⇒ `Object`

#remove_file(file_name) ⇒ `Object`

#remove_resource(resource_name) ⇒ `Object`

#rename_file(old_name, new_name) ⇒ `Nokogiri::XML::Element`

#set_content_type(old_type, old_resource_type, new_type, new_resource_type) ⇒ `Object`

#singular_node(xpath) ⇒ `Nokogiri::XML::Element`

#to_solr(solr_doc = Hash.new, *args) ⇒ `Object`

#update_attributes(file_name, publish, shelve, preserve) ⇒ `Object`

#update_file(file, old_file_id) ⇒ `Object`

#update_resource_label(resource_name, new_label) ⇒ `Nokogiri::XML::Element`

#update_resource_type(resource_name, new_type) ⇒ `Object`