Class: Dor::ContentMetadataDS

Inherits:
ActiveFedora::OmDatastream
  • Object
show all
Includes:
Upgradable, SolrDocHelper
Defined in:
lib/dor/datastreams/content_metadata_ds.rb

Instance Method Summary collapse

Methods included from SolrDocHelper

#add_solr_value

Methods included from Upgradable

add_upgrade_callback, included, run_upgrade_callbacks, #upgrade!

Instance Method Details

#add_file(file, resource_name) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 44

# Appends a <file> element to the resource whose id is +resource_name+,
# then writes the serialized XML back to +content+ and calls +save+.
#
# @param file [Hash] file description; reads :name, :shelve, :publish,
#   :preserve, :md5, :sha1, :size and :mime_type
# @param resource_name [String] id attribute of the target resource
# @raise [RuntimeError] when no resource has the given id
# @return [Object] whatever +save+ returns
def add_file(file, resource_name)
  doc = ng_xml
  matches = doc.search("//resource[@id='" + resource_name + "']")
  raise 'resource doesnt exist.' if matches.length.zero?

  parent = matches.first
  entry = Nokogiri::XML::Node.new('file', doc)
  entry['id'] = file[:name]
  # The three flags are always written; a missing flag becomes an empty string.
  %w[shelve publish preserve].each { |flag| entry[flag] = file[flag.to_sym] || '' }
  parent.add_child(entry)

  # One <checksum type="..."> child per digest that was supplied.
  %i[md5 sha1].each do |algo|
    digest = file[algo]
    next unless digest

    checksum = Nokogiri::XML::Node.new('checksum', doc)
    checksum['type'] = algo.to_s
    checksum.content = digest
    entry.add_child(checksum)
  end

  # Optional attributes are only written when a value was given.
  { 'size' => :size, 'mimetype' => :mime_type }.each do |attribute, key|
    entry[attribute] = file[key] if file[key]
  end

  self.content = doc.to_s
  save
end

#add_resource(files, resource_name, position, type = "file") ⇒ Object



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 69

# Adds a new <resource> element (with one <file> child per entry in +files+)
# at sequence number +position+. Every existing resource whose sequence is
# greater than or equal to +position+ is moved up by one first, so the new
# resource does not share a sequence number with an existing one.
# The serialized XML is then written to +content+ and +save+ is called.
#
# @param files [Array<Hash>] file descriptions; reads :name, :shelve,
#   :publish, :preserve, :md5, :sha1 and :size from each
# @param resource_name [String] id attribute for the new resource
# @param position [Integer, String] sequence number for the new resource
#   (converted with +to_i+)
# @param type [String] value of the resource's type attribute
# @raise [RuntimeError] when a resource with +resource_name+ already exists
# @return [Object] whatever +save+ returns
def add_resource(files, resource_name, position, type = 'file')
  xml = ng_xml
  if xml.search('//resource[@id=\'' + resource_name + '\']').length > 0
    raise 'resource ' + resource_name + ' already exists'
  end

  position = position.to_i
  # Renumber all of the resources that will come after the newly added one.
  # Done in one pass over the nodes themselves, so it also works on a
  # document that has no resources yet.
  xml.search('//resource').each do |existing|
    sequence = existing['sequence'].to_i
    existing['sequence'] = (sequence + 1).to_s if sequence >= position
  end

  node = Nokogiri::XML::Node.new('resource', xml)
  node['sequence'] = position.to_s
  node['id'] = resource_name
  node['type'] = type
  files.each do |file|
    file_node = Nokogiri::XML::Node.new('file', xml)
    %w[shelve publish preserve].each { |flag| file_node[flag] = file[flag.to_sym] || '' }
    file_node['id'] = file[:name]
    node.add_child(file_node)

    %i[md5 sha1].each do |algo|
      next if file[algo].nil?

      checksum_node = Nokogiri::XML::Node.new('checksum', xml)
      checksum_node['type'] = algo.to_s
      checksum_node.content = file[algo]
      file_node.add_child(checksum_node)
    end
    file_node['size'] = file[:size] if file[:size]
  end
  # The new resource is appended as the last child; its place in the
  # ordering is carried by the sequence attribute.
  xml.search('//contentMetadata').first.add_child(node)
  self.content = xml.to_s
  save
end

#move_resource(resource_name, new_position) ⇒ Nokogiri::XML::Element

You just had to have ordered lists in XML, didn’t you? Re-enumerate the sequence numbers affected

Parameters:

  • resource_name (String)

    unique id attribute of the resource

  • new_position (Integer, String)

    new sequence number of the resource, or a string that looks like one

Returns:

  • (Nokogiri::XML::Element)

    the resource node



241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 241

# Moves a resource to a new sequence number and re-enumerates the resources
# in between so that they close the gap it left and make room at the target.
# The document is modified in memory only; this method does not call +save+.
#
# @param resource_name [String] unique id attribute of the resource
# @param new_position [Integer, String] new sequence number of the resource,
#   or a string that looks like one
# @raise [RuntimeError] from +singular_node+ unless exactly one resource has
#   the given id
# @return [Nokogiri::XML::Element] the resource node
def move_resource(resource_name, new_position)
  node = singular_node('//resource[@id=\'' + resource_name + '\']')
  position = node['sequence'].to_i
  new_position = new_position.to_i # tolerate strings as a legacy behavior
  return node if position == new_position

  # Is the resource being moved later in the sequence (up) or earlier?
  if new_position > position
    displaced = (position + 1)..new_position # these slide down by one
    shift = -1
  else
    displaced = new_position..(position - 1) # these slide up by one
    shift = 1
  end

  # Pick the affected nodes before changing any attribute, so a node that was
  # just renumbered cannot be matched a second time. Neither range contains
  # the moved node's own current position.
  affected = ng_xml.xpath('//resource').select { |res| displaced.cover?(res['sequence'].to_i) }
  affected.each { |res| res['sequence'] = (res['sequence'].to_i + shift).to_s }

  node['sequence'] = new_position.to_s # set the node we already had last
  node
end

#public_xml ⇒ Object



36
37
38
39
40
41
42
43
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 36

# Builds the publicly visible form of the content metadata on a clone of
# +ng_xml+; the datastream's own document is not touched.
#
# @return [Object] the pruned clone of +ng_xml+
def public_xml
  public_doc = ng_xml.clone
  # Each entry is a chain of XPath queries, evaluated left to right starting
  # from the document; whatever the chain selects is removed. Order matters:
  # the publish/deliver attributes are consulted by the first two steps and
  # only stripped by the third.
  [
    ['/contentMetadata/resource[not(file[(@deliver="yes" or @publish="yes")])]'],
    ['/contentMetadata/resource/file[not(@deliver="yes" or @publish="yes")]'],
    ['/contentMetadata/resource/file', '@preserve|@shelve|@publish|@deliver'],
    ['/contentMetadata/resource/file/checksum']
  ].each do |queries|
    selected = queries.reduce(public_doc) { |context, query| context.xpath(query) }
    selected.each(&:remove)
  end
  public_doc
end

#remove_file(file_name) ⇒ Object



122
123
124
125
126
127
128
129
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 122

# Removes every <file> element whose id equals +file_name+, writes the
# serialized XML back to +content+ and calls +save+.
#
# @param file_name [String] id attribute of the file element(s) to remove
# @return [Object] whatever +save+ returns
def remove_file(file_name)
  doc = ng_xml
  doc.search("//file[@id='" + file_name + "']").each(&:remove)
  self.content = doc.to_s
  save
end

#remove_resource(resource_name) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 107

# Removes the resource with the given id and moves every resource that had a
# higher sequence number down by one. The serialized XML is then written to
# +content+ and +save+ is called.
#
# @param resource_name [String] unique id attribute of the resource
# @raise [RuntimeError] from +singular_node+ unless exactly one resource has
#   the given id
# @return [Object] whatever +save+ returns
def remove_resource(resource_name)
  xml = ng_xml
  node = singular_node('//resource[@id=\'' + resource_name + '\']')
  removed_position = node['sequence'].to_i
  node.remove

  # Work on the individual nodes: the attribute setter lives on a node, not
  # on the set returned by a search.
  xml.search('//resource').each do |resource|
    sequence = resource['sequence'].to_i
    resource['sequence'] = (sequence - 1).to_s if sequence > removed_position
  end

  self.content = xml.to_s
  save
end

#rename_file(old_name, new_name) ⇒ Nokogiri::XML::Element

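Renames the file element, writes the XML back to the datastream content and saves it. Note: as the listing below shows, the value actually returned is the result of save, not the file node.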

Parameters:

  • old_name (String)

    unique id attribute of the file element

  • new_name (String)

    new unique id value being assigned

Returns:

  • (Nokogiri::XML::Element)

    the file node



204
205
206
207
208
209
210
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 204

# Changes the id attribute of the first <file> element whose id equals
# +old_name+, writes the serialized XML back to +content+ and calls +save+.
#
# @param old_name [String] unique id attribute of the file element
# @param new_name [String] new unique id value being assigned
# @raise [RuntimeError] when no file element has the id +old_name+
# @return [Object] whatever +save+ returns
def rename_file(old_name, new_name)
  xml = ng_xml
  file_node = xml.search('//file[@id=\'' + old_name + '\']').first
  # Fail with a clear message before touching anything if nothing matched.
  raise "file #{old_name} not found" if file_node.nil?

  file_node['id'] = new_name
  self.content = xml.to_s
  save
end

#set_content_type(old_type, old_resource_type, new_type, new_resource_type) ⇒ Object

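Sets the content type of the contentMetadata root when its current type equals old_type and, in that case, changes the type of every resource whose type equals old_resource_type. The updated XML is written to the datastream content; save is not called.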

Parameters:

  • new_type (String)

    the new content type, ex book

  • new_resource_type (String)

    the new type for all resources, ex book



260
261
262
263
264
265
266
267
268
269
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 260

# Retypes the contentMetadata root and its resources. Nothing is changed
# unless the root's type attribute currently equals +old_type+; when it does,
# every resource whose type equals +old_resource_type+ is retyped as well.
# The serialized XML is written to +content+; +save+ is not called here.
#
# @param old_type [String] content type the root must currently have
# @param old_resource_type [String] resource type to be replaced
# @param new_type [String] the new content type, ex book
# @param new_resource_type [String] the new type for matching resources
# @return [String] the serialized XML that was assigned to +content+
def set_content_type(old_type, old_resource_type, new_type, new_resource_type)
  doc = ng_xml
  matching_roots = doc.search("/contentMetadata[@type='" + old_type + "']")
  matching_roots.each do |root|
    root['type'] = new_type
    retyped = doc.search("//resource[@type='" + old_resource_type + "']")
    retyped.each { |res| res['type'] = new_resource_type }
  end
  self.content = doc.to_s
end

#singular_node(xpath) ⇒ Nokogiri::XML::Element

Only use this when you want the behavior of raising an exception if anything besides exactly one matching node is found. Otherwise just use .xpath, .at_xpath or .search.

Parameters:

  • xpath (String)

    accessor invocation for Nokogiri xpath

Returns:

  • (Nokogiri::XML::Element)

    the matched element



275
276
277
278
279
280
281
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 275

# Looks up +xpath+ in +ng_xml+ and insists on exactly one match.
#
# @param xpath [String] accessor invocation for Nokogiri xpath
# @raise [RuntimeError] when nothing matches, or when more than one node does
# @return [Nokogiri::XML::Element] the matched element
def singular_node(xpath)
  matches = ng_xml.search(xpath)
  count = matches.length
  return matches.first if count == 1

  raise "#{xpath} not found" if count < 1

  raise "#{xpath} duplicated: #{count} found"
end

#to_solr(solr_doc = Hash.new, *args) ⇒ Object

Terminology-based solrization is going to be painfully slow for large contentMetadata streams. Just select the relevant elements instead. TODO: Call super()?



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 164

# Adds summary fields for the content metadata to +solr_doc+ by walking the
# resource and file elements directly (terminology-based solrization would be
# slow for large streams).
#
# Fields written: content type, total and shelved file counts, resource count,
# total size of preserved files, the list of resource types with a count per
# type, and the id of the first shelved file whose id ends in "jp2".
#
# @param solr_doc [Hash] document to add fields to
# @param args [Array] accepted for signature compatibility; not used here
# @return [Hash] +solr_doc+; returned unchanged when the XML has no root
#   element or the root has no type attribute
def to_solr(solr_doc = {}, *args)
  doc = ng_xml
  root = doc.root
  # An empty datastream has no root element at all.
  return solr_doc unless root && root['type']

  preserved_size = 0
  counts = Hash.new(0)               # default count is zero
  resource_type_counts = Hash.new(0) # default count is zero
  first_shelved_image = nil

  # Visit resources in sequence order; the index keeps resources that share a
  # sequence number in document order.
  resources = doc.xpath('contentMetadata/resource')
                 .sort_by.with_index { |resource, index| [resource['sequence'].to_i, index] }
  resources.each do |resource|
    counts['resource'] += 1
    resource_type_counts[resource['type']] += 1 if resource['type']
    resource.xpath('file').each do |file|
      counts['content_file'] += 1
      preserved_size += file['size'].to_i if file['preserve'] == 'yes'
      next unless file['shelve'] == 'yes'

      counts['shelved_file'] += 1
      # to_s: a file element without an id attribute simply does not match.
      first_shelved_image ||= file['id'] if file['id'].to_s =~ /jp2$/
    end
  end

  solr_doc['content_type_ssim']               = root['type']
  solr_doc['content_file_count_itsi']         = counts['content_file']
  solr_doc['shelved_content_file_count_itsi'] = counts['shelved_file']
  solr_doc['resource_count_itsi']             = counts['resource']
  solr_doc['preserved_size_dbtsi']            = preserved_size # double (trie) to support very large sizes
  solr_doc['resource_types_ssim']             = resource_type_counts.keys unless resource_type_counts.empty?
  resource_type_counts.each do |key, count|
    solr_doc["#{key}_resource_count_itsi"] = count
  end
  # first_shelved_image is neither indexed nor multiple
  solr_doc['first_shelved_image_ss'] = first_shelved_image unless first_shelved_image.nil?
  solr_doc
end

#update_attributes(file_name, publish, shelve, preserve) ⇒ Object



130
131
132
133
134
135
136
137
138
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 130

# Sets the shelve, publish and preserve attributes on the first <file>
# element whose id equals +file_name+, writes the serialized XML back to
# +content+ and calls +save+.
#
# @param file_name [String] id attribute of the file element
# @param publish [String] new value of the publish attribute
# @param shelve [String] new value of the shelve attribute
# @param preserve [String] new value of the preserve attribute
# @raise [RuntimeError] when no file element has the id +file_name+
# @return [Object] whatever +save+ returns
def update_attributes(file_name, publish, shelve, preserve)
  xml = ng_xml
  file_node = xml.search('//file[@id=\'' + file_name + '\']').first
  # Fail with a clear message before touching anything if nothing matched.
  raise "file #{file_name} not found" if file_node.nil?

  file_node['shelve']   = shelve
  file_node['publish']  = publish
  file_node['preserve'] = preserve
  self.content = xml.to_s
  save
end

#update_file(file, old_file_id) ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 139

# Updates the first <file> element whose id equals +old_file_id+: assigns the
# new id, updates (or creates) a <checksum> child for each digest supplied,
# and overwrites the size/shelve/preserve/publish attributes that were given.
# The serialized XML is then written to +content+ and +save+ is called.
#
# @param file [Hash] new values; reads :name, :md5, :sha1, :size, :shelve,
#   :preserve and :publish
# @param old_file_id [String] current id attribute of the file element
# @raise [RuntimeError] when no file element has the id +old_file_id+
# @return [Object] whatever +save+ returns
def update_file(file, old_file_id)
  xml = ng_xml
  file_node = xml.search('//file[@id=\'' + old_file_id + '\']').first
  raise "file #{old_file_id} not found" if file_node.nil?

  file_node['id'] = file[:name]
  %i[md5 sha1].each do |algo|
    next if file[algo].nil?

    # Look the checksum up relative to the node we already hold: its id has
    # just been changed, so a document-wide search by the old id would miss
    # the existing checksum and a duplicate would be appended.
    checksum_node = file_node.at_xpath("checksum[@type='#{algo}']")
    if checksum_node.nil?
      checksum_node = Nokogiri::XML::Node.new('checksum', xml)
      file_node.add_child(checksum_node)
    end
    checksum_node['type'] = algo.to_s
    checksum_node.content = file[algo]
  end

  # Attributes that were not supplied keep their current value.
  %i[size shelve preserve publish].each do |attribute|
    file_node[attribute.to_s] = file[attribute] if file[attribute]
  end
  self.content = xml.to_s
  save
end

#update_resource_label(resource_name, new_label) ⇒ Nokogiri::XML::Element

Updates old label OR creates a new one if necessary

Parameters:

  • resource_name (String)

    unique id attribute of the resource

  • new_label (String)

    label value being assigned

Returns:

  • (Nokogiri::XML::Element)

    the resource node



216
217
218
219
220
221
222
223
224
225
226
227
228
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 216

# Sets the label of a resource: the first existing <label> child gets the
# new text, or a <label> child is created when the resource has none.
# The document is modified in memory only; this method does not call +save+.
#
# @param resource_name [String] unique id attribute of the resource
# @param new_label [String] label value being assigned
# @raise [RuntimeError] from +singular_node+ unless exactly one resource has
#   the given id
# @return [Nokogiri::XML::Element] the resource node
def update_resource_label(resource_name, new_label)
  resource = singular_node("//resource[@id='" + resource_name + "']")
  current = resource.xpath('./label').first
  if current
    current.content = new_label
  else
    # no label yet: create one
    fresh = Nokogiri::XML::Node.new('label', ng_xml)
    fresh.content = new_label
    resource.add_child(fresh)
  end
  resource
end

#update_resource_type(resource_name, new_type) ⇒ Object

Parameters:

  • resource_name (String)

    unique id attribute of the resource

  • new_type (String)

    type value being assigned



232
233
234
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 232

# Assigns a new type attribute to the resource with the given id.
# The document is modified in memory only; this method does not call +save+.
#
# @param resource_name [String] unique id attribute of the resource
# @param new_type [String] type value being assigned
# @raise [RuntimeError] from +singular_node+ unless exactly one resource has
#   the given id
# @return [String] +new_type+
def update_resource_type(resource_name, new_type)
  resource = singular_node("//resource[@id='" + resource_name + "']")
  resource['type'] = new_type
end