Class: Dor::ContentMetadataDS

Inherits:
ActiveFedora::OmDatastream
  • Object
show all
Includes:
Upgradable, SolrDocHelper
Defined in:
lib/dor/datastreams/content_metadata_ds.rb

Instance Method Summary collapse

Methods included from SolrDocHelper

#add_solr_value

Methods included from Upgradable

add_upgrade_callback, included, run_upgrade_callbacks, #upgrade!

Instance Method Details

#add_file(file, resource_name) ⇒ Object


44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 44

# Appends a <file> element describing +file+ to the resource whose id is
# +resource_name+, then stores the serialized XML in +content+ and saves.
#
# @param file [Hash] file description; the keys read are :name, :shelve,
#   :publish, :preserve, :md5, :sha1, :size and :mime_type
# @param resource_name [String] value of the target resource's id attribute
# @raise [RuntimeError] when no resource with that id is found
# @return [Object] whatever +save+ returns
def add_file(file, resource_name)
  doc = ng_xml
  matches = doc.search("//resource[@id='" + resource_name + "']")
  raise 'resource doesnt exist.' if matches.length.zero?

  parent = matches.first
  entry = Nokogiri::XML::Node.new('file', doc)
  entry['id'] = file[:name]
  # Each of these attributes is always written; a missing value becomes ''.
  %w[shelve publish preserve].each do |flag|
    entry[flag] = file[flag.to_sym] || ''
  end
  parent.add_child(entry)

  # One <checksum type="..."> child per digest that was supplied.
  %w[md5 sha1].each do |algorithm|
    digest = file[algorithm.to_sym]
    next unless digest

    checksum = Nokogiri::XML::Node.new('checksum', doc)
    checksum['type'] = algorithm
    checksum.content = digest
    entry.add_child(checksum)
  end

  entry['size'] = file[:size] if file[:size]
  entry['mimetype'] = file[:mime_type] if file[:mime_type]

  self.content = doc.to_s
  save
end

#add_resource(files, resource_name, position, type = "file") ⇒ Object


80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 80

# Creates a new <resource> element containing one <file> child per entry in
# +files+, appends it to the <contentMetadata> element, stores the serialized
# XML in +content+ and saves.
#
# Existing resources whose sequence is at or after +position+ are moved one
# place later so that the new resource can take +position+.
#
# @param files [Array<Hash>] file descriptions; the keys read are :name,
#   :shelve, :publish, :preserve, :md5, :sha1, :size and :mime_type
# @param resource_name [String] id attribute for the new resource
# @param position [Integer, #to_i] sequence number for the new resource
# @param type [String] type attribute for the new resource
# @raise [RuntimeError] when a resource with that id already exists
# @return [Object] whatever +save+ returns
def add_resource(files, resource_name, position, type = "file")
  xml = ng_xml
  if xml.search("//resource[@id='#{resource_name}']").length > 0
    raise "resource #{resource_name} already exists"
  end

  # Make room: everything at or after the requested slot shifts by one.
  position = position.to_i
  xml.search('//resource').each do |existing|
    sequence = existing['sequence'].to_i
    existing['sequence'] = (sequence + 1).to_s if sequence >= position
  end

  resource_node = Nokogiri::XML::Node.new('resource', xml)
  resource_node['sequence'] = position.to_s
  resource_node['id'] = resource_name
  resource_node['type'] = type

  files.each do |file|
    file_node = Nokogiri::XML::Node.new('file', xml)
    file_node['shelve'] = file[:shelve] || ''
    file_node['publish'] = file[:publish] || ''
    file_node['preserve'] = file[:preserve] || ''
    file_node['id'] = file[:name]
    resource_node.add_child(file_node)

    %w[md5 sha1].each do |algorithm|
      digest = file[algorithm.to_sym]
      next if digest.nil?

      checksum_node = Nokogiri::XML::Node.new('checksum', xml)
      checksum_node['type'] = algorithm
      checksum_node.content = digest
      file_node.add_child(checksum_node)
    end

    file_node['size'] = file[:size] if file[:size]
    # Kept in step with add_file, which records the mime type as well.
    file_node['mimetype'] = file[:mime_type] if file[:mime_type]
  end

  xml.search('//contentMetadata').first.add_child(resource_node)
  self.content = xml.to_s
  save
end

#move_resource(resource_name, new_position) ⇒ Object


292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 292

# Moves the resource whose id is +resource_name+ to sequence +new_position+
# and renumbers the resources in between so the sequence stays contiguous.
#
# Moving a resource later shifts the resources between its old and new
# position one place earlier; moving it earlier shifts them one place later.
# Only the in-memory XML is changed: this method neither assigns +content+
# nor calls +save+.
#
# @param resource_name [String] id attribute of the resource to move
# @param new_position [Integer, #to_i] target sequence number
# @raise [RuntimeError] unless exactly one resource has that id
def move_resource(resource_name, new_position)
  xml = ng_xml
  matches = xml.search("//resource[@id='#{resource_name}']")
  raise 'Resource not found or duplicate found.' if matches.length != 1

  moved = matches.first
  position = moved['sequence'].to_i
  new_position = new_position.to_i
  return if new_position == position

  # Sequence numbers that must give way, and the direction they move in.
  if new_position > position
    affected = (position + 1)..new_position
    shift = -1
  else
    affected = new_position..(position - 1)
    shift = 1
  end

  xml.search('//resource').each do |resource|
    sequence = resource['sequence'].to_i
    resource['sequence'] = (sequence + shift).to_s if affected.cover?(sequence)
  end
  # Set last: the moved resource's old number is outside +affected+, so the
  # loop above never touched it.
  moved['sequence'] = new_position.to_s
end

#public_xml ⇒ Object


36
37
38
39
40
41
42
43
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 36

# Returns a clone of the datastream XML reduced to its public parts.
#
# Resources without any file marked deliver="yes" or publish="yes" are
# removed, as are the individual files lacking both marks. The preserve,
# shelve, publish and deliver attributes and every checksum element are then
# stripped from the files that remain.
#
# @return [Object] the filtered clone of +ng_xml+
def public_xml
  copy = ng_xml.clone

  # First the resources with nothing public in them, then the non-public files.
  [
    '/contentMetadata/resource[not(file[(@deliver="yes" or @publish="yes")])]',
    '/contentMetadata/resource/file[not(@deliver="yes" or @publish="yes")]'
  ].each do |hidden|
    copy.xpath(hidden).each(&:remove)
  end

  remaining_files = copy.xpath('/contentMetadata/resource/file')
  remaining_files.xpath('@preserve|@shelve|@publish|@deliver').each(&:remove)
  copy.xpath('/contentMetadata/resource/file/checksum').each(&:remove)
  copy
end

#remove_file(file_name) ⇒ Object


158
159
160
161
162
163
164
165
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 158

# Removes every <file> element whose id attribute equals +file_name+, then
# stores the serialized XML in +content+ and saves.
#
# @param file_name [String] id attribute of the file element(s) to remove
# @return [Object] whatever +save+ returns
def remove_file(file_name)
  doc = ng_xml
  doc.search("//file[@id='" + file_name + "']").each(&:remove)
  self.content = doc.to_s
  save
end

#remove_resource(resource_name) ⇒ Object


135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 135

# Removes the resource whose id is +resource_name+, moves every resource that
# followed it one place earlier in the sequence, then stores the serialized
# XML in +content+ and saves.
#
# @param resource_name [String] id attribute of the resource to remove
# @raise [RuntimeError] unless exactly one resource has that id
# @return [Object] whatever +save+ returns
def remove_resource(resource_name)
  xml = ng_xml
  resources = xml.search("//resource[@id='#{resource_name}']")
  raise 'Resource is missing or duplicated!' if resources.length != 1

  removed = resources.first
  removed_position = removed['sequence'].to_i
  removed.remove

  # Close the gap left behind by the removed resource.
  xml.search('//resource').each do |resource|
    sequence = resource['sequence'].to_i
    resource['sequence'] = (sequence - 1).to_s if sequence > removed_position
  end

  self.content = xml.to_s
  save
end

#rename_file(old_name, new_name) ⇒ Object


259
260
261
262
263
264
265
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 259

# Changes the id attribute of the first <file> element whose id is
# +old_name+ to +new_name+, then stores the serialized XML in +content+ and
# saves.
#
# @param old_name [String] current id attribute of the file
# @param new_name [String] replacement id attribute
# @raise [RuntimeError] when no file has the id +old_name+
# @return [Object] whatever +save+ returns
def rename_file(old_name, new_name)
  xml = ng_xml
  file_node = xml.search("//file[@id='#{old_name}']").first
  # Fail with a clear message rather than a NoMethodError on nil.
  raise "File #{old_name} not found." if file_node.nil?

  file_node['id'] = new_name
  self.content = xml.to_s
  save
end

#set_content_type(old_type, old_resource_type, new_type, new_resource_type) ⇒ Object

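Changes the content type from old_type to new_type and, when the content type matched, changes the type of every resource of type old_resource_type to new_resource_type.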


326
327
328
329
330
331
332
333
334
335
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 326

# Retypes the content metadata and its resources.
#
# If the root <contentMetadata> element has type +old_type+, its type becomes
# +new_type+ and every resource of type +old_resource_type+ is changed to
# +new_resource_type+. The serialized XML is assigned to +content+; +save+ is
# not called here.
#
# @param old_type [String] content type that must currently be set
# @param old_resource_type [String] resource type to replace
# @param new_type [String] content type to set
# @param new_resource_type [String] resource type to set
def set_content_type(old_type, old_resource_type, new_type, new_resource_type)
  doc = ng_xml
  matching_roots = doc.search("/contentMetadata[@type='" + old_type + "']")
  matching_roots.each do |content_metadata|
    content_metadata['type'] = new_type
    # Resources are only touched when the content type itself matched.
    stale = doc.search("//resource[@type='" + old_resource_type + "']")
    stale.each { |entry| entry['type'] = new_resource_type }
  end
  self.content = doc.to_s
end

#to_solr(solr_doc = Hash.new, *args) ⇒ Object

Terminology-based solrization is going to be painfully slow for large contentMetadata streams. Just select the relevant elements instead.


214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 214

# Adds summary fields about the content metadata to +solr_doc+.
#
# Nothing is added unless the root element has a type attribute. Otherwise the
# content type is recorded, the resources are walked in sequence order, and
# these values are added: total file count, shelved file count, resource
# count, summed size of files marked preserve="yes", one count per resource
# type, and the id of the first shelved file whose id ends in "jp2" (if any).
#
# @param solr_doc [Hash] document passed on to +add_solr_value+
# @param args [Array] accepted but not used by this method
# @return [Hash] the +solr_doc+ that was passed in
def to_solr(solr_doc = Hash.new, *args)
  doc = ng_xml
  content_type = doc.root['type']
  return solr_doc unless content_type

  files_total = 0
  files_shelved = 0
  resources_total = 0
  bytes_preserved = 0
  per_type = {}
  first_image = nil

  add_solr_value(solr_doc, "content_type", content_type, :string, [:facetable])

  ordered = doc.xpath('contentMetadata/resource').sort { |a, b| a['sequence'].to_i <=> b['sequence'].to_i }
  ordered.each do |resource|
    resources_total += 1
    kind = resource['type']
    per_type[kind] = (per_type[kind] || 0) + 1 if kind

    resource.xpath('file').each do |file|
      files_total += 1
      if file['shelve'] == 'yes'
        files_shelved += 1
        # Only the first shelved jp2 is remembered.
        first_image = file['id'] if first_image.nil? && file['id'].match(/jp2$/)
      end
      bytes_preserved += file['size'].to_i if file['preserve'] == 'yes'
    end
  end

  {
    "content_file_count" => files_total,
    "shelved_content_file_count" => files_shelved,
    "resource_count" => resources_total,
    "preserved_size" => bytes_preserved
  }.each do |field, value|
    add_solr_value(solr_doc, field, value.to_s, :string, [:searchable, :displayable])
  end
  per_type.each do |kind, count|
    add_solr_value(solr_doc, kind + "_resource_count", count.to_s, :string, [:searchable, :displayable])
  end
  add_solr_value(solr_doc, "first_shelved_image", first_image, :string, [:displayable]) unless first_image.nil?

  solr_doc
end

#update_attributes(file_name, publish, shelve, preserve) ⇒ Object


166
167
168
169
170
171
172
173
174
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 166

# Sets the shelve, publish and preserve attributes on the first <file>
# element whose id is +file_name+, then stores the serialized XML in
# +content+ and saves.
#
# @param file_name [String] id attribute of the file to update
# @param publish [String] new value of the publish attribute
# @param shelve [String] new value of the shelve attribute
# @param preserve [String] new value of the preserve attribute
# @raise [RuntimeError] when no file has the id +file_name+
# @return [Object] whatever +save+ returns
def update_attributes(file_name, publish, shelve, preserve)
  xml = ng_xml
  file_node = xml.search("//file[@id='#{file_name}']").first
  # Fail with a clear message rather than a NoMethodError on nil.
  raise "File #{file_name} not found." if file_node.nil?

  file_node['shelve'] = shelve
  file_node['publish'] = publish
  file_node['preserve'] = preserve
  self.content = xml.to_s
  save
end

#update_file(file, old_file_id) ⇒ Object


175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 175

# Updates the first <file> element whose id is +old_file_id+ from the +file+
# hash, then stores the serialized XML in +content+ and saves.
#
# The id is always replaced with file[:name]. Each of :md5 and :sha1 that is
# present updates the matching <checksum> child, creating it when the file
# has none of that type. :size, :mime_type, :shelve, :preserve and :publish
# are written only when present.
#
# @param file [Hash] new values; the keys read are :name, :md5, :sha1, :size,
#   :mime_type, :shelve, :preserve and :publish
# @param old_file_id [String] current id attribute of the file
# @raise [RuntimeError] when no file has the id +old_file_id+
# @return [Object] whatever +save+ returns
def update_file(file, old_file_id)
  xml = ng_xml
  file_node = xml.search("//file[@id='#{old_file_id}']").first
  raise "File #{old_file_id} not found." if file_node.nil?

  # The checksum lookups address the file by its OLD id, so they have to run
  # before the id is changed; otherwise existing checksums are not found and
  # duplicates get appended.
  %w[md5 sha1].each do |algorithm|
    digest = file[algorithm.to_sym]
    next if digest.nil?

    checksum_node = xml.search("//file[@id='#{old_file_id}']/checksum[@type='#{algorithm}']").first
    if checksum_node.nil?
      checksum_node = Nokogiri::XML::Node.new('checksum', xml)
      file_node.add_child(checksum_node)
    end
    checksum_node['type'] = algorithm
    checksum_node.content = digest
  end

  file_node['id'] = file[:name]
  file_node['size'] = file[:size] if file[:size]
  # Kept in step with add_file, which records the mime type as well.
  file_node['mimetype'] = file[:mime_type] if file[:mime_type]
  file_node['shelve'] = file[:shelve] if file[:shelve]
  file_node['preserve'] = file[:preserve] if file[:preserve]
  file_node['publish'] = file[:publish] if file[:publish]

  self.content = xml.to_s
  save
end

#update_resource_label(resource_name, new_label) ⇒ Object


267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 267

# Sets the label of the resource whose id is +resource_name+.
#
# The text of the resource's first <label> child is replaced; when the
# resource has no label, a new <label> element is appended to it. Only the
# in-memory XML is changed: this method neither assigns +content+ nor calls
# +save+.
#
# @param resource_name [String] id attribute of the resource
# @param new_label [String] label text to set
# @raise [RuntimeError] unless exactly one resource has that id
def update_resource_label(resource_name, new_label)
  doc = ng_xml
  selector = "//resource[@id='" + resource_name + "']"
  matches = doc.search(selector)
  raise 'Resource not found or duplicate found.' unless matches.length == 1

  existing = doc.search(selector + '/label')
  if existing.length > 0
    existing.first.content = new_label
  else
    # No label yet: build one and attach it to the resource.
    fresh = Nokogiri::XML::Node.new('label', doc)
    fresh.content = new_label
    matches.first.add_child(fresh)
  end
end

#update_resource_type(resource, new_type) ⇒ Object


283
284
285
286
287
288
289
290
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 283

# Sets the type attribute of the resource whose id is +resource+.
#
# Only the in-memory XML is changed: this method neither assigns +content+
# nor calls +save+.
#
# @param resource [String] id attribute of the resource to update
# @param new_type [String] value for the resource's type attribute
# @raise [RuntimeError] unless exactly one resource has that id
def update_resource_type(resource, new_type)
  xml = ng_xml
  # The lookup uses the +resource+ parameter; the name used here previously
  # was not defined in this method.
  resource_node = xml.search("//resource[@id='#{resource}']")
  raise 'Resource not found or duplicate found.' if resource_node.length != 1

  resource_node.first['type'] = new_type
end