Class: Dor::ContentMetadataDS

Inherits:
ActiveFedora::OmDatastream
  • Object
show all
Includes:
Upgradable, SolrDocHelper
Defined in:
lib/dor/datastreams/content_metadata_ds.rb

Instance Method Summary collapse

Methods included from SolrDocHelper

#add_solr_value

Methods included from Upgradable

add_upgrade_callback, included, run_upgrade_callbacks, #upgrade!

Instance Method Details

#add_file(file, resource_name) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 44

# Appends a <file> element to the resource whose id is +resource_name+, then
# writes the serialized XML back to the datastream content and saves.
#
# @param file [Hash] reads :name, :shelve, :publish and :preserve (missing
#   flags become empty strings) and, when present, :md5, :sha1, :size and
#   :mime_type
# @param resource_name [String] id attribute of the target resource element
# @raise [RuntimeError] when no resource with that id is found
def add_file(file, resource_name)
  doc = self.ng_xml
  matches = doc.search("//resource[@id='" + resource_name + "']")
  raise 'resource doesnt exist.' if matches.length == 0

  entry = Nokogiri::XML::Node.new('file', doc)
  entry['id'] = file[:name]
  %w[shelve publish preserve].each do |flag|
    entry[flag] = file[flag.to_sym] || ''
  end
  matches.first.add_child(entry)

  # One <checksum type="..."> child per digest that was supplied.
  %w[md5 sha1].each do |algo|
    digest = file[algo.to_sym]
    next unless digest
    sum = Nokogiri::XML::Node.new('checksum', doc)
    sum['type'] = algo
    sum.content = digest
    entry.add_child(sum)
  end

  entry['size'] = file[:size] if file[:size]
  entry['mimetype'] = file[:mime_type] if file[:mime_type]
  self.content = doc.to_s
  self.save
end

#add_resource(files, resource_name, position, type = "file") ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 80

# Adds a new <resource> element (with one <file> child per entry in +files+)
# to the contentMetadata root, then writes the XML back to the datastream
# content and saves.
#
# Existing resources whose sequence is at or after +position+ are shifted up
# by one so the new resource can take that sequence number. The new element
# is appended as the last child of <contentMetadata> regardless of its
# sequence.
#
# @param files [Array<Hash>] each hash reads :name, :shelve, :publish and
#   :preserve (missing flags become empty strings) and, when present, :md5,
#   :sha1 and :size
# @param resource_name [String] id for the new resource
# @param position [Integer, String] sequence number for the new resource
# @param type [String] value of the resource's type attribute
# @raise [RuntimeError] when a resource with that id already exists
def add_resource(files, resource_name, position, type = "file")
  xml = self.ng_xml
  if xml.search("//resource[@id='" + resource_name + "']").length > 0
    raise 'resource ' + resource_name + ' already exists'
  end

  # Accept "3" as well as 3; the comparison below needs an Integer.
  position = position.to_i

  # Make room: everything at or after the insertion point moves up by one.
  xml.search('//resource').each do |existing|
    sequence = existing['sequence'].to_i
    existing['sequence'] = (sequence + 1).to_s if sequence >= position
  end

  resource_node = Nokogiri::XML::Node.new('resource', xml)
  resource_node['sequence'] = position.to_s
  resource_node['id'] = resource_name
  resource_node['type'] = type

  files.each do |file|
    file_node = Nokogiri::XML::Node.new('file', xml)
    %w[shelve publish preserve].each { |flag| file_node[flag] = file[flag.to_sym] || '' }
    file_node['id'] = file[:name]
    resource_node.add_child(file_node)

    [:md5, :sha1].each do |algo|
      next if file[algo].nil?
      checksum_node = Nokogiri::XML::Node.new('checksum', xml)
      checksum_node['type'] = algo.to_s
      checksum_node.content = file[algo]
      file_node.add_child(checksum_node)
    end
    file_node['size'] = file[:size] if file[:size]
  end

  xml.search('//contentMetadata').first.add_child(resource_node)
  self.content = xml.to_s
  self.save
end

#move_resource(resource_name, new_position) ⇒ Object



268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 268

# Changes the sequence number of the resource whose id is +resource_name+ to
# +new_position+, shifting the resources that lie between the old and new
# positions by one so the numbering stays contiguous.
#
# Only the in-memory XML returned by ng_xml is modified; this method does not
# assign content or call save.
#
# @param resource_name [String] id attribute of the resource to move
# @param new_position [Integer, String] target sequence number
# @return [nil]
# @raise [RuntimeError] unless exactly one resource has that id
def move_resource resource_name, new_position
  xml = self.ng_xml
  matches = xml.search("//resource[@id='" + resource_name + "']")
  raise 'Resource not found or duplicate found.' if matches.length != 1

  moved = matches.first
  position = moved['sequence'].to_i
  new_position = new_position.to_i

  if new_position != position
    moving_later = new_position > position
    xml.search('//resource').each do |resource|
      sequence = resource['sequence'].to_i
      if moving_later
        # Resources after the old slot, up to the new slot, close the gap.
        resource['sequence'] = (sequence - 1).to_s if sequence > position && sequence <= new_position
      elsif sequence >= new_position && sequence < position
        # Resources from the new slot up to the old slot make room.
        resource['sequence'] = (sequence + 1).to_s
      end
    end
    # The strict comparisons above never touch the moved resource itself.
    moved['sequence'] = new_position.to_s
  end
  nil
end

#public_xml ⇒ Object



36
37
38
39
40
41
42
43
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 36

# Returns a pruned clone of ng_xml; every removal below runs on the clone.
#
# Removed, in this order: resources that contain no file marked
# deliver="yes" or publish="yes"; files that are not so marked; the preserve,
# shelve, publish and deliver attributes of the remaining files; and all
# file checksum elements.
#
# @return the pruned clone
def public_xml
  pruned = self.ng_xml.clone
  discard = lambda { |nodes| nodes.each(&:remove) }

  discard.call(pruned.xpath('/contentMetadata/resource[not(file[(@deliver="yes" or @publish="yes")])]'))
  discard.call(pruned.xpath('/contentMetadata/resource/file[not(@deliver="yes" or @publish="yes")]'))
  remaining_files = pruned.xpath('/contentMetadata/resource/file')
  discard.call(remaining_files.xpath('@preserve|@shelve|@publish|@deliver'))
  discard.call(pruned.xpath('/contentMetadata/resource/file/checksum'))

  pruned
end

#remove_file(file_name) ⇒ Object



149
150
151
152
153
154
155
156
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 149

# Removes every <file> element whose id equals +file_name+, then writes the
# XML back to the datastream content and saves.
#
# @param file_name [String] id attribute of the file element(s) to drop
def remove_file file_name
  doc = self.ng_xml
  doc.search("//file[@id='" + file_name + "']").each(&:remove)
  self.content = doc.to_s
  self.save
end

#remove_resource(resource_name) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 126

# Removes the resource whose id is +resource_name+ and closes the gap it
# leaves: every remaining resource with a higher sequence number is moved
# down by one. The XML is then written back to the datastream content and
# saved.
#
# @param resource_name [String] id attribute of the resource to remove
# @raise [RuntimeError] unless exactly one resource has that id
def remove_resource resource_name
  xml = self.ng_xml
  resources = xml.search("//resource[@id='" + resource_name + "']")
  raise 'Resource is missing or duplicated!' if resources.length != 1

  target = resources.first
  removed_sequence = target['sequence']
  target.remove

  # Without a sequence on the removed node there is no gap to close.
  unless removed_sequence.nil?
    removed_sequence = removed_sequence.to_i
    xml.search('//resource').each do |resource|
      sequence = resource['sequence'].to_i
      resource['sequence'] = (sequence - 1).to_s if sequence > removed_sequence
    end
  end

  self.content = xml.to_s
  self.save
end

#rename_file(old_name, new_name) ⇒ Object



235
236
237
238
239
240
241
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 235

# Changes the id of the first <file> element whose id is +old_name+ to
# +new_name+, then writes the XML back to the datastream content and saves.
#
# @param old_name [String] current id attribute of the file element
# @param new_name [String] replacement id
def rename_file old_name, new_name
  doc = self.ng_xml
  target = doc.search("//file[@id='" + old_name + "']").first
  target['id'] = new_name
  self.content = doc.to_s
  self.save
end

#set_content_type(old_type, old_resource_type, new_type, new_resource_type) ⇒ Object

Set the content type and the resource types for all resources

Parameters:

  • new_type (String)

    the new content type, ex book

  • new_resource_type (String)

    the new type for all resources, ex book



302
303
304
305
306
307
308
309
310
311
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 302

# Retypes the document: when the contentMetadata root has type +old_type+ it
# is set to +new_type+, and every resource of type +old_resource_type+ is set
# to +new_resource_type+. Resources are only touched when the root matched.
# The XML is assigned to the datastream content; save is not called here.
#
# @param old_type [String] content type the root must currently have
# @param old_resource_type [String] resource type to be replaced
# @param new_type [String] the new content type, ex book
# @param new_resource_type [String] the new type for matching resources
def set_content_type old_type, old_resource_type, new_type, new_resource_type
  doc = self.ng_xml
  doc.search("/contentMetadata[@type='" + old_type + "']").each do |root|
    root['type'] = new_type
    stale = doc.search("//resource[@type='" + old_resource_type + "']")
    stale.each { |res| res['type'] = new_resource_type }
  end
  self.content = doc.to_s
end

#to_solr(solr_doc = Hash.new, *args) ⇒ Object

Terminology-based solrization is going to be painfully slow for large contentMetadata streams. Just select the relevant elements instead.



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 189

# Adds summary fields about the content metadata to +solr_doc+.
#
# Terminology-based solrization is going to be painfully slow for large
# contentMetadata streams, so the relevant elements are selected directly.
#
# Nothing is added when the document has no root element or the root has no
# type attribute. Otherwise the fields written (via add_solr_value) are:
# content_type, content_file_count, shelved_content_file_count,
# resource_count, preserved_size, resource_types plus one
# "<type>_resource_count" per resource type, and first_shelved_image when a
# shelved file whose id ends in "jp2" exists.
#
# @param solr_doc [Hash] document to add fields to
# @param args unused here
# @return [Hash] the same +solr_doc+
def to_solr(solr_doc=Hash.new, *args)
  doc = self.ng_xml
  root = doc.root
  # An empty datastream has no root; treat it like a root without a type.
  return solr_doc unless root && root['type']

  shelved_file_count = 0
  content_file_count = 0
  resource_type_counts = Hash.new(0)
  resource_count = 0
  preserved_size = 0
  first_shelved_image = nil
  add_solr_value(solr_doc, "content_type", root['type'], :string, [:facetable, :symbol])

  # Walk resources in sequence order so "first" shelved image means first
  # by sequence, not by position in the XML.
  ordered = doc.xpath('contentMetadata/resource').sort { |a, b| a['sequence'].to_i <=> b['sequence'].to_i }
  ordered.each do |resource|
    resource_count += 1
    resource_type_counts[resource['type']] += 1 if resource['type']
    resource.xpath('file').each do |file|
      content_file_count += 1
      if file['shelve'] == 'yes'
        shelved_file_count += 1
        # to_s keeps a file element without an id from raising on nil.
        if first_shelved_image.nil? && file['id'].to_s =~ /jp2$/
          first_shelved_image = file['id']
        end
      end
      preserved_size += file['size'].to_i if file['preserve'] == 'yes'
    end
  end

  add_solr_value(solr_doc, "content_file_count", content_file_count.to_s, :string, [:searchable, :displayable])
  add_solr_value(solr_doc, "shelved_content_file_count", shelved_file_count.to_s, :string, [:searchable, :displayable])
  add_solr_value(solr_doc, "resource_count", resource_count.to_s, :string, [:searchable, :displayable])
  add_solr_value(solr_doc, "preserved_size", preserved_size.to_s, :string, [:searchable, :displayable])
  resource_type_counts.each do |key, count|
    add_solr_value(solr_doc, "resource_types", key, :string, [:symbol])
    add_solr_value(solr_doc, key+"_resource_count", count.to_s, :string, [:searchable, :displayable])
  end
  unless first_shelved_image.nil?
    add_solr_value(solr_doc, "first_shelved_image", first_shelved_image, :string, [:displayable])
  end
  solr_doc
end

#update_attributes(file_name, publish, shelve, preserve) ⇒ Object



157
158
159
160
161
162
163
164
165
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 157

# Sets the shelve, publish and preserve attributes on the first <file>
# element whose id is +file_name+, then writes the XML back to the
# datastream content and saves.
#
# Note the parameter order: publish comes before shelve.
#
# @param file_name [String] id attribute of the file element
# @param publish value for the publish attribute
# @param shelve value for the shelve attribute
# @param preserve value for the preserve attribute
def update_attributes file_name, publish, shelve, preserve
  doc = self.ng_xml
  target = doc.search("//file[@id='" + file_name + "']").first
  [['shelve', shelve], ['publish', publish], ['preserve', preserve]].each do |attribute, value|
    target[attribute] = value
  end
  self.content = doc.to_s
  self.save
end

#update_file(file, old_file_id) ⇒ Object



166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 166

# Updates the first <file> element whose id is +old_file_id+ from the values
# in +file+, then writes the XML back to the datastream content and saves.
#
# The id is always set to file[:name]. For each of :md5 and :sha1 that is
# supplied, the file's existing <checksum> of that type is updated, or one is
# created if the file has none. :size, :shelve, :preserve and :publish are
# copied onto the element when present.
#
# @param file [Hash] reads :name, :md5, :sha1, :size, :shelve, :preserve,
#   :publish
# @param old_file_id [String] current id attribute of the file element
# @raise [NoMethodError] when no file has that id (the lookup yields nil)
def update_file file, old_file_id
  xml = self.ng_xml
  file_node = xml.search("//file[@id='" + old_file_id + "']").first
  file_node['id'] = file[:name]

  [:md5, :sha1].each do |algo|
    next if file[algo].nil?
    # Look the checksum up relative to the file node itself. A document-wide
    # query by old id would miss it once the id above has changed, and a
    # duplicate checksum would be appended instead.
    checksum_node = file_node.xpath("checksum[@type='" + algo.to_s + "']").first
    if checksum_node.nil?
      checksum_node = Nokogiri::XML::Node.new('checksum', xml)
      file_node.add_child(checksum_node)
    end
    checksum_node['type'] = algo.to_s
    checksum_node.content = file[algo]
  end

  [:size, :shelve, :preserve, :publish].each do |attribute|
    file_node[attribute.to_s] = file[attribute] if file[attribute]
  end
  self.content = xml.to_s
  self.save
end

#update_resource_label(resource_name, new_label) ⇒ Object



243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 243

# Sets the label of the resource whose id is +resource_name+: the first
# existing <label> child has its content replaced, or a new <label> is added
# when the resource has none.
#
# Only the in-memory XML returned by ng_xml is modified; this method does not
# assign content or call save.
#
# @param resource_name [String] id attribute of the resource
# @param new_label [String] text for the label
# @raise [RuntimeError] unless exactly one resource has that id
def update_resource_label resource_name, new_label
  doc = self.ng_xml
  resource_query = "//resource[@id='" + resource_name + "']"
  owners = doc.search(resource_query)
  raise 'Resource not found or duplicate found.' unless owners.length == 1

  current = doc.search(resource_query + '/label').first
  if current
    current.content = new_label
  else
    fresh = Nokogiri::XML::Node.new('label', doc)
    fresh.content = new_label
    owners.first.add_child(fresh)
  end
end

#update_resource_type(resource, new_type) ⇒ Object



259
260
261
262
263
264
265
266
# File 'lib/dor/datastreams/content_metadata_ds.rb', line 259

# Sets the type attribute of the resource whose id is +resource+.
#
# Only the in-memory XML returned by ng_xml is modified; this method does not
# assign content or call save.
#
# @param resource [String] id attribute of the resource to retype
# @param new_type [String] new value for the type attribute
# @raise [RuntimeError] unless exactly one resource has that id
def update_resource_type resource, new_type
  xml = self.ng_xml
  # The parameter is named +resource+; it holds the resource's id.
  matches = xml.search("//resource[@id='" + resource + "']")
  raise 'Resource not found or duplicate found.' if matches.length != 1
  matches.first['type'] = new_type
end