Module: Dor::Describable

Extended by:
ActiveSupport::Concern
Included in:
Abstract
Defined in:
lib/dor/models/concerns/describable.rb

Defined Under Namespace

Classes: CrosswalkError

Constant Summary collapse

# Compiled stylesheet (mods2dc.xslt, located next to this file) used to turn MODS into Dublin Core.
# The file is opened in block form so the handle is closed as soon as the stylesheet has been parsed.
MODS_TO_DC_XSLT =
File.open(File.expand_path('mods2dc.xslt', File.dirname(__FILE__))) { |xslt_file| Nokogiri::XSLT(xslt_file) }
# Namespace URI bound to the oai_dc prefix in XPath queries.
XMLNS_OAI_DC =
'http://www.openarchives.org/OAI/2.0/oai_dc/'.freeze
# Namespace URI bound to the dc prefix in XPath queries.
XMLNS_DC =
'http://purl.org/dc/elements/1.1/'.freeze

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.get_collection_title(obj) ⇒ Object



293
294
295
# File 'lib/dor/models/concerns/describable.rb', line 293

# Look up the title of a collection (or of any object that exposes #full_title).
#
# @param obj [#full_title] object whose title is wanted
# @return [Object] whatever obj.full_title returns
def self.get_collection_title(obj)
  title = obj.full_title
  title
end

Instance Method Details

#add_access_conditions(doc) ⇒ Object

Note:

this method modifies the passed in doc

Create MODS accessCondition statements from rightsMetadata

Parameters:



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/dor/models/concerns/describable.rb', line 73

# Create MODS accessCondition statements from rightsMetadata.
#
# Existing accessCondition elements are removed from +doc+ first. Then one
# accessCondition is appended for every non-blank use-and-reproduction
# statement, copyright statement, Creative Commons license and Open Data
# Commons license found in the rightsMetadata datastream. Licenses whose
# machine value matches "none" are skipped.
#
# @note this method modifies the passed in doc
# @param doc [Nokogiri::XML::Document] MODS document to update in place
# @return [Object] result of the last XPath iteration; not meaningful to callers
def add_access_conditions(doc)
  # clear out any existing accessConditions
  doc.xpath('//mods:accessCondition', 'mods' => 'http://www.loc.gov/mods/v3').each(&:remove)
  rights = datastreams['rightsMetadata'].ng_xml

  # Appends one accessCondition after the last element child of the root.
  # Falls back to adding it as a child when the root has no element children,
  # instead of failing on nil.
  append = lambda do |text, type|
    condition = doc.create_element('accessCondition', text, :type => type)
    last_element = doc.root.element_children.last
    last_element.nil? ? doc.root.add_child(condition) : last_element.add_next_sibling(condition)
  end

  # Copies every non-blank human-readable statement of the given type.
  add_statements = lambda do |element, type|
    rights.xpath(%(//#{element}/human[@type="#{type}"])).each do |statement|
      txt = statement.text.strip
      append.call(txt, type) unless txt.empty?
    end
  end

  # Emits "<prefix> <machine code>: <human text>" for each machine-readable license.
  add_licenses = lambda do |license, prefix|
    rights.xpath("//use/machine[#{ci_compare('type', license)}]").each do |lic_type|
      next if lic_type.text =~ /none/i
      human = rights.at_xpath("//use/human[#{ci_compare('type', license)}]")
      # a machine-readable license without a human-readable counterpart is skipped
      next if human.nil?
      lic_text = human.text.strip
      next if lic_text.empty?
      append.call("#{prefix} #{lic_type.text}: #{lic_text}", 'license')
    end
  end

  add_statements.call('use', 'useAndReproduction')
  add_statements.call('copyright', 'copyright')
  add_licenses.call('creativecommons', 'CC')
  add_licenses.call('opendatacommons', 'ODC')
end

#add_collection_reference(doc) ⇒ Void

Note:

this method modifies the passed in doc

Adds to desc metadata a relatedItem with information about the collection this object belongs to. For use in published mods and mods-to-DC conversion.

Parameters:

Returns:

  • (Void)


151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/dor/models/concerns/describable.rb', line 151

# Adds to +doc+ one relatedItem per collection this object is a member of.
#
# Does nothing when the object has no public_relationships method or when no
# isMemberOfCollection relationship is found. Otherwise collection relatedItem
# entries already in +doc+ are removed before the new ones are added.
#
# @note this method modifies the passed in doc
# @param doc [Nokogiri::XML::Document] descriptive metadata to update in place
# @return [Void]
def add_collection_reference(doc)
  return unless methods.include? :public_relationships

  namespaces = {
    'fedora' => 'info:fedora/fedora-system:def/relations-external#',
    'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
  }
  memberships = public_relationships.search('//rdf:RDF/rdf:Description/fedora:isMemberOfCollection', namespaces)
  return if memberships.empty?

  remove_related_item_nodes_for_collections(doc)

  memberships.each do |membership|
    # strip the Fedora URI prefix to get the bare druid
    druid = membership['rdf:resource'].gsub('info:fedora/', '')
    add_related_item_node_for_collection(doc, druid)
  end
end

#add_constituent_relations(doc) ⇒ Void

expand constituent relations into relatedItem references – see JUMBO-18

Parameters:

  • doc (Nokogiri::XML)

    public MODS XML being built

Returns:

  • (Void)


169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/dor/models/concerns/describable.rb', line 169

# Expand constituent relations into relatedItem references -- see JUMBO-18.
#
# For every isConstituentOf relationship the parent object is loaded and a
# relatedItem (type "host", displayLabel "Appears in") holding the parent's
# title and its URL on the document cache host is appended to the root of +doc+.
#
# @param doc [Nokogiri::XML] public MODS XML being built
# @return [Void]
def add_constituent_relations(doc)
  namespaces = {
    'fedora' => 'info:fedora/fedora-system:def/relations-external#',
    'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
  }
  constituents = public_relationships.search('//rdf:RDF/rdf:Description/fedora:isConstituentOf', namespaces)

  constituents.each do |constituent_node|
    # fetch the parent object so its title can be read
    parent_druid = constituent_node['rdf:resource'].gsub(/^info:fedora\//, '')
    parent_obj = Dor.find(parent_druid)

    # <titleInfo><title>...</title></titleInfo>, title taken from the parent's full title
    title_node = doc.create_element 'title'
    title_node.content = Dor::Describable.get_collection_title(parent_obj)
    title_info_node = doc.create_element 'titleInfo'
    title_info_node << title_node

    # <location><url>...</url></location>, pointing at the parent's page on the document cache host
    url_node = doc.create_element 'url'
    url_node.content = "http://#{Dor::Config.stacks.document_cache_host}/#{parent_druid.split(':').last}"
    location_node = doc.create_element 'location'
    location_node << url_node

    # assemble the MODS relation and attach it to the public MODS
    related_item_node = doc.create_element 'relatedItem'
    related_item_node['type'] = 'host'
    related_item_node['displayLabel'] = 'Appears in'
    related_item_node << title_info_node
    related_item_node << location_node

    doc.root << related_item_node
  end
end

#add_dc_to_solr_doc(solr_doc) ⇒ Object



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/dor/models/concerns/describable.rb', line 213

# Index the generated Dublin Core into the Solr document.
#
# Each child of the oai_dc:dc root is stored as "public_dc_<element name>".
# In addition a sortable 'creator_title' field is added, made of the last
# dc:creator text immediately followed by the last dc:title text.
# A CrosswalkError is logged as a warning and not re-raised.
#
# @param solr_doc [Hash] Solr document being built
# @return [Object] result of the last add_solr_value call, or of the logger call on failure
def add_dc_to_solr_doc(solr_doc)
  # The generated collection relation is excluded here; the collection
  # title is obtained from Dor::Identifiable instead.
  dc_doc = generate_dublin_core(include_collection_as_related_item: false)

  dc_doc.xpath('/oai_dc:dc/*', oai_dc: XMLNS_OAI_DC).each do |element|
    add_solr_value(solr_doc, "public_dc_#{element.name}", element.text, :string, [:stored_searchable])
  end

  # when several creators or titles exist the last one wins; '' when there is none
  creator = dc_doc.xpath('//dc:creator', dc: XMLNS_DC).reduce('') { |_previous, element| element.text }
  title = dc_doc.xpath('//dc:title', dc: XMLNS_DC).reduce('') { |_previous, element| element.text }

  add_solr_value(solr_doc, 'creator_title', creator + title, :string, [:stored_sortable])
rescue CrosswalkError => e
  Dor.logger.warn "Cannot index #{pid}.descMetadata: #{e.message}"
end

#add_metadata_format_to_solr_doc(solr_doc) ⇒ Object



208
209
210
211
# File 'lib/dor/models/concerns/describable.rb', line 208

# Records 'mods' as a metadata format in the Solr document.
#
# @param solr_doc [Hash] Solr document being built; its 'metadata_format_ssim' entry is created if absent
# @return [Array] the updated 'metadata_format_ssim' values
def add_metadata_format_to_solr_doc(solr_doc)
  solr_doc['metadata_format_ssim'] ||= []
  solr_doc['metadata_format_ssim'] += ['mods']
end

#add_mods_to_solr_doc(solr_doc) ⇒ Object



234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# File 'lib/dor/models/concerns/describable.rb', line 234

# Copies values read from stanford_mods into the Solr document.
#
# Each entry of the table below maps a stanford_mods call (a method name, or
# [method name, *arguments]) to the Solr fields that receive its result.
# Nil and empty results are skipped. Afterwards the three publication date
# fields are reduced to a single value, and the language, genre and format
# *_tesim fields get a "(none)" placeholder when they have no values.
#
# @param solr_doc [Hash] Solr document being built; modified in place
# @return [Hash] the same solr_doc
def add_mods_to_solr_doc(solr_doc)
  mods_sources = {
    sw_title_display: %w(sw_display_title_tesim),
    main_author_w_date: %w(sw_author_ssim sw_author_tesim),
    sw_sort_author: %w(sw_author_sort_ssi),
    sw_language_facet: %w(sw_language_ssim sw_language_tesim),
    sw_genre: %w(sw_genre_ssim sw_genre_tesim),
    format_main: %w(sw_format_ssim sw_format_tesim),
    topic_facet: %w(sw_topic_ssim sw_topic_tesim),
    era_facet: %w(sw_subject_temporal_ssim sw_subject_temporal_tesim),
    geographic_facet: %w(sw_subject_geographic_ssim sw_subject_geographic_tesim),
    [:term_values, :typeOfResource] => %w(mods_typeOfResource_ssim mods_typeOfResource_tesim),
    pub_year_sort_str: %w(sw_pub_date_sort_ssi),
    pub_year_int: %w(sw_pub_date_sort_isi),
    pub_year_display_str: %w(sw_pub_date_facet_ssi)
  }

  mods_sources.each_pair do |meth, solr_keys|
    # Array() wraps a bare Symbol and leaves [method, *args] as is, so the
    # hash key is read without being mutated
    vals = stanford_mods.send(*Array(meth))

    next if vals.nil? || (vals.respond_to?(:empty?) && vals.empty?)

    solr_keys.each do |key|
      solr_doc[key] ||= []
      # splat to avoid a multi-dimensional array: push the values, not the array
      solr_doc[key].push(*vals)
    end
  end

  # convert multivalued fields to single value
  %w(sw_pub_date_sort_ssi sw_pub_date_sort_isi sw_pub_date_facet_ssi).each do |key|
    solr_doc[key] = solr_doc[key].first unless solr_doc[key].nil?
  end
  # some fields get explicit "(none)" placeholder values, mostly for faceting
  %w(sw_language_tesim sw_genre_tesim sw_format_tesim).each do |key|
    solr_doc[key] = ['(none)'] if solr_doc[key].nil? || solr_doc[key].empty?
  end
  solr_doc
end


111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/dor/models/concerns/describable.rb', line 111

# Appends to the root of +doc+ a relatedItem (type "host") describing one collection.
#
# The relatedItem contains, in this order: a titleInfo/title with the
# collection's title, a location/url on the document cache host, and a
# typeOfResource flagged collection="yes".
#
# @param doc [Nokogiri::XML::Document] MODS document to update in place
# @param collection_druid [String] identifier of the collection; the part after the last ':' is used in the URL
# @return [Object, nil] result of adding the node to the root, or nil when the collection object cannot be found
def add_related_item_node_for_collection(doc, collection_druid)
  collection_obj =
    begin
      Dor.find(collection_druid)
    rescue ActiveFedora::ObjectNotFoundError
      return nil
    end

  # <titleInfo><title>...</title></titleInfo>
  title_info_node = Nokogiri::XML::Node.new('titleInfo', doc)
  title_node = Nokogiri::XML::Node.new('title', doc)
  title_node.content = Dor::Describable.get_collection_title(collection_obj)
  title_info_node.add_child(title_node)

  # e.g.:
  #   <location>
  #     <url>https://purl.stanford.edu/rh056sr3313</url>
  #   </location>
  url_node = doc.create_element('url')
  url_node.content = "https://#{Dor::Config.stacks.document_cache_host}/#{collection_druid.split(':').last}"
  loc_node = doc.create_element('location')
  loc_node << url_node

  # <typeOfResource collection="yes"/>
  type_node = Nokogiri::XML::Node.new('typeOfResource', doc)
  type_node['collection'] = 'yes'

  related_item_node = Nokogiri::XML::Node.new('relatedItem', doc)
  related_item_node['type'] = 'host'
  [title_info_node, loc_node, type_node].each { |child| related_item_node.add_child(child) }

  doc.root.add_child(related_item_node)
end

#build_descMetadata_datastream(ds) ⇒ Object



35
36
37
38
39
40
41
42
# File 'lib/dor/models/concerns/describable.rb', line 35

# Fills the given datastream with the descriptive metadata returned by
# fetch_descMetadata_datastream.
#
# @param ds [Object] datastream to populate; its dsLabel, ng_xml and content are set
# @return [String, nil] the XML assigned to ds.content, or nil when no metadata could be fetched
def build_descMetadata_datastream(ds)
  content = fetch_descMetadata_datastream
  return nil if content.nil?
  ds.dsLabel = 'Descriptive Metadata'
  ds.ng_xml = Nokogiri::XML(content)
  ds.ng_xml.normalize_text!
  ds.content = ds.ng_xml.to_xml
end

#fetch_descMetadata_datastream ⇒ Object



29
30
31
32
33
# File 'lib/dor/models/concerns/describable.rb', line 29

# Fetches descriptive metadata for this object from Dor::MetadataService.
#
# The otherId values of identityMetadata are offered to the service as
# candidates, and the first one it reports as resolvable is fetched.
#
# @return [Object, nil] whatever Dor::MetadataService.fetch returns, or nil when no candidate is resolvable
def fetch_descMetadata_datastream
  candidates = datastreams['identityMetadata'].otherId.map(&:to_s)
  metadata_id = Dor::MetadataService.resolvable(candidates).first
  metadata_id.nil? ? nil : Dor::MetadataService.fetch(metadata_id.to_s)
end

#full_title ⇒ Object



297
298
299
# File 'lib/dor/models/concerns/describable.rb', line 297

# Title of this object as reported by stanford_mods.
#
# @return [Object] the value of stanford_mods.sw_title_display
def full_title
  mods_record = stanford_mods
  mods_record.sw_title_display
end

#generate_dublin_core(include_collection_as_related_item: true) ⇒ Nokogiri::Doc

Generates Dublin Core from the MODS in the descMetadata datastream using the LoC mods2dc stylesheet

Should not be used for the Fedora DC datastream

Returns:

  • (Nokogiri::Doc)

    the DublinCore XML document object

Raises:

  • (CrosswalkError)

    Raises an Exception if the generated DC is empty or has no children



48
49
50
51
52
53
54
55
56
# File 'lib/dor/models/concerns/describable.rb', line 48

# Generates Dublin Core from the MODS in the descMetadata datastream using
# the MODS_TO_DC_XSLT stylesheet. Should not be used for the Fedora DC datastream.
#
# The transformation runs on a copy of the descMetadata XML. Elements of the
# result that have no text children are removed.
#
# @param include_collection_as_related_item [Boolean] when true, collection
#   relatedItem entries are added to the copy before it is transformed
# @return [Nokogiri::Doc] the DublinCore XML document object
# @raise [CrosswalkError] if the generated DC has no root or the root has no children
def generate_dublin_core(include_collection_as_related_item: true)
  desc_md = descMetadata.ng_xml.dup(1)
  add_collection_reference(desc_md) if include_collection_as_related_item
  dc_doc = MODS_TO_DC_XSLT.transform(desc_md)
  dc_doc.xpath('/oai_dc:dc/*[count(text()) = 0]', oai_dc: XMLNS_OAI_DC).remove # Remove empty nodes
  raise CrosswalkError, "Dor::Item#generate_dublin_core produced incorrect xml (no root):\n#{dc_doc.to_xml}" if dc_doc.root.nil?
  raise CrosswalkError, "Dor::Item#generate_dublin_core produced incorrect xml (no children):\n#{dc_doc.to_xml}" if dc_doc.root.children.empty?
  dc_doc
end

#generate_public_desc_md ⇒ String

Returns public descriptive metadata XML.

Returns:

  • (String)

    Public descriptive metadata XML



59
60
61
62
63
64
65
66
67
68
# File 'lib/dor/models/concerns/describable.rb', line 59

# Builds the public version of the descriptive metadata.
#
# Works on a copy of the descMetadata XML: collection references, access
# conditions and constituent relations are added, XML comments are removed,
# and the result is re-parsed with the noblanks option and serialized as UTF-8.
#
# @return [String] Public descriptive metadata XML
def generate_public_desc_md
  doc = descMetadata.ng_xml.dup(1)
  add_collection_reference(doc)
  add_access_conditions(doc)
  add_constituent_relations(doc)
  doc.xpath('//comment()').remove
  # re-parse so that blank text nodes left behind by the edits are dropped
  new_doc = Nokogiri::XML(doc.to_xml, &:noblanks)
  new_doc.encoding = 'UTF-8'
  new_doc.to_xml
end

#remove_related_item_nodes_for_collections(doc) ⇒ Object

Remove existing relatedItem entries for collections from descMetadata



105
106
107
108
109
# File 'lib/dor/models/concerns/describable.rb', line 105

# Remove existing relatedItem entries for collections from +doc+.
#
# A relatedItem counts as a collection entry when it has type="host" and
# contains a typeOfResource element with collection="yes".
#
# @param doc [Nokogiri::XML::Document] MODS document to update in place
# @return [Object] the set of matched typeOfResource nodes
def remove_related_item_nodes_for_collections(doc)
  collection_markers = doc.search(
    '/mods:mods/mods:relatedItem[@type="host"]/mods:typeOfResource[@collection=\'yes\']',
    'mods' => 'http://www.loc.gov/mods/v3'
  )
  # the match is the typeOfResource marker; its parent is the relatedItem to drop
  collection_markers.each { |marker| marker.parent.remove }
end

#set_desc_metadata_using_label(force = false) ⇒ String

Returns descMetadata.content XML.

Parameters:

  • force (Boolean) (defaults to: false)

    Overwrite existing XML

Returns:

  • (String)

    descMetadata.content XML



276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
# File 'lib/dor/models/concerns/describable.rb', line 276

# Replaces the descMetadata content with a minimal MODS 3.3 record whose
# only title is this object's label.
#
# @param force [Boolean] Overwrite existing XML
# @return [String] descMetadata.content XML
# @raise [RuntimeError] when force is false and descMetadata.new? is false
def set_desc_metadata_using_label(force = false)
  unless force || descMetadata.new?
    raise 'Cannot proceed, there is already content in the descriptive metadata datastream: ' + descMetadata.content.to_s
  end
  # read the label into a local so it is available inside the builder block
  label = self.label
  builder = Nokogiri::XML::Builder.new { |xml|
    xml.mods(
      'xmlns' => 'http://www.loc.gov/mods/v3', 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', :version => '3.3',
      'xsi:schemaLocation' => 'http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-3.xsd') {
      xml.titleInfo {
        xml.title label
      }
    }
  }
  descMetadata.content = builder.to_xml
end

#stanford_mods(content = nil, ns_aware = true) ⇒ Object

intended for read-access, “as SearchWorks would see it”, mostly for to_solr()

Parameters:

  • content (Nokogiri::XML::Document) (defaults to: nil)

    Nokogiri descMetadata document (overriding internal data)

  • ns_aware (boolean) (defaults to: true)

    namespace awareness toggle for from_nk_node()



20
21
22
23
24
25
26
27
# File 'lib/dor/models/concerns/describable.rb', line 20

# Intended for read-access, "as SearchWorks would see it", mostly for to_solr().
#
# NOTE(review): the record is memoized on first use, so +content+ and
# +ns_aware+ only take effect on the first call for a given object; later
# calls return the cached record whatever arguments they pass. Confirm that
# callers rely on this before changing it.
#
# @param content [Nokogiri::XML::Document] Nokogiri descMetadata document (overriding internal data)
# @param ns_aware [Boolean] namespace awareness toggle for from_nk_node()
# @return [Stanford::Mods::Record] record loaded from the root of the chosen document
def stanford_mods(content = nil, ns_aware = true)
  @stanford_mods ||= begin
    m = Stanford::Mods::Record.new
    desc = content.nil? ? descMetadata.ng_xml : content
    m.from_nk_node(desc.root, ns_aware)
    m
  end
end

#to_solr(solr_doc = {}, *args) ⇒ Object



201
202
203
204
205
206
# File 'lib/dor/models/concerns/describable.rb', line 201

# Extends the inherited to_solr with the metadata format marker, the
# Dublin Core fields and the MODS-derived fields.
#
# @param solr_doc [Hash] Solr document to start from
# @param args extra arguments, passed through to super
# @return [Object] the value returned by add_mods_to_solr_doc
def to_solr(solr_doc = {}, *args)
  solr_doc = super solr_doc, *args
  add_metadata_format_to_solr_doc(solr_doc)
  add_dc_to_solr_doc(solr_doc)
  add_mods_to_solr_doc(solr_doc)
end