Module: Solrizer::XML::TerminologyBasedSolrizer

Defined in:
lib/solrizer/xml/terminology_based_solrizer.rb

Overview

This module is only suitable to mix into Classes that use the OM::XML::Document Module

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#field_mapper ⇒ Object

Instance Methods



87
88
89
# File 'lib/solrizer/xml/terminology_based_solrizer.rb', line 87

# Reader for the field mapper held by this object.
#
# @return [Object, nil] the current value of @field_mapper; nil if it was never assigned
def field_mapper
  @field_mapper
end

Class Method Details

.default_field_mapper ⇒ Object



4
5
6
# File 'lib/solrizer/xml/terminology_based_solrizer.rb', line 4

# Lazily creates and memoizes the fallback field mapper.
#
# The mapper is built on first use and kept in @@default_field_mapper, so
# later calls return the same object.
#
# @return [Solrizer::FieldMapper::Default] the memoized mapper
def self.default_field_mapper
  return @@default_field_mapper if defined?(@@default_field_mapper) && @@default_field_mapper

  @@default_field_mapper = Solrizer::FieldMapper::Default.new
end

.solrize(doc, solr_doc = Hash.new, field_mapper = nil) ⇒ Object

Build a solr document from doc based on its terminology

Parameters:

  • doc (OM::XML::Document)
  • solr_doc (Hash) (optional)

    values hash to populate



13
14
15
16
17
18
19
20
21
# File 'lib/solrizer/xml/terminology_based_solrizer.rb', line 13

# Build a solr document from doc based on its terminology.
#
# Each term listed in the terminology of doc's class is handed to
# doc.solrize_term together with solr_doc and field_mapper.
#
# @param [OM::XML::Document] doc
# @param [Hash] (optional) solr_doc (values hash) to populate
# @param [Object] field_mapper passed through to doc.solrize_term unchanged
# @return [Hash] solr_doc; when the class has no terminology no term is visited
def self.solrize(doc, solr_doc=Hash.new, field_mapper = nil)
  terminology = doc.class.terminology
  return solr_doc if terminology.nil?

  terminology.terms.each_pair do |_term_name, term|
    doc.solrize_term(term, solr_doc, field_mapper)
  end
  solr_doc
end

.solrize_node(node_value, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {}) ⇒ Hash

Populate a solr document with solr fields corresponding to the given xml node. Field names are generated using settings from the term in the doc's terminology corresponding to term_pointer. If the supplied term does not have an index_as attribute, no indexing will be performed.

Parameters:

  • node_value (Nokogiri::XML::Node)

    to solrize

  • doc (OM::XML::Document)

    document the node came from

  • term_pointer (Array)

    Array pointing to the term that should be used for solrization settings

  • term (Term)

    the term to be solrized

  • solr_doc (Hash) (optional)

    values hash to populate

Returns:

  • (Hash)

    the solr doc



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/solrizer/xml/terminology_based_solrizer.rb', line 62

# Populate a solr document with solr fields for one node value.
#
# Field names are derived from term_pointer: always from the generic term
# name, and additionally from the hierarchical term name when the pointer
# has more than one element. Field/value pairs whose joined value is blank
# are skipped. When term.index_as is nil/false or empty nothing is indexed.
#
# @param [Object] node_value value handed to the field mapper
# @param [OM::XML::Document] doc document the node came from (not used here)
# @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
# @param [Term] term the term to be solrized (its type and index_as are read)
# @param [Hash] (optional) solr_doc (values hash) to populate
# @param [Object] field_mapper mapper to use; falls back to default_field_mapper when nil
# @param [Hash] opts accepted for interface compatibility (not used here)
# @return [Hash] the solr doc
def self.solrize_node(node_value, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
  return solr_doc if !term.index_as || term.index_as.empty?

  field_mapper ||= self.default_field_mapper

  # Shared by both naming schemes: map the value under the given name base
  # and insert every field whose value is not blank.
  insert_fields = lambda do |name_base|
    field_mapper.solr_names_and_values(name_base, node_value, term.type, term.index_as).each do |field_name, field_value|
      next if field_value.join("").strip.empty?

      ::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
    end
  end

  insert_fields.call(OM::XML::Terminology.term_generic_name(*term_pointer))
  if term_pointer.length > 1
    insert_fields.call(OM::XML::Terminology.term_hierarchical_name(*term_pointer))
  end

  solr_doc
end

.solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts = {}) ⇒ Object

Populate a solr document with fields based on nodes in xml. Values for a term are gathered by term_pointer using OM::XML::TermValueOperators.term_values and are deserialized by OM according to :type, as determined in its terminology. The content of the actual field in solr is each node of the nodeset returned by OM, rendered to a string.

Parameters:

  • doc (OM::XML::Document)

    xml document to extract values from

  • term (OM::XML::Term)

    corresponding to desired xml values

  • solr_doc (Hash) (optional)

    values hash to populate



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/solrizer/xml/terminology_based_solrizer.rb', line 31

# Populate a solr document with fields based on the values of one term.
#
# Values are gathered with doc.term_values using a pointer made of
# opts[:parents] followed by term.name. Each value is rendered to a String
# and passed to doc.solrize_node; unless the term is an
# OM::XML::NamedTermProxy, every child term is then solrized recursively
# with a parent pointer that addresses the current value by its position
# in the nodeset.
#
# @param [OM::XML::Document] doc xml document to extract values from
# @param [OM::XML::Term] term corresponding to desired xml values
# @param [Hash] (optional) solr_doc (values hash) to populate
# @param [Object] field_mapper passed through to doc.solrize_node / doc.solrize_term
# @param [Hash] opts :parents => Array of pointer elements leading to term (default [])
# @return [Hash] solr_doc
def self.solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts={})
  parents = opts.fetch(:parents, [])
  term_pointer = parents + [term.name]
  nodeset = doc.term_values(*term_pointer)

  # The position comes from the iteration itself. Looking the value up again
  # with nodeset.index(string) returned the first match for duplicate values
  # and nil for non-String values (e.g. Date), so children were solrized
  # under the wrong parent.
  nodeset.each_with_index do |n, position|
    # TODO: Solrizer::FieldMapper::Default is supposed to translate dates into full ISO 8601 formatted strings.
    # However, there is an integration issue with ActiveFedora using OM: it ignores the default field mapper given
    # in this gem that does this. So, the following is a workaround until it is fixed.
    node = n.is_a?(Date) ? DateTime.parse(n.to_s).to_time.utc.iso8601 : n.to_s

    doc.solrize_node(node, term_pointer, term, solr_doc, field_mapper)
    next if term.kind_of? OM::XML::NamedTermProxy

    term.children.each_pair do |_child_term_name, child_term|
      # A fresh opts hash per call, as before, so callees never share one.
      doc.solrize_term(child_term, solr_doc, field_mapper, { :parents => parents + [{ term.name => position }] })
    end
  end
  solr_doc
end

Instance Method Details

#solrize_node(node, term_pointer, term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts = {}) ⇒ Object



97
98
99
# File 'lib/solrizer/xml/terminology_based_solrizer.rb', line 97

# Instance-level wrapper around TerminologyBasedSolrizer.solrize_node that
# supplies this object as the document argument.
#
# @param [Object] node forwarded as the node value
# @param [Array] term_pointer forwarded unchanged
# @param [Object] term forwarded unchanged
# @param [Hash] solr_doc (values hash) to populate
# @param [Object] field_mapper defaults to this object's field_mapper
# @param [Hash] opts forwarded unchanged
# @return [Object] whatever the module-level solrize_node returns
def solrize_node(node, term_pointer, term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
  solrizer = Solrizer::XML::TerminologyBasedSolrizer
  solrizer.solrize_node(node, self, term_pointer, term, solr_doc, field_mapper, opts)
end

#solrize_term(term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts = {}) ⇒ Object



93
94
95
# File 'lib/solrizer/xml/terminology_based_solrizer.rb', line 93

# Instance-level wrapper around TerminologyBasedSolrizer.solrize_term that
# supplies this object as the document argument.
#
# @param [Object] term forwarded unchanged
# @param [Hash] solr_doc (values hash) to populate
# @param [Object] field_mapper defaults to this object's field_mapper
# @param [Hash] opts forwarded unchanged
# @return [Object] whatever the module-level solrize_term returns
def solrize_term(term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
  solrizer = Solrizer::XML::TerminologyBasedSolrizer
  solrizer.solrize_term(self, term, solr_doc, field_mapper, opts)
end

#to_solr(solr_doc = Hash.new, field_mapper = self.field_mapper) ⇒ Object

:nodoc:



89
90
91
# File 'lib/solrizer/xml/terminology_based_solrizer.rb', line 89

# Builds the solr document for this object by delegating to
# TerminologyBasedSolrizer.solrize with this object as the document.
#
# @param [Hash] solr_doc (values hash) to populate
# @param [Object] field_mapper defaults to this object's field_mapper
# @return [Object] whatever the module-level solrize returns
def to_solr(solr_doc = Hash.new, field_mapper = self.field_mapper) # :nodoc:
  solrizer = Solrizer::XML::TerminologyBasedSolrizer
  solrizer.solrize(self, solr_doc, field_mapper)
end