Class: Solrizer::Fedora::Indexer

Inherits:

Object

Object
Solrizer::Fedora::Indexer

show all

Defined in:: lib/solrizer/fedora/indexer.rb

Constant Summary collapse

@@unique_id = Class variables

Instance Attribute Summary collapse

#extractor ⇒ Object

The extractor to use.
#index_full_text ⇒ Object
Boolean or “true” or “false”: tells the indexer whether to index full text or just field values.
#solr ⇒ Object

The instance of solr that updates will be written to.

Class Method Summary collapse

.unique_id ⇒ Object

Instance Method Summary collapse

#class_exists?(class_name) ⇒ Boolean
#extract_rels_ext(obj, ds_name, solr_doc = Hash.new) ⇒ Object
#extract_xml_to_solr(obj, ds_name, solr_doc = Hash.new) ⇒ Object

This method extracts the facet categories from the given Fedora object’s external tag datastream.
#generate_dates(solr_doc) ⇒ Object

This method generates the month and day facets from the date_t in solr_doc.
#index(obj) ⇒ Object

This method adds a document to the Solr search index.
#initialize(opts = {}) ⇒ Indexer constructor

This method performs initialization tasks.
#query(query_str) ⇒ Object

This method queries the Solr search index and returns a response.

Constructor Details

#initialize(opts = {}) ⇒ `Indexer`

This method performs initialization tasks

# File 'lib/solrizer/fedora/indexer.rb', line 29

# Sets up a new indexer.
#
# Steps, in order:
# * makes sure the class variable @@index_list exists (defaulting it to false),
# * creates the ::Solrizer::Extractor used by this indexer,
# * records whether full text should be indexed,
# * calls connect.
#
# @param opts [Hash] options; only :index_full_text is read here. Full-text
#   indexing is switched on when it is exactly true or the string "true";
#   any other value (including a missing key) switches it off.
def initialize(opts = {})
  @@index_list = false unless defined?(@@index_list)
  @extractor = ::Solrizer::Extractor.new

  requested = opts[:index_full_text]
  @index_full_text = (requested == true || requested == "true")

  connect
end

Instance Attribute Details

#extractor ⇒ `Object`

The extractor to use. This is usually Solrizer::Extractor



21
22
23

# File 'lib/solrizer/fedora/indexer.rb', line 21

# Reader for the extractor this indexer uses.
#
# @return [Object] the current value of @extractor
def extractor
  @extractor
end

#index_full_text ⇒ `Object`

Boolean or “true” or “false”: tells the indexer whether to index full text or just field values



24
25
26

# File 'lib/solrizer/fedora/indexer.rb', line 24

# Reader for the flag that tells the indexer whether to index full text or
# just field values.
#
# @return [Object] the current value of @index_full_text
def index_full_text
  @index_full_text
end

#solr ⇒ `Object`

The instance of solr that updates will be written to



18
19
20

# File 'lib/solrizer/fedora/indexer.rb', line 18

# Reader for the Solr instance that updates are written to.
#
# @return [Object] the current value of @solr
#   NOTE(review): the assignment of @solr is not visible in this listing —
#   presumably done by connect; confirm.
def solr
  @solr
end

Class Method Details

.unique_id ⇒ `Object`



12
13
14

# File 'lib/solrizer/fedora/indexer.rb', line 12

# Class-level reader for the @@unique_id class variable.
#
# @return [Object] the current value of @@unique_id
#   NOTE(review): where @@unique_id is assigned is not visible in this listing.
def self.unique_id
  @@unique_id
end

Instance Method Details

#class_exists?(class_name) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/solrizer/fedora/indexer.rb', line 238

# Tells whether class_name names a constant that is a Class.
#
# @param class_name [String, Symbol] constant name to look up with Module.const_get
# @return [Boolean] true when the constant resolves to a Class; false when it
#   resolves to something else (e.g. a Module) or when the lookup raises NameError
def class_exists?(class_name)
  begin
    Module.const_get(class_name).is_a?(Class)
  rescue NameError
    false
  end
end

#extract_rels_ext(obj, ds_name, solr_doc = Hash.new) ⇒ `Object`

# File 'lib/solrizer/fedora/indexer.rb', line 116

# Looks up the datastream called ds_name on obj through
# Repository.get_datastream and passes its content, together with solr_doc,
# to extractor.extract_rels_ext.
#
# @param obj [Object] object handed to Repository.get_datastream
# @param ds_name [Object] name of the datastream to fetch
# @param solr_doc [Hash] document passed on to the extractor (defaults to a new Hash)
# @return [Object] whatever extractor.extract_rels_ext returns
def extract_rels_ext(obj, ds_name, solr_doc = Hash.new)
  content = Repository.get_datastream(obj, ds_name).content
  extractor.extract_rels_ext(content, solr_doc)
end

#extract_xml_to_solr(obj, ds_name, solr_doc = Hash.new) ⇒ `Object`

This method extracts the facet categories from the given Fedora object’s external tag datastream

# File 'lib/solrizer/fedora/indexer.rb', line 108

# Looks up the datastream called ds_name on obj through
# Repository.get_datastream and passes its content, together with solr_doc,
# to extractor.xml_to_solr.
#
# @param obj [Object] object handed to Repository.get_datastream
# @param ds_name [Object] name of the datastream to fetch
# @param solr_doc [Hash] document passed on to the extractor (defaults to a new Hash)
# @return [Object] whatever extractor.xml_to_solr returns
def extract_xml_to_solr(obj, ds_name, solr_doc = Hash.new)
  content = Repository.get_datastream(obj, ds_name).content
  extractor.xml_to_solr(content, solr_doc)
end

#generate_dates(solr_doc) ⇒ `Object`

This method generates the month and day facets from the date_t in solr_doc

# File 'lib/solrizer/fedora/indexer.rb', line 125

# Adds zero-padded :month_facet and :day_facet values to solr_doc, derived
# from its :date_t field.
#
# When :date_t is nil, the placeholder "9999-99-99" is inserted first so that
# undated documents are easy to find. When :date_t is an Array only its first
# element is consulted. A month outside 1..12, a day outside 1..31, or a part
# that Date._parse could not find at all is recorded as "99".
#
# All writes go through ::Solrizer::Extractor.insert_solr_field_value, which
# is assumed to update solr_doc in place (the placeholder is read back from
# solr_doc right after it is inserted).
#
# @param solr_doc [Hash] the document being built
# @return [Hash] the solr_doc that was passed in
def generate_dates(solr_doc)
  # This will check for valid dates, but it seems most of the dates are currently invalid....
  #date_check =  /^(19|20)\d\d([- \/.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])/

  # If there is no date_t, add one with an easy-to-find value.
  if solr_doc[:date_t].nil?
    ::Solrizer::Extractor.insert_solr_field_value(solr_doc, :date_t, "9999-99-99")
  end

  # Grab the date value from date_t regardless of whether it is inside an Array.
  date_value = solr_doc[:date_t]
  date_value = date_value.first if date_value.kind_of?(Array)

  # to_s keeps Date._parse from raising TypeError when date_t is an empty
  # Array (first => nil) or holds a non-String value; an unparseable string
  # simply yields no :mon / :mday and falls through to "99" below.
  date_parts = Date._parse(date_value.to_s)

  facet_specs = [
    [:month_facet, :mon,  1..12],
    [:day_facet,   :mday, 1..31]
  ]

  facet_specs.each do |facet_name, part_key, valid_range|
    part = date_parts[part_key]
    if part && valid_range.include?(part)
      facet_value = part.to_s.rjust(2, '0')
    else
      facet_value = "99"
    end
    ::Solrizer::Extractor.insert_solr_field_value(solr_doc, facet_name, facet_value)
  end

  solr_doc
end

#index(obj) ⇒ `Object`

This method adds a document to the Solr search index

# File 'lib/solrizer/fedora/indexer.rb', line 205

# Builds a Solr document for obj with create_document, adds it to solr and
# commits.
#
# Nothing is rescued here, so any error raised by create_document, solr.add
# or solr.commit propagates to the caller.
#
# @param obj [Object] the object to index; passed straight to create_document
# @return [Object] whatever solr.commit returns
def index(obj)
  document = create_document(obj)
  solr.add(document)
  solr.commit
end

#query(query_str) ⇒ `Object`

This method queries the Solr search index and returns a response



231
232
233

# File 'lib/solrizer/fedora/indexer.rb', line 231

# Sends query_str to conn.query and returns the response.
#
# NOTE(review): conn is not among the attributes documented for this class
# (extractor, index_full_text, solr) — confirm it is defined elsewhere.
#
# @param query_str [Object] the query handed to conn.query
# @return [Object] whatever conn.query returns
def query(query_str)
  conn.query(query_str)
end

Class: Solrizer::Fedora::Indexer

Constant Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ Indexer

Instance Attribute Details

#extractor ⇒ Object

#index_full_text ⇒ Object

#solr ⇒ Object

Class Method Details

.unique_id ⇒ Object

Instance Method Details

#class_exists?(class_name) ⇒ Boolean

#extract_rels_ext(obj, ds_name, solr_doc = Hash.new) ⇒ Object

#extract_xml_to_solr(obj, ds_name, solr_doc = Hash.new) ⇒ Object

#generate_dates(solr_doc) ⇒ Object

#index(obj) ⇒ Object

#query(query_str) ⇒ Object