Class: Traject::Indexer

Inherits:
Object
  • Object
show all
Includes:
Macros::Basic, Macros::Marc21, QualifiedConstGet
Defined in:
lib/traject/indexer.rb

Overview

with a String name of class meeting the Writer contract.

Defined Under Namespace

Classes: Context, Settings

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Macros::Basic

#literal

Methods included from Macros::Marc21

#extract_all_marc_values, #extract_marc, first!, #serialized_marc, trim_punctuation

Methods included from QualifiedConstGet

#qualified_const_get

Constructor Details

#initialize ⇒ Indexer

Returns a new instance of Indexer.



58
59
60
61
# File 'lib/traject/indexer.rb', line 58

# Sets up a new Indexer with an empty list of indexing steps and a
# Settings object built from the class-level default settings.
def initialize
  @index_steps = []
  @settings    = Settings.new(self.class.default_settings)
end

Instance Attribute Details

#reader_class ⇒ Object



145
146
147
148
149
150
# File 'lib/traject/indexer.rb', line 145

# Returns the reader class, looking it up from the "reader_class_name"
# setting via qualified_const_get the first time it is needed and caching
# it in @reader_class.
#
# The cache check uses `defined?` rather than truthiness, so once
# @reader_class has been assigned (even to nil) no further lookup happens.
def reader_class
  return @reader_class if defined?(@reader_class)

  @reader_class = qualified_const_get(settings["reader_class_name"])
end

#writer_class ⇒ Object



152
153
154
155
156
157
# File 'lib/traject/indexer.rb', line 152

# Returns the writer class, looking it up from the "writer_class_name"
# setting via qualified_const_get the first time it is needed and caching
# it in @writer_class.
#
# The cache check uses `defined?` rather than truthiness, so once
# @writer_class has been assigned (even to nil) no further lookup happens.
def writer_class
  return @writer_class if defined?(@writer_class)

  @writer_class = qualified_const_get(settings["writer_class_name"])
end

Class Method Details

.default_settings ⇒ Object



170
171
172
173
174
175
# File 'lib/traject/indexer.rb', line 170

# Returns a new Hash of the default settings: the names of the reader and
# writer classes to use when none are configured. A fresh Hash is built on
# every call, so callers may modify the result freely.
def self.default_settings
  defaults = {}
  defaults["reader_class_name"] = "Traject::MarcReader"
  defaults["writer_class_name"] = "Traject::SolrJWriter"
  defaults
end

Instance Method Details

#map_record(record) ⇒ Object

Processes a single record, according to indexing rules set up in this Indexer. Returns a hash whose values are Arrays, and keys are strings.



104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/traject/indexer.rb', line 104

# Runs every registered indexing step against a single record and returns
# the resulting output hash (field name => Array of accumulated values).
#
# For each step, the optional lambda and then the optional block are
# invoked in that order, sharing one accumulator. How many arguments a
# callable receives depends on its arity:
#   arity 1        -> (record)
#   arity 2        -> (record, accumulator)
#   anything else  -> (record, accumulator, context)
#
# Values accumulated by a step are appended to whatever is already stored
# under that field name, so several steps may feed the same field.
def map_record(record)
  context = Context.new(:source_record => record, :settings => settings)

  @index_steps.each do |step|
    name   = step[:field_name]
    values = []
    context.field_name = name

    # Lambda first, then block; either may be absent.
    step.values_at(:lambda, :block).each do |callable|
      next unless callable

      args =
        case callable.arity
        when 1 then [record]
        when 2 then [record, values]
        else        [record, values, context]
        end
      callable.call(*args)
    end

    bucket = (context.output_hash[name] ||= [])
    bucket.concat(values)
    # Clear the current field name once the step is finished.
    context.field_name = nil
  end

  context.output_hash
end

#process(io_stream) ⇒ Object

Processes a stream of records, reading from the configured Reader, mapping according to configured mapping rules, and then writing to configured Writer.



135
136
137
138
139
140
141
142
143
# File 'lib/traject/indexer.rb', line 135

# Reads every record from io_stream with a freshly instantiated reader,
# maps each one with #map_record, and hands the result to a freshly
# instantiated writer via #put.
#
# The writer is closed (when it responds to #close) even if reading,
# mapping or writing raises, so it is not left open after a failure. The
# original exception still propagates to the caller.
#
# @param io_stream the input passed on to #reader!
# @return the result of writer.close, or nil if the writer has no #close
def process(io_stream)
  reader = reader!(io_stream)
  writer = writer!

  close_result = nil
  begin
    reader.each do |record|
      writer.put map_record(record)
    end
  ensure
    # Runs on both the success and the error path.
    close_result = writer.close if writer.respond_to?(:close)
  end

  close_result
end

#reader!(io_stream) ⇒ Object

Instantiate a Traject Reader, using the class set in #reader_class, initialized with the io_stream passed in.



161
162
163
# File 'lib/traject/indexer.rb', line 161

# Builds a new reader instance: the class comes from #reader_class and its
# constructor receives the given io_stream followed by the settings.
def reader!(io_stream)
  klass = reader_class
  klass.new(io_stream, settings)
end

#settings(new_settings = nil, &block) ⇒ Object

The Indexer’s settings are a hash of key/values – not nested, just one level – of configuration settings. Keys are strings.

The settings method with no arguments returns that hash.

With a hash and/or block argument, can be used to set new key/values. Each call merges onto the existing settings hash.

indexer.settings("a" => "a", "b" => "b")

indexer.settings do
  store "b", "new b"
end

indexer.settings #=> {"a" => "a", "b" => "new b"}

Even when called with arguments, it returns the settings hash too, so calls can be chained.



83
84
85
86
87
88
89
# File 'lib/traject/indexer.rb', line 83

# Reads or updates the indexer's settings.
#
# With no arguments, simply returns the settings object. When new_settings
# is given it is merged into the existing settings; when a block is given
# it is evaluated with the settings object as self (so e.g. `store` can be
# called directly). The merge happens before the block runs.
#
# Always returns the settings object, so calls can be chained.
def settings(new_settings = nil, &block)
  current = @settings

  current.merge!(new_settings) if new_settings
  current.instance_eval(&block) if block_given?

  current
end

#to_field(field_name, aLambda = nil, &block) ⇒ Object

Used to define an indexing mapping.



92
93
94
95
96
97
98
# File 'lib/traject/indexer.rb', line 92

# Registers an indexing step for the given field. The field name is stored
# as a String; the optional lambda and the optional block are both kept,
# each possibly nil. Returns the list of index steps.
def to_field(field_name, aLambda = nil, &block)
  step = { :field_name => field_name.to_s, :lambda => aLambda, :block => block }
  @index_steps.push(step)
end

#writer! ⇒ Object

Instantiate a Traject Writer, using the class set in #writer_class.



166
167
168
# File 'lib/traject/indexer.rb', line 166

# Builds a new writer instance: the class comes from #writer_class and its
# constructor receives the settings.
def writer!
  klass = writer_class
  klass.new(settings)
end