Class: RDig::Index::Indexer

Inherits:
Object
  • Object
show all
Includes:
MonitorMixin
Defined in:
lib/rdig/index.rb

Overview

Used by the crawler to build the Ferret index.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(settings) ⇒ Indexer

Returns a new instance of Indexer.



10
11
12
13
14
15
16
17
18
# File 'lib/rdig/index.rb', line 10

# Sets up an indexer that writes to a Ferret index.
#
# settings - index configuration object; this method reads +path+, +create+
#            and +analyzer+ from it and keeps the whole object in @config
#            for later use.
#
# The document counter starts at zero.
def initialize(settings)
  @config            = settings
  @indexed_documents = 0

  @index_writer = Ferret::Index::IndexWriter.new(
    :path     => settings.path,
    :create   => settings.create,
    :analyzer => settings.analyzer
  )

  # The empty parentheses are deliberate: a bare `super` would implicitly
  # forward +settings+ up the chain. Per the original author's note,
  # MonitorMixin does not get initialized unless super() is called this way.
  super()
end

Instance Attribute Details

#indexed_documents ⇒ Object (readonly)

Returns the value of attribute indexed_documents.



8
9
10
# File 'lib/rdig/index.rb', line 8

# Read-only accessor for the @indexed_documents counter, which the
# constructor sets to 0 and add_to_index increments by one for every
# document written to the index.
def indexed_documents
  @indexed_documents
end

Instance Method Details

#add_to_index(document) ⇒ Object Also known as: <<



20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/rdig/index.rb', line 20

# Writes one crawled document to the index and bumps the document counter.
#
# document - object responding to +uri+, +title+ and +body+.
#
# If the configuration provides a +rewrite_uri+ callable, it is invoked with
# the document's URI before the index entry is built. Its return value is
# ignored, so any rewriting has to happen in place on the URI object.
#
# Returns the value of the synchronized block, i.e. the updated
# @indexed_documents count.
def add_to_index(document)
  RDig.logger.debug("add to index: #{document.uri}")

  if @config.rewrite_uri
    @config.rewrite_uri.call(document.uri)
  end

  # No per-field options are given; the original author expected Ferret's
  # defaults to store and tokenize every field.
  fields = {
    :url   => document.uri.to_s,
    :title => document.title,
    :data  => document.body
  }

  # Writing and counting happen under the monitor so they stay in step.
  synchronize do
    @index_writer << fields
    @indexed_documents += 1
  end
end

#close ⇒ Object



36
37
38
39
40
# File 'lib/rdig/index.rb', line 36

# Optimizes the index and closes the underlying writer.
#
# The writer is detached from the instance first and closed in an +ensure+
# clause, so it is released even when +optimize+ raises; the exception still
# propagates to the caller. Calling close again after the writer has been
# released is a no-op instead of failing on nil.
#
# Returns nil.
def close
  writer = @index_writer
  return unless writer

  @index_writer = nil
  begin
    writer.optimize
  ensure
    # Always release the writer, even if optimizing failed.
    writer.close
  end
  nil
end