Class: Moonstone::Engine

Inherits:
Object
  • Object
show all
Includes:
Lucene::Document, Lucene::Index, Lucene::Search
Defined in:
lib/moonstone/engine.rb,
lib/moonstone/index_inspection.rb

Constant Summary

Constants included from Lucene::Document

Lucene::Document::Doc

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Engine

:store should be a String or some kind of Lucene::Store::Directory



10
11
12
13
# File 'lib/moonstone/engine.rb', line 10

# Builds an engine around an index store.
#
# options[:store]   - a String or some kind of Lucene::Store::Directory; a new
#                     Lucene::Store::RAMDirectory is used when it is absent.
# options[:inspect] - kept as-is and reported by #inspect_mode?.
def initialize(options = {})
  @inspect = options[:inspect]
  @store = options[:store]
  @store ||= Lucene::Store::RAMDirectory.new
end

Instance Attribute Details

#similarity ⇒ Object (readonly)

Returns the value of attribute similarity.



7
8
9
# File 'lib/moonstone/engine.rb', line 7

# Reader for @similarity. When it is set, #index, #update_documents, #writer
# and #searcher call .new on it and pass the instance to set_similarity.
def similarity
  @similarity
end

#store ⇒ Object (readonly)

Returns the value of attribute store.



7
8
9
# File 'lib/moonstone/engine.rb', line 7

# Reader for @store, the index location assigned in #initialize and handed to
# every IndexWriter/IndexReader/IndexSearcher this engine opens.
def store
  @store
end

Instance Method Details

#analyzer ⇒ Object

Returns an instance of the Analyzer class defined within this class’s namespace.



153
154
155
# File 'lib/moonstone/engine.rb', line 153

# Memoized analyzer for this engine: an instance of the Analyzer constant
# reachable from the engine's class when one is defined, otherwise a
# Lucene::Analysis::StandardAnalyzer.
def analyzer
  return @analyzer if @analyzer

  @analyzer =
    if defined?(self.class::Analyzer)
      self.class::Analyzer.new
    else
      Lucene::Analysis::StandardAnalyzer.new
    end
end

#close ⇒ Object



146
147
148
149
# File 'lib/moonstone/engine.rb', line 146

# Closes the cached searcher and reader, if any, and forgets them.
#
# The references are cleared so that later calls (#search, #doc_count,
# #document) lazily reopen fresh handles instead of reusing closed ones, and so
# that calling #close twice is harmless. The reader is closed even when closing
# the searcher raises.
#
# Returns nil.
def close
  open_searcher = @searcher
  open_reader = @reader
  @searcher = @reader = nil
  begin
    open_searcher.close if open_searcher
  ensure
    open_reader.close if open_reader
  end
  nil
end

#delete_document(term) ⇒ Object



105
106
107
# File 'lib/moonstone/engine.rb', line 105

# Single-term convenience form of #delete_documents.
#
# term - a hash with :field and :value, as expected by #delete_documents.
def delete_document(term)
  single_term = [term]
  delete_documents(single_term)
end

#delete_documents(terms) ⇒ Object

terms should be an enumerable set of hashes, with fields :field and :value, which combine to make a term to match documents to delete



95
96
97
98
99
100
101
102
103
# File 'lib/moonstone/engine.rb', line 95

# Deletes every document matching any of the given terms, then refreshes the
# cached searcher.
#
# terms - an enumerable of hashes with :field and :value; each pair becomes the
#         Term passed to the writer's deleteDocuments.
def delete_documents(terms)
  IndexWriter.open(@store, analyzer) do |index_writer|
    terms.each do |spec|
      index_writer.deleteDocuments(Term.new(spec[:field], spec[:value]))
    end
  end
  refresh_searcher
end

#doc_count ⇒ Object



48
49
50
51
# File 'lib/moonstone/engine.rb', line 48

# Returns max_doc from the engine's cached IndexReader, opening the reader on
# @store the first time it is needed.
def doc_count
  @reader = IndexReader.open(@store) unless @reader
  @reader.max_doc
end

#doc_from(record) ⇒ Object

Analyze and index all fields. Override this method for custom behavior.



159
160
161
# File 'lib/moonstone/engine.rb', line 159

# Turns a source record into an index document via Doc.create.
# Override this method for custom behavior.
def doc_from(record)
  document = Doc.create(record)
  document
end

#document(id) ⇒ Object



53
54
55
56
57
58
59
60
61
# File 'lib/moonstone/engine.rb', line 53

# Fetches the document with the given id through the cached IndexReader and
# decorates it with its tokens (from #tokens_for_doc) and its id.
#
# Returns nil when id is not below the reader's max_doc.
def document(id)
  @reader ||= IndexReader.open(@store)
  return unless id < @reader.max_doc

  @reader.document(id).tap do |found|
    found.tokens = tokens_for_doc(id)
    found.id = id
  end
end

#index(source, optimize = true) ⇒ Object

The source should be enumerable.



16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/moonstone/engine.rb', line 16

# Indexes every record of an enumerable source, then refreshes the cached
# searcher.
#
# source   - responds to each_with_index; each record goes through #doc_from and
#            is added only when that returns a truthy document.
# optimize - when truthy, the writer is optimized after all records are added.
#
# If a block is given it receives the still-open writer after the optional
# optimize step, for post-processing that needs writer access.
def index(source, optimize=true)
  IndexWriter.open(@store, analyzer) do |index_writer|
    index_writer.set_similarity(@similarity.new) if @similarity

    source.each_with_index do |record, position|
      indexed = position + 1
      document = doc_from(record)
      index_writer.add_document(document) if document
      # Progress log every 1000 records.
      Moonstone::Logger.info "Indexed #{indexed} records" if (indexed % 1000).zero?
    end
    index_writer.optimize if optimize
    yield index_writer if block_given?
  end
  refresh_searcher
end

#index_metadata ⇒ Object



43
44
45
46
# File 'lib/moonstone/engine.rb', line 43

# Returns the index's metadata entry, memoized in @index_metadata.
#
# Runs a TermQuery for field 'metadata' with value 'index' (the marker field
# that #stamp_metadata writes) through #search and keeps the last element of
# the result. The query is only built when nothing is memoized yet.
def index_metadata
  @index_metadata ||= search(TermQuery.new('metadata', 'index')).last
end

#insert_document(source, optimize = false) ⇒ Object



69
70
71
# File 'lib/moonstone/engine.rb', line 69

# Single-record convenience form of #insert_documents.
#
# source   - one record that #doc_from can turn into a document.
# optimize - forwarded unchanged to #insert_documents.
def insert_document(source, optimize=false)
  batch = [source]
  insert_documents(batch, optimize)
end

#insert_documents(source, optimize = false) ⇒ Object

Adds documents to the index. source must be an enumerable collection of objects that doc_from can turn into a document.



64
65
66
67
# File 'lib/moonstone/engine.rb', line 64

# Adds documents to the index without optimizing by default.
#
# source   - an enumerable of records that #doc_from can turn into documents.
# optimize - forwarded to #index; defaults to false here (true in #index).
#
# #index already refreshes the searcher once the writer is closed, so no second
# refresh is issued here; a second one would open another IndexSearcher and
# immediately discard the first.
def insert_documents(source, optimize=false)
  index(source, optimize)
end

#inspect_mode? ⇒ Boolean

Returns:

  • (Boolean)


195
196
197
# File 'lib/moonstone/engine.rb', line 195

# Whether inspect mode is on. Returns the raw :inspect option captured by
# #initialize (truthy/falsy, not coerced to a strict Boolean). #search attaches
# token data to each hit only when this is truthy.
def inspect_mode?
  @inspect
end

#optimize ⇒ Object



109
110
111
112
113
114
# File 'lib/moonstone/engine.rb', line 109

# Opens a writer on the store, optimizes the index, then refreshes the cached
# searcher.
def optimize
  IndexWriter.open(@store, analyzer) { |index_writer| index_writer.optimize }
  refresh_searcher
end

#parser(field, analyzer = nil) ⇒ Object



190
191
192
193
# File 'lib/moonstone/engine.rb', line 190

# Returns a query parser for the given field.
#
# field    - field name; parsers for the engine's own analyzer are cached per
#            field.to_sym.
# analyzer - optional analyzer to use instead of the engine's #analyzer.
#
# Only parsers built with the engine's own analyzer are cached. Previously the
# cache was keyed by field alone, so an explicit analyzer was silently ignored
# once any parser for that field existed, and a parser built with a custom
# analyzer could be served to later default calls. A call with a different
# analyzer now always gets a parser built with that analyzer.
def parser(field, analyzer = nil)
  default_analyzer = self.analyzer
  chosen = analyzer || default_analyzer
  unless chosen.equal?(default_analyzer)
    return Lucene::QueryParser::Parser.new(field, chosen)
  end

  @parser ||= {}
  @parser[field.to_sym] ||= Lucene::QueryParser::Parser.new(field, default_analyzer)
end

#reader {|reader| ... } ⇒ Object

Opens an IndexReader for the duration of the block.

engine.reader { |r| r.terms }

Yields:



183
184
185
186
187
# File 'lib/moonstone/engine.rb', line 183

# Opens an IndexReader on the store for the duration of the block.
#
#   engine.reader { |r| r.terms }
#
# The reader is closed in an ensure clause, so it is released even when the
# block raises. Returns the block's value.
def reader
  index_reader = IndexReader.open(@store)
  begin
    yield index_reader
  ensure
    index_reader.close
  end
end

#refresh_searcher ⇒ Object

Reopen the searcher (used when the index has changed)



142
143
144
# File 'lib/moonstone/engine.rb', line 142

# Replaces the cached searcher with a new IndexSearcher on the store (used when
# the index has changed). Does nothing and returns nil when no searcher has been
# created yet, because #search builds one lazily.
def refresh_searcher
  return unless @searcher

  @searcher = IndexSearcher.new(@store)
end

#search(input, options = {}) ⇒ Object

Takes any kind of input object parsable by your #create_query method. Quack.

Options patterns (see javadoc for org.apache.lucene.search.Searcher). Returns a TopDocs object. Note that Hits is deprecated, so the versions of search() returning a Hits object are not implemented.



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/moonstone/engine.rb', line 120

# Runs a query against the index and returns the resulting top docs object.
#
# input   - a Lucene::Search::Query, or anything #create_query can turn into one.
# options - :hit_collector  when present, the search is run with it (plus
#                           :filter if given) and its topDocs are returned;
#           :filter         passed through to the searcher;
#           :limit          defaults to 25;
#           :offset         defaults to 0;
#           :sort           appended to the searcher arguments when present.
#
# The caller's options hash is never modified: defaults are applied to local
# variables rather than written back with ||=.
#
# Each hit is yielded to the block, if one is given; in inspect mode its tokens
# are attached first.
def search(input, options = {})
  query = input.kind_of?(Lucene::Search::Query) ? input : create_query(input)
  @searcher ||= IndexSearcher.new(@store)
  top_docs = if (hit_collector = options[:hit_collector])
    args = [ options[:filter], hit_collector ].compact
    @searcher.search(query, *args)
    hit_collector.topDocs
  else
    limit = options[:limit] || 25
    offset = options[:offset] || 0
    # Always pass both the filter and the hit count, even if the filter is nil.
    args = [ options[:filter], limit + offset ]
    args << options[:sort] if options[:sort]
    @searcher.search(query, *args).offset!(offset)
  end
  top_docs.each(@searcher) do |doc|
    doc.tokens = tokens_for_doc(doc) if inspect_mode?
    yield doc if block_given?
  end
  top_docs
end

#searcher ⇒ Object

Opens an IndexSearcher for the duration of the block.

engine.searcher { |s| s.search(query_object) }


174
175
176
177
178
179
# File 'lib/moonstone/engine.rb', line 174

# Opens an IndexSearcher on the store for the duration of the block.
#
#   engine.searcher { |s| s.search(query_object) }
#
# When @similarity is set, a new instance of it is installed on the searcher
# before the block runs.
def searcher
  IndexSearcher.open(@store) do |index_searcher|
    if @similarity
      index_searcher.set_similarity(@similarity.new)
    end
    yield index_searcher
  end
end

#stamp_metadata ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
# File 'lib/moonstone/engine.rb', line 31

# Writes a bookkeeping document describing this index build.
#
# The document carries a not-analyzed marker field 'metadata' => 'index' (the
# term #index_metadata queries for) plus unindexed fields: today's date, the
# engine class name, the abbreviated git HEAD revision of the working directory
# and the 'query_conditions' environment variable (empty string when unset).
def stamp_metadata
  metadata = Lucene::Document::Doc.new
  metadata.add_field 'metadata', 'index', :index => :not_analyzed
  metadata.add_field 'build_date', Date.today.strftime("%Y-%m-%d"), :index => false
  metadata.add_field 'engine_name', self.class.name, :index => false
  # Shells out to git; the value is whatever `git show-ref` prints for HEAD.
  metadata.add_field 'engine_version', `git show-ref -s --abbrev HEAD`.chomp, :index => false
  metadata.add_field 'query_conditions', ENV['query_conditions'].to_s, :index => false
  writer do |w|
    w.add_document(metadata)
  end
end

#tokens_for_doc(doc, fields = nil) ⇒ Object

Return a hash of tokens, keyed on field name, for the given doc. Doc can be either a Document, or the integer document id. Note that if it is a Document, doc.id cannot be nil



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/moonstone/index_inspection.rb', line 8

# Returns a hash of tokens, keyed on field name, for the given doc.
#
# doc    - a Lucene::Document::Doc (whose id must not be nil) or an integer
#          document id, in which case the document is loaded from the reader.
# fields - field names to inspect; defaults to all of the document's keys.
#
# Each value is an array in which every term sits at the positions reported by
# the field's term vector. Fields with no usable term vector map to [].
def tokens_for_doc(doc, fields = nil)
  result = {}
  self.reader do |index_reader|
    unless doc.kind_of?(Lucene::Document::Doc)
      doc_id = doc
      doc = index_reader.document(doc_id)
      doc.id = doc_id
    end
    fields = doc.keys if fields.nil?
    fields.each do |field|
      by_position = result[field] = []
      vector = index_reader.getTermFreqVector(doc.id, field)
      # Only vectors that expose positions can be laid out by position.
      next unless vector && vector.size > 0 && vector.respond_to?(:getTermPositions)

      terms = vector.getTerms
      terms.length.times do |i|
        (vector.getTermPositions(i) || []).each { |pos| by_position[pos] = terms[i] }
      end
    end
  end
  result
end

#tokens_for_field(doc, field) ⇒ Object

Helper, delegates to tokens_for_doc



33
34
35
# File 'lib/moonstone/index_inspection.rb', line 33

# Helper that asks #tokens_for_doc for a single field and returns that field's
# token array.
def tokens_for_field(doc, field)
  tokens_by_field = tokens_for_doc(doc, [field])
  tokens_by_field[field]
end

#update_document(doc) ⇒ Object



89
90
91
# File 'lib/moonstone/engine.rb', line 89

# Single-entry convenience form of #update_documents.
#
# doc - a hash with :field, :value and :document, as expected by
#       #update_documents.
def update_document(doc)
  single_update = [doc]
  update_documents(single_update)
end

#update_documents(docs) ⇒ Object

docs must be enumerable set of hashes, with fields :field, :value, :document (where field and value combine to make a term to match documents to replace)



76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/moonstone/engine.rb', line 76

# Replaces documents in the index, then refreshes the cached searcher.
#
# docs - an enumerable of hashes with :field, :value and :document. :field and
#        :value form the Term matching the documents to replace; :document is
#        run through #doc_from to build the replacement.
#
# Raises RuntimeError ("Invalid arguments") at the first entry missing any of
# the three keys; entries before it have already been handed to the writer.
def update_documents(docs)
  IndexWriter.open(@store, analyzer) do |index_writer|
    index_writer.set_similarity(@similarity.new) if @similarity
    docs.each do |entry|
      field, value, record = entry[:field], entry[:value], entry[:document]
      raise "Invalid arguments" unless field && value && record
      index_writer.updateDocument(Term.new(field, value), doc_from(record))
    end
  end
  refresh_searcher
end

#writer ⇒ Object

Opens an IndexWriter for the duration of the block.

engine.writer { |w| w.add_document(doc) }


165
166
167
168
169
170
# File 'lib/moonstone/engine.rb', line 165

# Opens an IndexWriter on the store for the duration of the block.
#
#   engine.writer { |w| w.add_document(doc) }
#
# When @similarity is set, a new instance of it is installed on the writer
# before the block runs.
def writer
  IndexWriter.open(@store, analyzer) do |index_writer|
    if @similarity
      index_writer.set_similarity(@similarity.new)
    end
    yield index_writer
  end
end