Class: Moonstone::Engine
- Inherits:
-
Object
- Object
- Moonstone::Engine
- Includes:
- Lucene::Document, Lucene::Index, Lucene::Search
- Defined in:
- lib/moonstone/engine.rb,
lib/moonstone/index_inspection.rb
Constant Summary
Constants included from Lucene::Document
Instance Attribute Summary collapse
-
#similarity ⇒ Object
readonly
Returns the value of attribute similarity.
-
#store ⇒ Object
readonly
Returns the value of attribute store.
Instance Method Summary collapse
-
#analyzer ⇒ Object
Returns an instance of the Analyzer class defined within this class’s namespace.
- #close ⇒ Object
- #delete_document(term) ⇒ Object
-
#delete_documents(terms) ⇒ Object
terms should be an enumerable set of hashes, with fields :field and :value, which combine to make a term to match documents to delete.
- #doc_count ⇒ Object
-
#doc_from(record) ⇒ Object
Analyze and index all fields.
- #document(id) ⇒ Object
-
#index(source, optimize = true) ⇒ Object
The source should be enumerable.
- #index_metadata ⇒ Object
-
#initialize(options = {}) ⇒ Engine
constructor
:store should be a String or some kind of Lucene::Store::Directory.
- #insert_document(source, optimize = false) ⇒ Object
-
#insert_documents(source, optimize = false) ⇒ Object
Adds docs to index.
- #inspect_mode? ⇒ Boolean
- #optimize ⇒ Object
- #parser(field, analyzer = nil) ⇒ Object
-
#reader {|reader| ... } ⇒ Object
Opens an IndexReader for the duration of the block.
-
#refresh_searcher ⇒ Object
Reopen the searcher (used when the index has changed).
-
#search(input, options = {}) ⇒ Object
Takes any kind of input object parsable by your #create_query method.
-
#searcher ⇒ Object
Opens an IndexSearcher for the duration of the block.
- #stamp_metadata ⇒ Object
-
#tokens_for_doc(doc, fields = nil) ⇒ Object
Return a hash of tokens, keyed on field name, for the given doc.
-
#tokens_for_field(doc, field) ⇒ Object
Helper, delegates to tokens_for_doc.
- #update_document(doc) ⇒ Object
-
#update_documents(docs) ⇒ Object
docs must be enumerable set of hashes, with fields :field, :value, :document (where field and value combine to make a term to match documents to replace).
-
#writer ⇒ Object
Opens an IndexWriter for the duration of the block.
Constructor Details
Instance Attribute Details
#similarity ⇒ Object (readonly)
Returns the value of attribute similarity.
7 8 9 |
# File 'lib/moonstone/engine.rb', line 7
# Read-only accessor for the @similarity instance variable.
# Other methods in this class call `@similarity.new`, so this is presumably a
# similarity class rather than an instance — TODO confirm against #initialize.
#
# @return [Object] the stored similarity (nil when none was configured)
def similarity
  @similarity
end
#store ⇒ Object (readonly)
Returns the value of attribute store.
7 8 9 |
# File 'lib/moonstone/engine.rb', line 7
# Read-only accessor for the @store instance variable, the value handed to
# IndexReader / IndexWriter / IndexSearcher when they are opened.
#
# @return [Object] the stored index location
def store
  @store
end
Instance Method Details
#analyzer ⇒ Object
Returns an instance of the Analyzer class defined within this class’s namespace.
153 154 155 |
# File 'lib/moonstone/engine.rb', line 153
# Returns the memoized analyzer for this engine.
#
# If the engine's class namespace defines an Analyzer constant, an instance
# of it is used; otherwise a Lucene::Analysis::StandardAnalyzer is created.
#
# @return [Object] the analyzer instance (same object on every call)
def analyzer
  return @analyzer if @analyzer

  @analyzer =
    if defined?(self.class::Analyzer)
      self.class::Analyzer.new
    else
      Lucene::Analysis::StandardAnalyzer.new
    end
end
#close ⇒ Object
146 147 148 149 |
# File 'lib/moonstone/engine.rb', line 146
# Closes the cached searcher and reader, if any, and forgets them.
#
# The handles are reset to nil after closing because other methods memoize
# them with `||=`; leaving a closed object in place would make later calls
# reuse a closed handle instead of lazily opening a fresh one.
#
# @return [nil]
def close
  if @searcher
    @searcher.close
    @searcher = nil
  end
  if @reader
    @reader.close
    @reader = nil
  end
  nil
end
#delete_document(term) ⇒ Object
105 106 107 |
# File 'lib/moonstone/engine.rb', line 105
# Single-term convenience wrapper around #delete_documents.
#
# @param term [Hash] a hash with :field and :value keys
# @return whatever #delete_documents returns
def delete_document(term)
  single = [term]
  delete_documents(single)
end
#delete_documents(terms) ⇒ Object
terms should be an enumerable set of hashes, with fields :field and :value, which combine to make a term to match documents to delete
95 96 97 98 99 100 101 102 103 |
# File 'lib/moonstone/engine.rb', line 95
# Deletes every document matching any of the given terms, then refreshes
# the cached searcher.
#
# @param terms [Enumerable<Hash>] hashes with :field and :value keys; each
#   pair is turned into a Term used to match documents to delete
# @return whatever #refresh_searcher returns
def delete_documents(terms)
  IndexWriter.open(@store, analyzer) do |index_writer|
    terms.each do |spec|
      index_writer.deleteDocuments(Term.new(spec[:field], spec[:value]))
    end
  end
  refresh_searcher
end
#doc_count ⇒ Object
48 49 50 51 |
# File 'lib/moonstone/engine.rb', line 48
# Returns the reader's max_doc value, opening and caching an IndexReader on
# first use.
#
# NOTE(review): in Lucene, maxDoc usually counts deleted documents too —
# confirm whether numDocs was intended here.
#
# @return [Integer] @reader.max_doc
def doc_count
  @reader = IndexReader.open(@store) unless @reader
  @reader.max_doc
end
#doc_from(record) ⇒ Object
Analyze and index all fields. Override this method for custom behavior.
159 160 161 |
# File 'lib/moonstone/engine.rb', line 159
# Builds an index document from a source record via Doc.create.
# This is the hook to override when records need custom conversion.
#
# @param record [Object] a source record accepted by Doc.create
# @return the value returned by Doc.create
def doc_from(record)
  built = Doc.create(record)
  built
end
#document(id) ⇒ Object
53 54 55 56 57 58 59 60 61 |
# File 'lib/moonstone/engine.rb', line 53
# Fetches the stored document with the given id and decorates it with its
# tokens and id. The IndexReader is opened on first use and cached.
#
# @param id [Integer] document id; must be below the reader's max_doc
# @return the document, or nil when id is not below max_doc
def document(id)
  @reader ||= IndexReader.open(@store)
  return unless id < @reader.max_doc

  @reader.document(id).tap do |found|
    found.tokens = tokens_for_doc(id)
    found.id = id
  end
end
#index(source, optimize = true) ⇒ Object
The source should be enumerable.
16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/moonstone/engine.rb', line 16
# Indexes every record of an enumerable source, then refreshes the searcher.
#
# Each record is converted with #doc_from; records for which it returns a
# falsy value are skipped. Progress is logged every 1000 records.
#
# @param source [#each_with_index] records to index
# @param optimize [Boolean] whether to optimize the index after adding
# @yield [writer] optional post-processing hook, run while the writer is
#   still open (after the optional optimize)
# @return whatever #refresh_searcher returns
def index(source, optimize=true)
  IndexWriter.open(@store, analyzer) do |index_writer|
    index_writer.set_similarity(@similarity.new) if @similarity

    source.each_with_index do |record, position|
      count = position + 1
      built = doc_from(record)
      index_writer.add_document(built) if built
      Moonstone::Logger.info "Indexed #{count} records" if (count % 1000).zero?
    end

    index_writer.optimize if optimize
    # Post-processing that still needs access to the open writer.
    yield index_writer if block_given?
  end
  refresh_searcher
end
#index_metadata ⇒ Object
43 44 45 46 |
# File 'lib/moonstone/engine.rb', line 43
# Returns the memoized index-metadata document: the last hit of a TermQuery
# on field 'metadata' with value 'index' (the field written by
# #stamp_metadata).
#
# The query is only built on the first call, when nothing is cached yet.
#
# @return the last matching document (nil/false results are not cached)
def index_metadata
  @index_metadata ||= search(TermQuery.new('metadata', 'index')).last
end
#insert_document(source, optimize = false) ⇒ Object
69 70 71 |
# File 'lib/moonstone/engine.rb', line 69
# Single-record convenience wrapper around #insert_documents.
#
# @param source [Object] one record that #doc_from can turn into a document
# @param optimize [Boolean] forwarded to #insert_documents
# @return whatever #insert_documents returns
def insert_document(source, optimize=false)
  records = [source]
  insert_documents(records, optimize)
end
#insert_documents(source, optimize = false) ⇒ Object
Adds docs to the index. source must be an enumerable of objects that doc_from can turn into a document.
64 65 66 67 |
# File 'lib/moonstone/engine.rb', line 64
# Adds documents to the index by delegating to #index, with optimization
# off by default.
#
# #index already refreshes the searcher as its last step, so no second
# refresh is done here; a second one would only open another IndexSearcher.
#
# @param source [#each_with_index] objects that #doc_from can turn into
#   documents
# @param optimize [Boolean] whether to optimize the index afterwards
# @return whatever #index returns (the result of its searcher refresh)
def insert_documents(source, optimize=false)
  index(source, optimize)
end
#inspect_mode? ⇒ Boolean
195 196 197 |
# File 'lib/moonstone/engine.rb', line 195
# Reports the @inspect flag. When truthy, #search attaches tokens to every
# hit it yields.
#
# @return the raw value of @inspect (not coerced to true/false)
def inspect_mode?
  @inspect
end
#optimize ⇒ Object
109 110 111 112 113 114 |
# File 'lib/moonstone/engine.rb', line 109
# Opens a writer on the store, optimizes the index, then refreshes the
# cached searcher.
#
# @return whatever #refresh_searcher returns
def optimize
  IndexWriter.open(@store, analyzer) { |index_writer| index_writer.optimize }
  refresh_searcher
end
#parser(field, analyzer = nil) ⇒ Object
190 191 192 193 |
# File 'lib/moonstone/engine.rb', line 190
# Returns a query parser for the given field, cached per field.
#
# NOTE(review): the cache key is the field alone, so an analyzer passed on a
# later call for an already-cached field is ignored — confirm this is intended.
#
# @param field [#to_sym] default field for the parser
# @param analyzer [Object, nil] analyzer to use; falls back to #analyzer
# @return [Lucene::QueryParser::Parser] the cached parser for the field
def parser(field, analyzer = nil)
  @parser ||= {}
  cache_key = field.to_sym
  @parser[cache_key] ||= Lucene::QueryParser::Parser.new(field, analyzer || self.analyzer)
end
#reader {|reader| ... } ⇒ Object
Opens an IndexReader for the duration of the block.
engine.reader { |r| r.terms }
183 184 185 186 187 |
# File 'lib/moonstone/engine.rb', line 183
# Opens an IndexReader on the store for the duration of the block.
#
# The reader is closed in an `ensure`, so it is released even when the block
# raises, and the block's value is returned so that calls such as
# `engine.reader { |r| r.terms }` yield a usable result.
#
# @yield [reader] the freshly opened IndexReader
# @return the value of the block
def reader
  index_reader = IndexReader.open(@store)
  begin
    yield index_reader
  ensure
    index_reader.close
  end
end
#refresh_searcher ⇒ Object
Reopen the searcher (used when the index has changed)
142 143 144 |
# File 'lib/moonstone/engine.rb', line 142
# Replaces the cached searcher with a new IndexSearcher on the store; used
# after the index has changed.
#
# Does nothing when no searcher has been created yet, since #search creates
# one lazily. The previous searcher is replaced without being closed.
#
# @return [IndexSearcher, nil] the new searcher, or nil when none was cached
def refresh_searcher
  return unless @searcher

  @searcher = IndexSearcher.new(@store)
end
#search(input, options = {}) ⇒ Object
Takes any kind of input object parsable by your #create_query method. Quack.
Options (see the Javadoc for org.apache.lucene.search.Searcher): :hit_collector, :filter, :limit (defaults to 25), :offset (defaults to 0) and :sort. Returns a TopDocs object. Note that Hits is deprecated, so the versions of search() that return a Hits object are not implemented.
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/moonstone/engine.rb', line 120
# Runs a query against the index and returns the resulting top docs.
#
# input is used as-is when it is a Lucene::Search::Query; anything else is
# handed to #create_query. The searcher is created lazily and cached.
#
# Recognized options (the caller's hash is not modified):
#   :hit_collector - collector passed to the searcher; its topDocs is the result
#   :filter        - passed through to the searcher
#   :limit         - page size, defaults to 25
#   :offset        - number of leading hits to skip, defaults to 0
#   :sort          - appended to the searcher arguments when present
#
# @param input [Object] a query object or input for #create_query
# @param options [Hash] see above
# @yield [doc] each hit, after tokens were attached in inspect mode
# @return the top docs object
def search(input, options = {})
  query = input.kind_of?(Lucene::Search::Query) ? input : create_query(input)
  @searcher ||= IndexSearcher.new(@store)

  top_docs =
    if (hit_collector = options[:hit_collector])
      args = [options[:filter], hit_collector].compact
      @searcher.search(query, *args)
      hit_collector.topDocs
    else
      # Use locals for the defaults so the caller's hash is left untouched.
      limit = options[:limit] || 25
      offset = options[:offset] || 0
      # Always pass both filter and count, even when the filter is nil.
      args = [options[:filter], limit + offset]
      args << options[:sort] if options[:sort]
      @searcher.search(query, *args).offset!(offset)
    end

  top_docs.each(@searcher) do |doc|
    doc.tokens = tokens_for_doc(doc) if inspect_mode?
    yield doc if block_given?
  end
  top_docs
end
#searcher ⇒ Object
Opens an IndexSearcher for the duration of the block.
engine.searcher { |s| s.search(query_object) }
174 175 176 177 178 179 |
# File 'lib/moonstone/engine.rb', line 174
# Opens an IndexSearcher on the store for the duration of the block.
# When a similarity is configured, a new instance of it is set on the
# searcher before the block runs.
#
#   engine.searcher { |s| s.search(query_object) }
#
# @yield [searcher] the opened IndexSearcher
# @return whatever IndexSearcher.open returns
def searcher
  IndexSearcher.open(@store) do |index_searcher|
    if @similarity
      index_searcher.set_similarity(@similarity.new)
    end
    yield index_searcher
  end
end
#stamp_metadata ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/moonstone/engine.rb', line 31
# Writes a metadata document describing this index build.
#
# Fields written:
#   metadata         - 'index', not analyzed (the term #index_metadata looks up)
#   build_date       - today's date as YYYY-MM-DD
#   engine_name      - the engine's class name
#   engine_version   - abbreviated HEAD hash from `git show-ref`, run in the
#                      current working directory
#   query_conditions - ENV['query_conditions'], or "" when unset
#
# @return whatever #writer returns
def stamp_metadata
  metadata = Lucene::Document::Doc.new
  metadata.add_field 'metadata', 'index', :index => :not_analyzed
  metadata.add_field 'build_date', Date.today.strftime("%Y-%m-%d"), :index => false
  metadata.add_field 'engine_name', self.class.name, :index => false
  metadata.add_field 'engine_version', `git show-ref -s --abbrev HEAD`.chomp, :index => false
  metadata.add_field 'query_conditions', ENV['query_conditions'].to_s, :index => false
  writer do |w|
    w.add_document(metadata)
  end
end
#tokens_for_doc(doc, fields = nil) ⇒ Object
Return a hash of tokens, keyed on field name, for the given doc. doc can be either a Document or the integer document id. Note that if it is a Document, doc.id cannot be nil.
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/moonstone/index_inspection.rb', line 8
# Builds a hash of token arrays, keyed on field name, for one document.
#
# Tokens are read from each field's term frequency vector and placed at
# their term positions, so each array is indexed by position. Fields without
# a usable vector (none, empty, or without position support) map to [].
#
# @param doc [Lucene::Document::Doc, Integer] a document (doc.id must be
#   set) or a document id, which is loaded through the reader
# @param fields [Array, nil] fields to inspect; defaults to doc.keys
# @return [Hash] field => array of tokens indexed by position
def tokens_for_doc(doc, fields = nil)
  result = {}
  reader do |index_reader|
    # An id was passed: load the document and remember its id on it.
    unless doc.kind_of?(Lucene::Document::Doc)
      doc_id = doc
      doc = index_reader.document(doc_id)
      doc.id = doc_id
    end
    fields = doc.keys if fields.nil?

    fields.each do |field|
      result[field] = []
      vector = index_reader.getTermFreqVector(doc.id, field)
      next unless vector && vector.size > 0 && vector.respond_to?(:getTermPositions)

      terms = vector.getTerms
      terms.length.times do |i|
        (vector.getTermPositions(i) || []).each do |pos|
          result[field][pos] = terms[i]
        end
      end
    end
  end
  result
end
#tokens_for_field(doc, field) ⇒ Object
Helper, delegates to tokens_for_doc
33 34 35 |
# File 'lib/moonstone/index_inspection.rb', line 33
# Returns the tokens of a single field by delegating to #tokens_for_doc.
#
# @param doc [Object] a document or document id, as for #tokens_for_doc
# @param field [Object] the field name to look up
# @return the entry for field in the hash returned by #tokens_for_doc
def tokens_for_field(doc, field)
  by_field = tokens_for_doc(doc, [field])
  by_field[field]
end
#update_document(doc) ⇒ Object
89 90 91 |
# File 'lib/moonstone/engine.rb', line 89
# Single-entry convenience wrapper around #update_documents.
#
# @param doc [Hash] a hash with :field, :value and :document keys
# @return whatever #update_documents returns
def update_document(doc)
  entries = [doc]
  update_documents(entries)
end
#update_documents(docs) ⇒ Object
docs must be enumerable set of hashes, with fields :field, :value, :document (where field and value combine to make a term to match documents to replace)
76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/moonstone/engine.rb', line 76
# Replaces documents in the index, then refreshes the cached searcher.
#
# @param docs [Enumerable<Hash>] hashes with :field, :value and :document
#   keys; :field and :value form the Term matching the documents to replace,
#   and :document is converted with #doc_from
# @raise [RuntimeError] "Invalid arguments" when an entry lacks any of the
#   three keys; entries before it have already been handed to the writer
# @return whatever #refresh_searcher returns
def update_documents(docs)
  IndexWriter.open(@store, analyzer) do |index_writer|
    index_writer.set_similarity(@similarity.new) if @similarity

    docs.each do |entry|
      unless entry[:field] && entry[:value] && entry[:document]
        raise "Invalid arguments"
      end

      match_term = Term.new(entry[:field], entry[:value])
      index_writer.updateDocument(match_term, doc_from(entry[:document]))
    end
  end
  refresh_searcher
end
#writer ⇒ Object
Opens an IndexWriter for the duration of the block.
engine.writer { |w| w.add_document(doc) }
165 166 167 168 169 170 |
# File 'lib/moonstone/engine.rb', line 165
# Opens an IndexWriter on the store, using #analyzer, for the duration of
# the block. When a similarity is configured, a new instance of it is set on
# the writer before the block runs.
#
#   engine.writer { |w| w.add_document(doc) }
#
# @yield [writer] the opened IndexWriter
# @return whatever IndexWriter.open returns
def writer
  IndexWriter.open(@store, analyzer) do |index_writer|
    if @similarity
      index_writer.set_similarity(@similarity.new)
    end
    yield index_writer
  end
end