Class: Ferret::Index::DocumentWriter
- Inherits:
-
Object
- Object
- Ferret::Index::DocumentWriter
- Defined in:
- lib/ferret/index/document_writer.rb
Instance Attribute Summary collapse
-
#info_stream ⇒ Object
writeonly
If non-nil, a message will be printed to this if max_field_length is reached.
Instance Method Summary collapse
- #add_document(segment, doc) ⇒ Object
-
#initialize(directory, analyzer, similarity, max_field_length, term_index_interval = IndexWriter::DEFAULT_TERM_INDEX_INTERVAL) ⇒ DocumentWriter
constructor
- directory
-
The directory to write the document information to
- analyzer
-
The analyzer to use for the document
- similarity
-
The Similarity function writer.similarity
- max_field_length
-
The maximum number of tokens a field may have writer.max_field_length
- term_index_interval
-
The interval of terms in the index writer.term_index_interval
Constructor Details
#initialize(directory, analyzer, similarity, max_field_length, term_index_interval = IndexWriter::DEFAULT_TERM_INDEX_INTERVAL) ⇒ DocumentWriter
- directory
-
The directory to write the document information to
- analyzer
-
The analyzer to use for the document
- similarity
-
The Similarity function writer.similarity
- max_field_length
-
The maximum number of tokens a field may have writer.max_field_length
- term_index_interval
-
The interval of terms in the index writer.term_index_interval
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/ferret/index/document_writer.rb', line 17

# Stores the collaborators and limits handed in by the caller and sets up
# the empty per-document buffers.
#
# directory::           The directory to write the document information to
# analyzer::            The analyzer to use for the document
# similarity::          The Similarity function
# max_field_length::    The maximum number of tokens a field may have
# term_index_interval:: The interval of terms in the index; defaults to
#                       IndexWriter::DEFAULT_TERM_INDEX_INTERVAL
def initialize(directory, analyzer, similarity, max_field_length,
               term_index_interval = IndexWriter::DEFAULT_TERM_INDEX_INTERVAL)
  # Limits and tuning values.
  @max_field_length = max_field_length
  @term_index_interval = term_index_interval

  # Collaborators supplied by the caller.
  @directory = directory
  @analyzer = analyzer
  @similarity = similarity

  # Keys are Terms, values are Postings.
  # Used to buffer a document before it is written to the index.
  @posting_table = {}

  # Starts out as a Term built from two empty strings.
  @term_buffer = Term.new("", "")
end
Instance Attribute Details
#info_stream=(value) ⇒ Object (writeonly)
If non-nil, a message will be printed to this if max_field_length is reached.
8 9 10 |
# File 'lib/ferret/index/document_writer.rb', line 8

# Write-only attribute. If non-nil, a message will be printed to the given
# object if max_field_length is reached.
#
# value:: the object to store in @info_stream (may be nil)
def info_stream=(value)
  @info_stream = value
end
Instance Method Details
#add_document(segment, doc) ⇒ Object
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/ferret/index/document_writer.rb', line 35

# Adds a single document to the given segment. In order, it writes the
# field names, writes the field values, inverts the document into the
# posting table, then writes the sorted postings and the norms.
#
# segment:: segment name; ".fnm" is appended to it to name the field-infos
#           file, and it is passed on to the postings and norms writers
# doc::     the document to add; its boost is the initial value of every
#           entry in @field_boosts
def add_document(segment, doc)
  # write field names
  @field_infos = FieldInfos.new
  @field_infos << doc
  @field_infos.write_to_dir(@directory, segment + ".fnm")

  # write field values; the writer is closed even if adding the doc raises
  writer = FieldsWriter.new(@directory, segment, @field_infos)
  begin
    writer.add_document(doc)
  ensure
    writer.close
  end

  # invert doc into posting_table, starting from an empty table
  @posting_table.clear

  # one slot per field in each of the per-field arrays
  field_count = @field_infos.size
  @field_lengths = Array.new(field_count, 0)
  @field_positions = Array.new(field_count, 0)
  @field_offsets = Array.new(field_count, 0)
  @field_boosts = Array.new(field_count, doc.boost)

  invert_document(doc)

  # sort posting_table into an array
  postings = sort_posting_table

  # Disabled debug trace of the sorted postings, kept as found:
  # for (int i = 0; i < postings.length; i += 1)
  #   Posting posting = postings[i]
  #   print(posting.term)
  #   print(" freq=" + posting.freq)
  #   print(" pos=")
  #   print(posting.positions[0])
  #   for (int j = 1; j < posting.freq; j += 1)
  #     print("," + posting.positions[j])
  #   puts("")
  # end

  # write postings
  write_postings(postings, segment)

  # write norms of indexed fields
  write_norms(segment)
end