Class: Ferret::Index::DocumentWriter

Inherits:
Object
  • Object
show all
Defined in:
lib/ferret/index/document_writer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(directory, analyzer, similarity, max_field_length, term_index_interval = IndexWriter::DEFAULT_TERM_INDEX_INTERVAL) ⇒ DocumentWriter

directory

The directory to write the document information to

analyzer

The analyzer to use for the document

similarity

The Similarity function writer.similarity

max_field_length

The maximum number of tokens a field may have writer.max_field_length

term_index_interval

The interval of terms in the index writer.term_index_interval



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/ferret/index/document_writer.rb', line 17

# Sets up a writer and its empty per-document buffers.
#
# directory::           The directory to write the document information to
# analyzer::            The analyzer to use for the document
# similarity::          The Similarity function
# max_field_length::    The maximum number of tokens a field may have
# term_index_interval:: The interval of terms in the index; falls back to
#                       IndexWriter::DEFAULT_TERM_INDEX_INTERVAL when omitted
def initialize(directory, analyzer, similarity, max_field_length,
               term_index_interval = IndexWriter::DEFAULT_TERM_INDEX_INTERVAL)
  # Collaborators and limits handed in by the caller.
  @directory, @analyzer, @similarity = directory, analyzer, similarity
  @max_field_length, @term_index_interval = max_field_length, term_index_interval

  # Buffer for one document before it is written to the index:
  # keys are Terms, values are Postings.
  @posting_table = {}

  # Term instance created once here, starting with empty field and text.
  @term_buffer = Term.new("", "")
end

Instance Attribute Details

#info_stream=(value) ⇒ Object (writeonly)

If non-nil, a message is printed to this stream when max_field_length is reached.



8
9
10
# File 'lib/ferret/index/document_writer.rb', line 8

# Write-only attribute: stores +value+ in @info_stream.
#
# According to this class's documentation, when the stored value is non-nil
# a message is printed to it if max_field_length is reached; the printing
# itself happens elsewhere, not in this setter.
def info_stream=(value)
  @info_stream = value
end

Instance Method Details

#add_document(segment, doc) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/ferret/index/document_writer.rb', line 35

# Writes +doc+ into the segment named +segment+ inside @directory, in this
# order: field names, field values, postings, then norms.
#
# segment:: segment name; also used as the base of the ".fnm" file name
# doc::     the document to write; must respond to +boost+
#
# Returns whatever write_norms returns.
def add_document(segment, doc)
  # Field names are written to "<segment>.fnm".
  @field_infos = FieldInfos.new
  @field_infos << doc
  @field_infos.write_to_dir(@directory, segment + ".fnm")

  # Field values. The writer is closed even if adding the document raises.
  stored_fields = FieldsWriter.new(@directory, segment, @field_infos)
  begin
    stored_fields.add_document(doc)
  ensure
    stored_fields.close
  end

  # Reset the per-document buffers before inverting: one slot per field,
  # counters starting at zero and boosts starting at the document's boost.
  @posting_table.clear
  field_count = @field_infos.size
  @field_lengths, @field_positions, @field_offsets =
    Array.new(3) { Array.new(field_count, 0) }
  @field_boosts = Array.new(field_count, doc.boost)

  # Invert the document into @posting_table.
  invert_document(doc)

  # Sort the posting table into an array and write it out.
  write_postings(sort_posting_table, segment)

  # Finally, write the norms of the indexed fields.
  write_norms(segment)
end