Class: Ferret::Index::MultiReader

Inherits:
IndexReader show all
Defined in:
lib/ferret/index/multi_reader.rb

Overview

An IndexReader which reads multiple indexes, appending their content.

Constant Summary

Constants inherited from IndexReader

IndexReader::FILENAME_EXTENSIONS

Instance Attribute Summary collapse

Attributes inherited from IndexReader

#directory

Instance Method Summary collapse

Methods inherited from IndexReader

#acquire_write_lock, #close, #commit, #delete, #delete_docs_with_term, get_current_version, #get_document_with_term, index_exists?, #latest?, open, #set_norm, #term_docs_for, #term_positions_for, #undelete_all

Constructor Details

#initialize(sub_readers, directory = nil, sis = nil, close_dir = false) ⇒ MultiReader

Construct a MultiReader aggregating the named set of (sub)readers. Directory locking for delete, undelete_all, and set_norm operations is left to the subreaders.

Note that all subreaders are closed if this MultiReader is closed.

sub_readers

set of (sub)readers

raises

IOException



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/ferret/index/multi_reader.rb', line 13

# Builds a reader that presents +sub_readers+ as one concatenated index.
#
# sub_readers:: ordered collection of readers to aggregate
# directory::   passed to the superclass constructor when given; otherwise
#               the first sub-reader's directory (or nil when there are no
#               sub-readers) is passed instead
# sis::         forwarded to the superclass only when +directory+ is given
# close_dir::   forwarded to the superclass only when +directory+ is given
def initialize(sub_readers, directory = nil, sis = nil, close_dir = false)
  if directory
    super(directory, sis, close_dir)
  elsif sub_readers.length == 0
    super(nil)
  else
    super(sub_readers[0].directory)
  end

  @max_doc = 0
  @num_docs = -1          # -1 means "not computed yet"
  @has_deletions = false

  @sub_readers = sub_readers
  # @starts[i] is the first global doc number served by sub-reader i; the
  # extra trailing slot receives the grand total.
  @starts = Array.new(@sub_readers.length + 1)
  @sub_readers.each_with_index do |reader, idx|
    @starts[idx] = @max_doc
    @max_doc += reader.max_doc
    @has_deletions = true if reader.has_deletions?
  end
  @starts[@sub_readers.length] = @max_doc
  @norms_cache = {}
end

Instance Attribute Details

#max_docObject (readonly)

Returns the value of attribute max_doc.



4
5
6
# File 'lib/ferret/index/multi_reader.rb', line 4

# Returns the value of @max_doc (the sum of the sub-readers' max_doc values
# accumulated by the constructor).
def max_doc
  return @max_doc
end

Instance Method Details

#deleted?(n) ⇒ Boolean

Returns:

  • (Boolean)


70
71
72
73
# File 'lib/ferret/index/multi_reader.rb', line 70

# Reports whether global document +n+ is deleted by asking the sub-reader
# that holds it, using that sub-reader's local document number.
def deleted?(n)
  segment = reader_index(n)
  local_doc = n - @starts[segment]
  @sub_readers[segment].deleted?(local_doc)
end

#do_closeObject



185
186
187
188
189
# File 'lib/ferret/index/multi_reader.rb', line 185

# Closes every sub-reader, inside the reader's synchronize block.
def do_close
  synchronize do
    @sub_readers.each(&:close)
  end
end

#do_commitObject



181
182
183
# File 'lib/ferret/index/multi_reader.rb', line 181

# Commits each sub-reader in order.
def do_commit
  @sub_readers.each(&:commit)
end

#do_delete(n) ⇒ Object



79
80
81
82
83
84
# File 'lib/ferret/index/multi_reader.rb', line 79

# Deletes global document +n+ through the sub-reader that holds it.
# The cached document count is invalidated before the delete is dispatched,
# and the deletion flag is raised once the sub-reader call returns.
def do_delete(n)
  @num_docs = -1 # cached count is now stale
  segment = reader_index(n)
  local_doc = n - @starts[segment]
  @sub_readers[segment].delete(local_doc)
  @has_deletions = true
end

#do_set_norm(n, field, value) ⇒ Object



153
154
155
156
157
# File 'lib/ferret/index/multi_reader.rb', line 153

# Sets the norm of +field+ for global document +n+ on the owning sub-reader.
# The cached norms for +field+ are discarded first.
def do_set_norm(n, field, value)
  @norms_cache.delete(field)
  segment = reader_index(n)
  local_doc = n - @starts[segment]
  @sub_readers[segment].set_norm(local_doc, field, value)
end

#do_undelete_allObject



86
87
88
89
90
# File 'lib/ferret/index/multi_reader.rb', line 86

# Undeletes everything in every sub-reader, invalidating the cached
# document count first and clearing the deletion flag afterwards.
def do_undelete_all
  @num_docs = -1 # cached count is now stale
  @sub_readers.each(&:undelete_all)
  @has_deletions = false
end

#doc_freq(t) ⇒ Object



167
168
169
170
171
# File 'lib/ferret/index/multi_reader.rb', line 167

# Returns the sum of doc_freq(t) over all sub-readers (0 when there are none).
def doc_freq(t)
  @sub_readers.inject(0) { |sum, reader| sum + reader.doc_freq(t) }
end

#fake_normsObject



118
119
120
# File 'lib/ferret/index/multi_reader.rb', line 118

# Returns the memoized result of SegmentReader.create_fake_norms(max_doc),
# creating it on first use and reusing the same object afterwards.
def fake_norms
  @ones = SegmentReader.create_fake_norms(max_doc) unless @ones
  @ones
end

#get_document(n) ⇒ Object



65
66
67
68
# File 'lib/ferret/index/multi_reader.rb', line 65

# Fetches global document +n+ from the sub-reader that holds it, using that
# sub-reader's local document number.
def get_document(n)
  segment = reader_index(n)
  local_doc = n - @starts[segment]
  @sub_readers[segment].get_document(local_doc)
end

#get_field_names(field_option = IndexReader::FieldOption::ALL) ⇒ Object

See IndexReader#get_field_names



192
193
194
195
196
197
198
199
# File 'lib/ferret/index/multi_reader.rb', line 192

# See IndexReader#get_field_names.
#
# Collects the field names reported by every sub-reader for +field_option+
# into a single Set, so a name present in several sub-readers appears once.
# Returns an empty Set when there are no sub-readers.
def get_field_names(field_option = IndexReader::FieldOption::ALL)
  # Merge in place: `set |= names` would build a fresh Set copy for every
  # sub-reader.
  @sub_readers.each_with_object(Set.new) do |reader, field_set|
    field_set.merge(reader.get_field_names(field_option))
  end
end

#get_norms(field) ⇒ Object



122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/ferret/index/multi_reader.rb', line 122

# Returns the norms for +field+ across all sub-readers as one string.
#
# A cached value is returned if present. If no sub-reader has norms for the
# field, the shared fake norms are returned (and not cached here). Otherwise
# each sub-reader writes its norms into a buffer of @max_doc characters at
# its start offset, and the buffer is cached and returned.
def get_norms(field)
  synchronize do
    cached = @norms_cache[field]
    return cached if cached
    return fake_norms unless has_norms?(field)

    norms = " " * @max_doc
    @sub_readers.each_with_index do |reader, idx|
      reader.get_norms_into(field, norms, @starts[idx])
    end
    return @norms_cache[field] = norms
  end
end

#get_norms_into(field, buf, offset) ⇒ Object



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/ferret/index/multi_reader.rb', line 137

# Writes the norms for +field+ into +buf+ starting at +offset+.
#
# When the norms are cached, or no sub-reader has norms for the field (in
# which case the fake norms are used), the first @max_doc characters are
# copied into +buf+ and nil is returned. Otherwise each sub-reader writes
# directly into +buf+ at +offset+ plus its own start offset.
def get_norms_into(field, buf, offset)
  synchronize do
    norms = @norms_cache[field]
    norms = fake_norms if norms.nil? && !has_norms?(field)

    if norms
      buf[offset, @max_doc] = norms[0, @max_doc]
      return
    else
      @sub_readers.length.times do |idx|
        @sub_readers[idx].get_norms_into(field, buf, offset + @starts[idx])
      end
    end
  end
end

#get_term_vector(n, field) ⇒ Object



49
50
51
52
# File 'lib/ferret/index/multi_reader.rb', line 49

# Returns the term vector of +field+ for global document +n+, fetched from
# the sub-reader that holds the document.
def get_term_vector(n, field)
  segment = reader_index(n)
  local_doc = n - @starts[segment]
  @sub_readers[segment].get_term_vector(local_doc, field)
end

#get_term_vectors(n) ⇒ Object

Return an array of term frequency vectors for the specified document. The array contains a vector for each vectorized field in the document. Each vector contains term numbers and frequencies for all terms in a given vectorized field. If no such fields existed, the method returns nil.



44
45
46
47
# File 'lib/ferret/index/multi_reader.rb', line 44

# Returns whatever the owning sub-reader's get_term_vectors yields for
# global document +n+, addressed by its local document number.
def get_term_vectors(n)
  segment = reader_index(n)
  local_doc = n - @starts[segment]
  @sub_readers[segment].get_term_vectors(local_doc)
end

#has_deletions?Boolean

Returns:

  • (Boolean)


75
76
77
# File 'lib/ferret/index/multi_reader.rb', line 75

# Returns the current value of the @has_deletions flag.
def has_deletions?
  @has_deletions
end

#has_norms?(field) ⇒ Boolean

Returns:

  • (Boolean)


113
114
115
116
# File 'lib/ferret/index/multi_reader.rb', line 113

# True when at least one sub-reader reports norms for +field+; false
# otherwise, including when there are no sub-readers.
def has_norms?(field)
  @sub_readers.any? { |reader| reader.has_norms?(field) }
end

#num_docsObject



54
55
56
57
58
59
60
61
62
63
# File 'lib/ferret/index/multi_reader.rb', line 54

# Returns the number of documents across all sub-readers.
# The sum is cached in @num_docs; -1 marks the cache as empty, in which case
# the count is recomputed from the sub-readers.
def num_docs
  synchronize do
    if @num_docs == -1 # cache miss -- recompute
      @num_docs = @sub_readers.inject(0) { |sum, reader| sum + reader.num_docs }
    end
    return @num_docs
  end
end

#reader_index(n) ⇒ Object

find reader for doc n:



92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/ferret/index/multi_reader.rb', line 92

# Finds the index of the sub-reader that holds global document +n+.
#
# Binary-searches the first @sub_readers.length entries of @starts for the
# last position whose start offset is <= n. When several consecutive
# entries share the same start, the last of them is chosen. Returns -1 when
# +n+ is smaller than every start (or there are no sub-readers).
# Relies on @starts being in non-decreasing order.
def reader_index(n)
  low = 0
  high = @sub_readers.length # exclusive upper bound

  # Invariant: every start before `low` is <= n, every start from `high`
  # onwards is > n.
  while low < high
    middle = (low + high) >> 1
    if @starts[middle] <= n
      low = middle + 1
    else
      high = middle
    end
  end
  low - 1
end

#term_docsObject



173
174
175
# File 'lib/ferret/index/multi_reader.rb', line 173

# Returns a new MultiTermDocEnum built from the sub-readers and their
# start offsets.
def term_docs
  MultiTermDocEnum.new(@sub_readers, @starts)
end

#term_positionsObject



177
178
179
# File 'lib/ferret/index/multi_reader.rb', line 177

# Returns a new MultiTermDocPosEnum built from the sub-readers and their
# start offsets.
def term_positions
  MultiTermDocPosEnum.new(@sub_readers, @starts)
end

#termsObject



159
160
161
# File 'lib/ferret/index/multi_reader.rb', line 159

# Returns a new MultiTermEnum over the sub-readers, passing nil as the
# term argument.
def terms
  MultiTermEnum.new(@sub_readers, @starts, nil)
end

#terms_from(term) ⇒ Object



163
164
165
# File 'lib/ferret/index/multi_reader.rb', line 163

# Returns a new MultiTermEnum over the sub-readers, passing +term+ as the
# term argument.
def terms_from(term)
  MultiTermEnum.new(@sub_readers, @starts, term)
end