Class: Ferret::Index::TermInfosReader

Inherits:
Object
  • Object
show all
Includes:
MonitorMixin
Defined in:
lib/ferret/index/term_infos_io.rb

Overview

This stores a monotonically increasing set of <Term, TermInfo> pairs in a Directory. Pairs are accessed either by Term or by ordinal position in the set.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dir, seg, fis) ⇒ TermInfosReader

Returns a new instance of TermInfosReader.



115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/ferret/index/term_infos_io.rb', line 115

# Builds a reader over the term dictionary of one segment.
#
# Two SegmentTermEnum instances are opened: one over "<seg>.tis" and one
# over "<seg>.tii". The size and skip interval are copied from the ".tis"
# enumerator. The @index_* arrays start out nil; get_term_info and
# get_terms_position call ensure_index_is_read before touching them.
#
# @param dir [Object] directory object responding to +open_input(name)+
# @param seg [String] segment name used as the file-name prefix
# @param fis [Object] field infos handed through to each SegmentTermEnum
def initialize(dir, seg, fis)
  # Explicit empty argument list: the class includes MonitorMixin, whose
  # initializer must not receive this constructor's arguments.
  super()

  @directory, @segment, @field_infos = dir, seg, fis

  # NOTE(review): the trailing boolean presumably flags "is index file"
  # (false for .tis, true for .tii) - confirm against SegmentTermEnum.
  tis_input = @directory.open_input(@segment + ".tis")
  @orig_enum = SegmentTermEnum.new(tis_input, @field_infos, false)
  @size = @orig_enum.size
  @skip_interval = @orig_enum.skip_interval

  tii_input = @directory.open_input(@segment + ".tii")
  @index_enum = SegmentTermEnum.new(tii_input, @field_infos, true)

  @index_terms = @index_infos = @index_pointers = nil
end

Instance Attribute Details

#size ⇒ Object (readonly)

Returns the number of term/value pairs in the set.



140
141
142
# File 'lib/ferret/index/term_infos_io.rb', line 140

# Reader for @size, which the constructor copies from the ".tis" term
# enumerator's +size+ (the number of term/value pairs in the set).
#
# @return [Object] the stored size (presumably an Integer - confirm)
def size
  @size
end

#skip_interval ⇒ Object (readonly)

The skip interval for the original enumerator



142
143
144
# File 'lib/ferret/index/term_infos_io.rb', line 142

# Reader for @skip_interval, which the constructor copies from the ".tis"
# term enumerator's +skip_interval+.
#
# @return [Object] the stored skip interval (presumably an Integer - confirm)
def skip_interval
  @skip_interval
end

Instance Method Details

#close ⇒ Object



133
134
135
136
137
# File 'lib/ferret/index/term_infos_io.rb', line 133

# Closes the two underlying term enumerators, skipping any that is nil.
# The main enumerator is closed first, then the index enumerator.
#
# @return [Object, nil] the result of closing the index enumerator, or nil
#   when there is no index enumerator
def close
  @orig_enum.close unless @orig_enum.nil?
  @index_enum.close unless @index_enum.nil?
end

#get_term(position) ⇒ Object

Returns the nth term in the set.



168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/ferret/index/term_infos_io.rb', line 168

# Looks up the term stored at an ordinal position.
#
# If the current enumerator already sits on a term and the requested
# position falls in [enum.position, enum.position + index_interval), the
# scan starts from where the enumerator is. Otherwise the enumerator is
# first moved to index entry position / index_interval.
#
# @param position [Integer] ordinal position of the wanted term
# @return [Object, nil] whatever scan_for_term yields, or nil when the
#   set is empty
def get_term(position)
  return nil if @size == 0

  cursor = enum
  reachable = !cursor.nil? &&
              !cursor.term.nil? &&
              position >= cursor.position &&
              position < cursor.position + cursor.index_interval

  # Reposition only when the target lies outside the current block.
  seek_enum((position / cursor.index_interval).to_i) unless reachable
  scan_for_term(position)
end

#get_term_info(term) ⇒ Object Also known as: []

Returns the TermInfo for a Term in the set, or nil.



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/ferret/index/term_infos_io.rb', line 146

# Finds the TermInfo recorded for +term+.
#
# Sequential lookups are served without seeking: when the enumerator's
# current term is at or before +term+, and +term+ sorts before the next
# index term (or there is no next index term), scanning continues from the
# enumerator's current position. Any other case seeks to the index offset
# computed for +term+ first.
#
# @param term [Object] the term to look up; compared with >= and <
# @return [Object, nil] whatever scan_for_term_info yields, or nil when
#   the set is empty
def get_term_info(term)
  return nil if @size == 0

  ensure_index_is_read

  cursor = enum
  if cursor.term && term >= cursor.term
    # Index slot of the block that follows the enumerator's current block.
    next_slot = (cursor.position / cursor.index_interval).to_i + 1
    in_current_block = @index_terms.length == next_slot ||
                       term < @index_terms[next_slot]
    return scan_for_term_info(term) if in_current_block
  end

  # Random access: reposition before scanning.
  seek_enum(get_index_offset(term))
  scan_for_term_info(term)
end

#get_terms_position(term) ⇒ Object



183
184
185
186
187
188
189
190
191
192
193
194
# File 'lib/ferret/index/term_infos_io.rb', line 183

# Finds the ordinal position of +term+.
#
# The enumerator is moved to the index offset computed for +term+ and then
# advanced while +term+ still sorts after the enumerator's current term.
#
# @param term [Object] the term to locate; compared with > and ==
# @return [Integer, nil] the enumerator position when the term is found,
#   -1 when it is not, and nil when the set is empty
def get_terms_position(term)
  return nil if @size == 0

  ensure_index_is_read
  seek_enum(get_index_offset(term))

  cursor = enum
  while term > cursor.term
    # next? advances the enumerator; stop once it cannot advance.
    break unless cursor.next?
  end

  if term == cursor.term
    cursor.position
  else
    -1
  end
end

#terms ⇒ Object

Returns an enumeration of all the Terms and TermInfos in the set.



197
198
199
# File 'lib/ferret/index/term_infos_io.rb', line 197

# Hands out an independent enumerator over the set by cloning the
# enumerator that was opened over the ".tis" file in the constructor.
#
# @return [Object] a clone of @orig_enum
def terms
  @orig_enum.clone
end

#terms_from(term) ⇒ Object

Returns an enumeration of terms starting at or after the named term.



202
203
204
205
# File 'lib/ferret/index/term_infos_io.rb', line 202

# Produces an enumerator that starts at or after +term+.
#
# @param term [Object] the term to start from
# @return [Object] a clone of the current enumerator
def terms_from(term)
  # Called only for its side effect of positioning the enumerator; the
  # returned TermInfo is discarded.
  get_term_info(term)
  enum.clone
end