Class: Ferret::Index::SegmentTermDocEnum

Inherits:
TermDocEnum show all
Defined in:
lib/ferret/index/term_doc_enum.rb

Direct Known Subclasses

SegmentTermDocPosEnum

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(parent) ⇒ SegmentTermDocEnum

Returns a new instance of SegmentTermDocEnum.



60
61
62
63
64
65
66
67
# File 'lib/ferret/index/term_doc_enum.rb', line 60

# Builds an enumerator over the postings held by +parent+.
#
# parent:: object that supplies +freq_stream+, +deleted_docs+ and
#          +term_infos+ (which must respond to +skip_interval+)
#
# Nothing is positioned yet; seek must be called before iterating.
def initialize(parent)
  @parent        = parent

  # Work on a clone of the parent's stream rather than the stream itself
  # (presumably so this enumerator keeps its own read position -- confirm
  # against the stream class).
  @freq_stream   = parent.freq_stream.clone
  @deleted_docs  = parent.deleted_docs
  @skip_interval = parent.term_infos.skip_interval

  # The skip stream is only created when skip_to first needs it.
  @skip_stream   = nil
  @doc           = 0
end

Instance Attribute Details

#count ⇒ Object

Returns the value of attribute count.



58
59
60
# File 'lib/ferret/index/term_doc_enum.rb', line 58

# Reader for @count: reset to 0 by do_seek, incremented once per posting
# decoded in next? and read, and adjusted by skip_to when it jumps ahead.
def count; @count; end

#deleted_docs ⇒ Object

Returns the value of attribute deleted_docs.



58
59
60
# File 'lib/ferret/index/term_doc_enum.rb', line 58

# Reader for @deleted_docs, the object taken from the parent in initialize.
# next? and read index it by document number to filter out deleted
# documents; it may be nil, in which case nothing is filtered.
def deleted_docs; @deleted_docs; end

#df ⇒ Object

Returns the value of attribute df.



58
59
60
# File 'lib/ferret/index/term_doc_enum.rb', line 58

# Document frequency of the term this enumerator is positioned on.
#
# do_seek stores that value in @doc_freq and no method visible here ever
# assigns @df, so reading @df alone always yielded nil. An explicitly set
# @df still takes precedence (in case a writer or subclass assigns it);
# otherwise the value recorded by do_seek is returned.
def df
  @df || @doc_freq
end

#doc ⇒ Object

Returns the value of attribute doc.



58
59
60
# File 'lib/ferret/index/term_doc_enum.rb', line 58

# Reader for @doc, the current document number. It starts at 0 and grows
# by the delta decoded from each posting in next? and read; skip_to may
# set it directly from the skip data.
def doc; @doc; end

#freq ⇒ Object

Returns the value of attribute freq.



58
59
60
# File 'lib/ferret/index/term_doc_enum.rb', line 58

# Reader for @freq, the frequency decoded for the most recent posting:
# 1 when the posting's low bit is set, otherwise the next vint read from
# the frequency stream (see next? and read).
def freq; @freq; end

#freq_stream ⇒ Object

Returns the value of attribute freq_stream.



58
59
60
# File 'lib/ferret/index/term_doc_enum.rb', line 58

# Reader for @freq_stream, the clone of the parent's frequency stream made
# in initialize. It becomes nil once close has been called.
def freq_stream; @freq_stream; end

#parent ⇒ Object

Returns the value of attribute parent.



58
59
60
# File 'lib/ferret/index/term_doc_enum.rb', line 58

# Reader for @parent, the object passed to initialize. seek uses it to
# look up term infos and field infos; close resets it to nil.
def parent; @parent; end

Instance Method Details

#close ⇒ Object



111
112
113
114
115
116
117
118
119
# File 'lib/ferret/index/term_doc_enum.rb', line 111

# Releases the streams held by this enumerator and drops the reference to
# the parent. Always returns nil.
#
# Calling close more than once is safe: streams that are already released
# are skipped instead of raising NoMethodError on nil. If closing the
# frequency stream raises, the skip stream is still closed and every
# reference is still cleared before the exception propagates.
def close()
  @freq_stream.close() unless @freq_stream.nil?
  nil
ensure
  # Clear the references first so a failure while closing the skip stream
  # cannot leave this object pointing at half-closed resources.
  skip = @skip_stream
  @freq_stream = nil
  @skip_stream = nil
  @parent = nil
  skip.close() unless skip.nil?
end

#do_seek(ti) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/ferret/index/term_doc_enum.rb', line 93

# Positions the enumerator at the start of the postings described by +ti+.
#
# ti:: a term-info object responding to +doc_freq+, +freq_pointer+,
#      +prox_pointer+ and +skip_offset+, or nil when the term is absent
#
# With a nil +ti+ only the counters are cleared (@count and @doc_freq
# become 0), so next? reports no documents; other state is left untouched.
def do_seek(ti)
  @count = 0

  # No term info: nothing to iterate over.
  return @doc_freq = 0 if ti == nil

  @doc_freq = ti.doc_freq

  # Restart document numbering and skip bookkeeping.
  @doc = @skip_doc = @skip_count = 0
  @num_skips = @doc_freq / @skip_interval

  # The skip data is located at skip_offset past the start of the
  # frequency data.
  @freq_pointer = ti.freq_pointer
  @prox_pointer = ti.prox_pointer
  @skip_pointer = @freq_pointer + ti.skip_offset

  @freq_stream.seek(@freq_pointer)

  # The skip stream is only positioned once skip_to actually needs it.
  @have_skipped = false
end

#next? ⇒ Boolean

Returns:

  • (Boolean)


124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/ferret/index/term_doc_enum.rb', line 124

# Advances to the next document that is not marked in @deleted_docs.
#
# Returns true when positioned on such a document (readable through doc
# and freq), or false once @count has reached @doc_freq.
def next?()
  until @count == @doc_freq
    code = @freq_stream.read_vint()

    # The upper bits hold the delta from the previous document number.
    @doc += code >> 1

    # A set low bit means a frequency of one; otherwise the frequency
    # follows as its own vint.
    @freq = (code & 1) == 0 ? @freq_stream.read_vint() : 1

    @count += 1

    return true if @deleted_docs == nil || !@deleted_docs[@doc]

    # Deleted document: notify the hook, then keep scanning.
    skipping_doc()
  end
  false
end

#read(docs, freqs, start = 0) ⇒ Object

Optimized implementation.



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/ferret/index/term_doc_enum.rb', line 146

# Bulk variant of next?: decodes postings straight into the caller's
# arrays.
#
# docs::  array receiving document numbers; its length bounds how many
#         slots may be filled
# freqs:: array receiving the matching frequencies
# start:: index of the first slot to fill (default 0)
#
# Stops when +docs+ is full or @count reaches @doc_freq. Documents marked
# in @deleted_docs are decoded but not stored. Returns the index one past
# the last slot written (equal to +start+ when nothing was stored).
def read(docs, freqs, start = 0)
  slot = start
  capacity = docs.length

  while slot < capacity && @count < @doc_freq
    # Same decoding as next?, inlined for speed.
    code = @freq_stream.read_vint()
    @doc += code >> 1
    @freq = (code & 1) == 0 ? @freq_stream.read_vint() : 1
    @count += 1

    if @deleted_docs == nil || !@deleted_docs[@doc]
      docs[slot] = @doc
      freqs[slot] = @freq
      slot += 1
    end

    # The hook runs for every posting decoded here, stored or not.
    skipping_doc()
  end

  slot
end

#seek(t) ⇒ Object

Find the term, TermEnum or TermInfo in the doc

t

can be a Term, TermEnum or TermInfo object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/ferret/index/term_doc_enum.rb', line 72

# Positions this enumerator on the postings identified by +t+.
#
# t:: a Term (exactly that class), any TermEnum, or a TermInfo
#
# Resolves +t+ to a term-info object and hands it to do_seek, whose result
# is returned. Raises ArgumentError for any other kind of argument.
def seek(t)
  term_info =
    if t.instance_of?(Term)
      parent.term_infos[t]
    elsif t.is_a?(TermEnum)
      # Matching field infos are taken as proof that the enum belongs to
      # the same segment as this enumerator, so its current term info can
      # be used directly; otherwise look the enum's term up again.
      same_segment = t.instance_of?(SegmentTermEnum) &&
                     t.field_infos == parent.field_infos
      same_segment ? t.term_info() : parent.term_infos[t.term]
    elsif t.is_a?(TermInfo)
      t # already the object do_seek needs
    else
      raise ArgumentError, "Must pass a Term, TermEnum or TermInfo object, not a " +
        t.class.to_s
    end

  do_seek(term_info)
end

#skip_prox(prox_pointer) ⇒ Object

Overridden by SegmentTermDocPosEnum to skip in prox stream.



175
176
# File 'lib/ferret/index/term_doc_enum.rb', line 175

# Hook called by skip_to after it repositions the frequency stream; it
# receives the proximity pointer recorded with the skip entry. This
# implementation does nothing and returns nil.
def skip_prox(prox_pointer)
  nil
end

#skip_to(target) ⇒ Object

Optimized implementation.



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/ferret/index/term_doc_enum.rb', line 179

# Advances to the first document, strictly after the current position,
# whose number is greater than or equal to +target+.
#
# target:: document number to reach
#
# Returns true when positioned on such a document and false when the
# postings are exhausted first. next? is always called at least once, so
# the enumerator moves forward even if @doc already satisfies +target+.
#
# Skip data is consulted only when the term occurs in at least
# @skip_interval documents; otherwise this is a plain linear scan.
def skip_to(target)
  if (@doc_freq >= @skip_interval) # optimized case

    if (@skip_stream == nil)
      @skip_stream = @freq_stream.clone() # lazily clone
    end

    if (!@have_skipped) # lazily seek skip stream
      @skip_stream.seek(@skip_pointer)
      @have_skipped = true
    end

    # scan skip data
    # The last_* locals remember the most recent skip entry that does not
    # overshoot target; they start from the current position.
    last_skip_doc = @skip_doc
    last_freq_pointer = @freq_stream.pos()
    last_prox_pointer = -1
    # Starts at -1 minus the number of postings already consumed within
    # the current interval; @skip_interval is added per qualifying entry
    # in the loop below.
    num_skipped = -1 - (@count % @skip_interval)

    while (target > @skip_doc) 
      last_skip_doc = @skip_doc
      last_freq_pointer = @freq_pointer
      last_prox_pointer = @prox_pointer
      
      # Only entries at or beyond the current document count as skipped.
      if (@skip_doc != 0 and @skip_doc >= @doc)
        num_skipped += @skip_interval
      end
      
      # Stop once every skip entry computed by do_seek has been consumed.
      if(@skip_count >= @num_skips)
        break
      end

      # Each skip entry is three vints: deltas for the document number,
      # the frequency pointer and the proximity pointer.
      @skip_doc += @skip_stream.read_vint()
      @freq_pointer += @skip_stream.read_vint()
      @prox_pointer += @skip_stream.read_vint()

      @skip_count += 1
    end
    
    # if we found something to skip, then skip it
    # (i.e. the remembered entry lies ahead of the stream's current position)
    if (last_freq_pointer > @freq_stream.pos()) 
      @freq_stream.seek(last_freq_pointer)
      skip_prox(last_prox_pointer)

      @doc = last_skip_doc
      @count += num_skipped
    end

  end

  # done skipping, now just scan
  
  # do-while: the body runs before the condition is first tested
  begin 
    if not next?
      return false
    end
  end while (target > @doc)
  return true
end

#skipping_doc ⇒ Object



121
122
# File 'lib/ferret/index/term_doc_enum.rb', line 121

# Hook called by next? when it passes over a deleted document, and by read
# after every posting it decodes. This implementation does nothing and
# returns nil.
def skipping_doc()
  nil
end