Class: Ferret::Index::SegmentReader
Overview
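SegmentReader is the IndexReader for a single index segment. Its constructor opens the segment's field infos, stored fields, term infos, frequency and proximity streams, norms, deletions (when a '.del' file exists) and term vectors (when any field stores them).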
Defined Under Namespace
Classes: Norm
Constant Summary
Constants inherited
from IndexReader
IndexReader::FILENAME_EXTENSIONS
Instance Attribute Summary collapse
Attributes inherited from IndexReader
#directory
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from IndexReader
#acquire_write_lock, #close, #commit, #delete, #delete_docs_with_term, get_current_version, #get_document_with_term, index_exists?, #latest?, open, #set_norm, #term_docs_for, #term_positions_for, #undelete_all
Constructor Details
#initialize(dir, info, seg_infos, close, owner) ⇒ SegmentReader
Returns a new instance of SegmentReader.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
# File 'lib/ferret/index/segment_reader.rb', line 14
# Opens every reader and stream needed to serve one segment.
#
# dir::       forwarded to IndexReader#initialize; the local name is then
#             reused for whichever directory the segment files are read from
# info::      segment descriptor; its +name+ becomes @segment
# seg_infos:: forwarded to IndexReader#initialize
# close::     forwarded to IndexReader#initialize
# owner::     forwarded to IndexReader#initialize
def initialize(dir, info, seg_infos, close, owner)
  super(dir, seg_infos, close, owner)
  @segment = info.name

  # A compound-file segment is read through a CompoundFileReader; otherwise
  # the files are read straight from the index directory.
  @cfs_reader =
    if SegmentReader.uses_compound_file?(info)
      CompoundFileReader.new(directory, @segment + '.cfs')
    end
  dir = @cfs_reader || directory

  @field_infos = FieldInfos.new(dir, @segment + '.fnm')
  @fields_reader = FieldsReader.new(dir, @segment, @field_infos)
  @term_infos = TermInfosReader.new(dir, @segment, @field_infos)

  # The deletions file is always looked up in the index directory itself,
  # never inside the compound file.
  @deleted_docs_dirty = false
  @deleted_docs =
    if SegmentReader.has_deletions?(info)
      Ferret::Utils::BitVector.read(directory, @segment + '.del')
    end

  @freq_stream = dir.open_input(@segment + '.frq')
  @prox_stream = dir.open_input(@segment + '.prx')

  # @norms doubles as its own monitor (see close_norms).
  @norms = {}
  @norms.extend(MonitorMixin)
  @norms_dirty = false
  open_norms(dir)

  # Only create a term-vectors reader when at least one field stores vectors.
  @tv_reader_orig =
    if @field_infos.has_vectors?
      TermVectorsReader.new(dir, @segment, @field_infos)
    end
end
|
Instance Attribute Details
#deleted_docs ⇒ Object
Returns the value of attribute deleted_docs.
7
8
9
|
# File 'lib/ferret/index/segment_reader.rb', line 7
# Reader for @deleted_docs (nil when the segment has no deletions loaded).
def deleted_docs
  return @deleted_docs
end
|
#field_infos ⇒ Object
Returns the value of attribute field_infos.
7
8
9
|
# File 'lib/ferret/index/segment_reader.rb', line 7
# Reader for @field_infos, the FieldInfos loaded by the constructor.
def field_infos
  return @field_infos
end
|
#freq_stream ⇒ Object
Returns the value of attribute freq_stream.
7
8
9
|
# File 'lib/ferret/index/segment_reader.rb', line 7
# Reader for @freq_stream, the input opened on the segment's '.frq' file.
def freq_stream
  return @freq_stream
end
|
#prox_stream ⇒ Object
Returns the value of attribute prox_stream.
7
8
9
|
# File 'lib/ferret/index/segment_reader.rb', line 7
# Reader for @prox_stream, the input opened on the segment's '.prx' file.
def prox_stream
  return @prox_stream
end
|
#segment ⇒ Object
Returns the value of attribute segment.
7
8
9
|
# File 'lib/ferret/index/segment_reader.rb', line 7
# Reader for @segment, the segment name taken from the segment descriptor.
def segment
  return @segment
end
|
#term_infos ⇒ Object
Returns the value of attribute term_infos.
7
8
9
|
# File 'lib/ferret/index/segment_reader.rb', line 7
# Reader for @term_infos, the TermInfosReader created by the constructor.
def term_infos
  return @term_infos
end
|
Class Method Details
.create_fake_norms(size) ⇒ Object
235
236
237
|
# File 'lib/ferret/index/segment_reader.rb', line 235
# Builds a binary string of +size+ bytes, each with the value 1.
#
# size:: number of bytes to produce
def SegmentReader.create_fake_norms(size)
  ones = Array.new(size) { 1 }
  ones.pack("C*")
end
|
.get(info, infos = nil, close = false) ⇒ Object
10
11
12
|
# File 'lib/ferret/index/segment_reader.rb', line 10
# Convenience factory: builds a SegmentReader over +info+'s own directory.
#
# info::  segment descriptor (supplies the directory)
# infos:: optional segment-infos object; the reader is flagged as owner
#         exactly when this is not nil
# close:: forwarded to the constructor unchanged
def SegmentReader.get(info, infos = nil, close = false)
  owner = (infos != nil)
  SegmentReader.new(info.directory, info, infos, close, owner)
end
|
.has_deletions?(si) ⇒ Boolean
86
87
88
|
# File 'lib/ferret/index/segment_reader.rb', line 86
# Reports whether the directory of segment +si+ contains the segment's
# '.del' file.
def SegmentReader.has_deletions?(si)
  del_file = si.name + ".del"
  si.directory.exists?(del_file)
end
|
.has_separate_norms?(si) ⇒ Boolean
99
100
101
102
|
# File 'lib/ferret/index/segment_reader.rb', line 99
# Reports whether the directory of segment +si+ holds any file whose name
# begins with "<segment name>.s".
#
# si:: segment descriptor; must respond to +name+ and +directory+, and the
#      directory must yield file names from +each+
#
# Returns true as soon as a matching name is seen, false otherwise.
#
# Written as +def self.+: inside the SegmentReader class body this defines
# the same SegmentReader.has_separate_norms? class method.
def self.has_separate_norms?(si)
  # Literal prefix comparison. The previous regex interpolated the segment
  # name unescaped (so '.' or other metacharacters acted as patterns) and
  # used '^', which also matches after an embedded newline.
  prefix = "#{si.name}.s"
  si.directory.each { |f| return true if f.start_with?(prefix) }
  false
end
|
.uses_compound_file?(si) ⇒ Boolean
95
96
97
|
# File 'lib/ferret/index/segment_reader.rb', line 95
# Reports whether the directory of segment +si+ contains the segment's
# '.cfs' compound file.
def SegmentReader.uses_compound_file?(si)
  cfs_file = si.name + ".cfs"
  si.directory.exists?(cfs_file)
end
|
Instance Method Details
#close_norms ⇒ Object
308
309
310
311
312
|
# File 'lib/ferret/index/segment_reader.rb', line 308
# Closes the input stream of every norm in @norms while holding the
# monitor that @norms was extended with.
def close_norms
  @norms.synchronize do
    @norms.each_value do |norm|
      stream = norm.is
      stream.close
    end
  end
end
|
#deleted?(n) ⇒ Boolean
161
162
163
164
165
|
# File 'lib/ferret/index/segment_reader.rb', line 161
# Reports whether document +n+ is flagged in @deleted_docs.
# Returns false when no deletions are loaded; otherwise returns whatever
# @deleted_docs.get(n) returns. Runs under the reader's monitor.
def deleted?(n)
  synchronize do
    return false if @deleted_docs.nil?
    return @deleted_docs.get(n)
  end
end
|
#dir ⇒ Object
375
376
377
|
# File 'lib/ferret/index/segment_reader.rb', line 375
# Returns @directory.
def dir
  @directory
end
|
#do_close ⇒ Object
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
# File 'lib/ferret/index/segment_reader.rb', line 70
# Releases everything the reader holds open.
#
# Clears the thread-local slot named "<object_id>-<segment>-tv_reader" on
# the current thread, then closes, in order: the fields reader, the term
# infos reader, the frequency and proximity streams (when set), the norms,
# the term-vectors reader (when set) and finally the compound-file reader
# (when set).
def do_close
  tv_slot = "#{self.object_id}-#{@segment}-tv_reader"
  Thread.current[tv_slot] = nil

  @fields_reader.close
  @term_infos.close
  [@freq_stream, @prox_stream].each { |stream| stream.close if stream }
  close_norms
  @tv_reader_orig.close if @tv_reader_orig
  @cfs_reader.close if @cfs_reader
end
|
#do_commit ⇒ Object
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
# File 'lib/ferret/index/segment_reader.rb', line 50
# Writes pending deletion and norm changes to @directory and then clears
# the dirty flags.
#
# * When deletions are dirty, the bit vector is written to "<segment>.tmp"
#   and then renamed to "<segment>.del", so the '.del' file is replaced in
#   a single rename step.
# * When an undelete-all is pending and a '.del' file exists, it is deleted.
# * When norms are dirty, every norm reporting +dirty?+ is rewritten.
#
# Returns false (the value of the final flag assignment).
def do_commit
  if @deleted_docs_dirty
    @deleted_docs.write(@directory, @segment + '.tmp')
    @directory.rename(@segment + '.tmp', @segment + '.del')
  end

  if @undelete_all and @directory.exists?(@segment + '.del')
    @directory.delete(@segment + '.del')
  end

  if @norms_dirty
    @norms.each_value do |norm|
      if norm.dirty?
        norm.re_write(@directory, @segment, max_doc(), @cfs_reader)
      end
    end
  end

  @deleted_docs_dirty = false
  @norms_dirty = false
  @undelete_all = false
end
|
#do_delete(doc_num) ⇒ Object
104
105
106
107
108
109
110
111
|
# File 'lib/ferret/index/segment_reader.rb', line 104
# Flags document +doc_num+ as deleted in memory.
#
# Creates the deletions bit vector on first use, marks deletions as dirty
# (so do_commit will write them) and cancels any pending undelete-all.
# Returns the result of the bit vector's +set+ call.
def do_delete(doc_num)
  @deleted_docs = Ferret::Utils::BitVector.new if @deleted_docs.nil?
  @deleted_docs_dirty = true
  @undelete_all = false
  @deleted_docs.set(doc_num)
end
|
#do_set_norm(doc, field, value) ⇒ Object
258
259
260
261
262
263
264
265
266
267
|
# File 'lib/ferret/index/segment_reader.rb', line 258
# Overwrites the norm of document +doc+ for +field+ with +value+.
#
# doc::   index into the field's norm bytes
# field:: field name used as the key into @norms
# value:: new norm value stored at position +doc+
#
# Does nothing (returns nil) when @norms has no entry for +field+.
# Otherwise marks both the norm and the reader as dirty, so do_commit
# rewrites it, and stores the value in the buffer returned by get_norms.
def do_set_norm(doc, field, value)
  norm = @norms[field]
  return if norm.nil?

  norm.dirty = true
  @norms_dirty = true
  get_norms(field)[doc] = value
end
|
#do_undelete_all ⇒ Object
113
114
115
116
117
|
# File 'lib/ferret/index/segment_reader.rb', line 113
# Discards all in-memory deletions and records that an undelete-all is
# pending, so that do_commit removes the '.del' file. Returns true.
def do_undelete_all
  @deleted_docs_dirty = false
  @deleted_docs = nil
  @undelete_all = true
end
|
#doc_freq(t) ⇒ Object
175
176
177
178
179
180
181
182
|
# File 'lib/ferret/index/segment_reader.rb', line 175
# Looks up term +t+ in @term_infos and returns the +doc_freq+ of its term
# info, or 0 when the lookup returns nil.
def doc_freq(t)
  info = @term_infos.get_term_info(t)
  return 0 if info.nil?

  info.doc_freq
end
|
#file_names ⇒ Object
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
|
# File 'lib/ferret/index/segment_reader.rb', line 119
# Lists the files in @directory that belong to this segment.
#
# First come "<segment>.<ext>" for each extension in
# IndexFileNames::INDEX_EXTENSIONS that exists. Then, for each indexed
# field that does not omit norms, comes its norm file: "<segment>.f<i>"
# when there is no compound-file reader, "<segment>.s<i>" otherwise, where
# <i> is the field's position in @field_infos. Only names for which
# @directory.exists? is true are returned.
def file_names
  found = []

  IndexFileNames::INDEX_EXTENSIONS.each do |ext|
    candidate = @segment + "." + ext
    found << candidate if @directory.exists?(candidate)
  end

  norm_letter = @cfs_reader.nil? ? "f" : "s"
  @field_infos.each_with_index do |fi, i|
    next unless fi.indexed? && !fi.omit_norms?

    candidate = "#{@segment}.#{norm_letter}#{i}"
    found << candidate if @directory.exists?(candidate)
  end

  found
end
|
#get_document(n) ⇒ Object
152
153
154
155
156
157
158
159
|
# File 'lib/ferret/index/segment_reader.rb', line 152
# Returns stored document +n+ from the fields reader.
#
# Raises ArgumentError when deleted?(n) is true. The check and the read
# both happen inside the reader's monitor.
def get_document(n)
  synchronize do
    raise ArgumentError, "attempt to access a deleted document" if deleted?(n)

    return @fields_reader.doc(n)
  end
end
|
#get_field_names(field_option = IndexReader::FieldOption::ALL) ⇒ Object
See IndexReader#get_field_names
#get_norms(field) ⇒ Object
243
244
245
246
247
248
249
250
251
252
253
254
255
256
|
# File 'lib/ferret/index/segment_reader.rb', line 243
# Returns the norm bytes for +field+, loading and caching them on first
# access.
#
# Returns nil when @norms has no entry for +field+. Otherwise, when the
# norm has no cached bytes yet, a buffer of max_doc spaces is allocated,
# filled through get_norms_into and stored on the norm. The cached buffer
# is returned. Runs under the reader's monitor.
def get_norms(field)
  synchronize do
    norm = @norms[field]
    return nil if norm.nil?

    if norm.bytes.nil?
      bytes = " " * max_doc()
      get_norms_into(field, bytes, 0)
      norm.bytes = bytes
    end
    return norm.bytes
  end
end
|
#get_norms_into(field, bytes, offset) ⇒ Object
Read norms into a pre-allocated array.
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
|
# File 'lib/ferret/index/segment_reader.rb', line 270
# Copies max_doc norm bytes for +field+ into +bytes+ starting at +offset+.
#
# field::  field name used as the key into @norms
# bytes::  pre-allocated buffer that receives the norms
# offset:: position in +bytes+ at which to start writing
#
# The bytes come from one of three places:
# * the fake norms, when @norms has no entry for +field+;
# * the norm's cached bytes, when they have already been loaded;
# * otherwise a clone of the norm's input stream, read from position 0.
#   The clone is always closed, even when reading raises.
#
# Runs under the reader's monitor.
def get_norms_into(field, bytes, offset)
  synchronize do
    norm = @norms[field]
    if norm.nil?
      bytes[offset, max_doc()] = fake_norms[0, max_doc()]
      return
    end

    unless norm.bytes.nil?
      bytes[offset, max_doc()] = norm.bytes[0, max_doc()]
      return
    end

    # Read through a clone so the norm's own stream is left untouched.
    norm_stream = norm.is.clone()
    begin
      norm_stream.seek(0)
      norm_stream.read_bytes(bytes, offset, max_doc())
    ensure
      norm_stream.close()
    end
  end
end
|
#get_term_vector(doc_number, field) ⇒ Object
Return a term frequency vector for the specified document and field. The vector returned contains term numbers and frequencies for all terms in the specified field of this document, if the field had storeTermVector flag set. If the flag was not set, the method returns nil.
- raises
-
IOException
343
344
345
346
347
348
349
350
351
352
353
354
355
|
# File 'lib/ferret/index/segment_reader.rb', line 343
# Fetches the term vector of +field+ for document +doc_number+.
#
# Returns nil when the field is unknown, when it does not store term
# vectors, when the segment has no term-vectors reader, or when no
# per-thread reader could be obtained.
def get_term_vector(doc_number, field)
  fi = @field_infos[field]
  return nil if fi.nil? || !fi.store_term_vector? || @tv_reader_orig.nil?

  reader = get_term_vectors_reader
  return nil if reader.nil?

  reader.get_field_tv(doc_number, field)
end
|
#get_term_vectors(doc_number) ⇒ Object
Return an array of term frequency vectors for the specified document. The array contains a vector for each vectorized field in the document. Each vector contains term numbers and frequencies for all terms in a given vectorized field. If no such fields existed, the method returns nil.
- raises
-
IOException
364
365
366
367
368
369
370
371
372
373
|
# File 'lib/ferret/index/segment_reader.rb', line 364
# Fetches all term vectors of document +doc_number+.
#
# Returns nil when the segment has no term-vectors reader or when no
# per-thread reader could be obtained.
def get_term_vectors(doc_number)
  return nil if @tv_reader_orig.nil?

  reader = get_term_vectors_reader
  return nil if reader.nil?

  reader.get_tv(doc_number)
end
|
#get_term_vectors_reader ⇒ Object
Create a clone from the initial TermVectorsReader and store it in the Thread
- returns
-
TermVectorsReader
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
|
# File 'lib/ferret/index/segment_reader.rb', line 317
# Returns the current thread's private TermVectorsReader for this segment.
#
# The reader is looked up with Thread#get_local keyed by this object. On a
# miss, @tv_reader_orig is cloned, stored with Thread#set_local and
# returned.
def get_term_vectors_reader
  thread = Thread.current
  cached = thread.get_local(self)
  return cached unless cached.nil?

  fresh = @tv_reader_orig.clone
  thread.set_local(self, fresh)
  fresh
end
|
#has_deletions? ⇒ Boolean
90
91
92
|
# File 'lib/ferret/index/segment_reader.rb', line 90
# True when a deletions bit vector is currently held in @deleted_docs.
def has_deletions?
  !@deleted_docs.nil?
end
|
#has_norms?(field) ⇒ Boolean
231
232
233
|
# File 'lib/ferret/index/segment_reader.rb', line 231
# True when @norms has an entry for +field+.
def has_norms?(field)
  @norms.key?(field)
end
|
#max_doc ⇒ Object
192
193
194
|
# File 'lib/ferret/index/segment_reader.rb', line 192
# Returns the size reported by the fields reader.
def max_doc
  @fields_reader.size
end
|
#num_docs ⇒ Object
184
185
186
187
188
189
190
|
# File 'lib/ferret/index/segment_reader.rb', line 184
# Returns max_doc minus the count of the deletions bit vector, or just
# max_doc when no deletions are loaded.
def num_docs
  total = max_doc
  return total if @deleted_docs.nil?

  total - @deleted_docs.count
end
|
#open_norms(cfs_dir) ⇒ Object
293
294
295
296
297
298
299
300
301
302
303
304
305
306
|
# File 'lib/ferret/index/segment_reader.rb', line 293
# Opens a Norm for every indexed field that does not omit norms and stores
# it in @norms under the field's name.
#
# cfs_dir:: directory used for the "<segment>.f<number>" norm file
#
# A "<segment>.s<number>" file in @directory takes precedence when it
# exists; otherwise "<segment>.f<number>" is opened from +cfs_dir+.
def open_norms(cfs_dir)
  @field_infos.each do |fi|
    next unless fi.indexed? && !fi.omit_norms?

    separate_name = @segment + ".s" + fi.number.to_s
    if @directory.exists?(separate_name)
      source = @directory
      file_name = separate_name
    else
      source = cfs_dir
      file_name = @segment + ".f" + fi.number.to_s
    end

    @norms[fi.name] = Norm.new(source.open_input(file_name), fi.number)
  end
end
|
#term_docs ⇒ Object
167
168
169
|
# File 'lib/ferret/index/segment_reader.rb', line 167
# Returns a new SegmentTermDocEnum built over this reader.
def term_docs
  SegmentTermDocEnum.new(self)
end
|
#term_positions ⇒ Object
171
172
173
|
# File 'lib/ferret/index/segment_reader.rb', line 171
# Returns a new SegmentTermDocPosEnum built over this reader.
def term_positions
  SegmentTermDocPosEnum.new(self)
end
|
#terms ⇒ Object
144
145
146
|
# File 'lib/ferret/index/segment_reader.rb', line 144
# Delegates to @term_infos.terms.
def terms
  @term_infos.terms
end
|
#terms_from(t) ⇒ Object
148
149
150
|
# File 'lib/ferret/index/segment_reader.rb', line 148
# Delegates to @term_infos.terms_from, passing +t+ through unchanged.
def terms_from(t)
  @term_infos.terms_from(t)
end
|