Class: Ferret::Index::SegmentReader
Overview
FIXME: Describe class SegmentReader
here.
Defined Under Namespace
Classes: Norm
Constant Summary
Constants inherited
from IndexReader
IndexReader::FILENAME_EXTENSIONS
Instance Attribute Summary collapse
Attributes inherited from IndexReader
#directory
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from IndexReader
#acquire_write_lock, #close, #commit, #delete, #delete_docs_with_term, get_current_version, #get_document_with_term, index_exists?, #latest?, open, #set_norm, #term_docs_for, #term_positions_for, #undelete_all
Constructor Details
#initialize(dir, info, seg_infos, close, owner) ⇒ SegmentReader
Returns a new instance of SegmentReader.
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
# File 'lib/ferret/index/segment_reader.rb', line 14
# Opens every reader and stream needed to read one index segment.
#
# dir, seg_infos, close and owner are passed straight through to the
# superclass constructor; info supplies the segment name (info.name).
def initialize(dir, info, seg_infos, close, owner)
  super(dir, seg_infos, close, owner)
  @segment = info.name

  # When "<segment>.cfs" exists the segment files are read through a
  # CompoundFileReader, otherwise directly from the directory.
  compound_name = @segment + '.cfs'
  @cfs_reader = nil
  @cfs_reader = CompoundFileReader.new(directory, compound_name) if directory.exists?(compound_name)
  cfs = @cfs_reader || directory

  @field_infos = FieldInfos.new(cfs, @segment + '.fnm')
  @fields_reader = FieldsReader.new(cfs, @segment, @field_infos)
  @term_infos = TermInfosReader.new(cfs, @segment, @field_infos)

  # The deletions bit vector is always read from the plain directory.
  @deleted_docs_dirty = false
  @deleted_docs = nil
  if SegmentReader.has_deletions?(info)
    @deleted_docs = Ferret::Utils::BitVector.read(directory, @segment + '.del')
  end

  @freq_stream = cfs.open_input(@segment + '.frq')
  @prox_stream = cfs.open_input(@segment + '.prx')

  # The norms table is extended with MonitorMixin so it can be locked via
  # @norms.synchronize.
  @norms_dirty = false
  @norms = {}
  @norms.extend(MonitorMixin)
  open_norms(cfs)

  # A term-vectors reader is only created when some field stores vectors.
  @tv_reader_orig = nil
  @tv_reader_orig = TermVectorsReader.new(cfs, @segment, @field_infos) if @field_infos.has_vectors?
end
|
Instance Attribute Details
#deleted_docs ⇒ Object
Returns the value of attribute deleted_docs.
7
8
9
|
# File 'lib/ferret/index/segment_reader.rb', line 7
# Returns the current @deleted_docs value (nil when none is set).
def deleted_docs; @deleted_docs; end
|
#field_infos ⇒ Object
Returns the value of attribute field_infos.
7
8
9
|
# File 'lib/ferret/index/segment_reader.rb', line 7
# Returns the object stored in @field_infos.
def field_infos; @field_infos; end
|
#freq_stream ⇒ Object
Returns the value of attribute freq_stream.
7
8
9
|
# File 'lib/ferret/index/segment_reader.rb', line 7
# Returns the stream stored in @freq_stream.
def freq_stream; @freq_stream; end
|
#prox_stream ⇒ Object
Returns the value of attribute prox_stream.
7
8
9
|
# File 'lib/ferret/index/segment_reader.rb', line 7
# Returns the stream stored in @prox_stream.
def prox_stream; @prox_stream; end
|
#segment ⇒ Object
Returns the value of attribute segment.
7
8
9
|
# File 'lib/ferret/index/segment_reader.rb', line 7
# Returns the segment name stored in @segment.
def segment; @segment; end
|
#term_infos ⇒ Object
Returns the value of attribute term_infos.
7
8
9
|
# File 'lib/ferret/index/segment_reader.rb', line 7
# Returns the object stored in @term_infos.
def term_infos; @term_infos; end
|
Class Method Details
.get(info, infos = nil, close = false) ⇒ Object
10
11
12
|
# File 'lib/ferret/index/segment_reader.rb', line 10
# Builds a SegmentReader for the segment described by +info+.
#
# info::  segment descriptor; its +directory+ is the directory to read from
# infos:: optional segment-infos object, forwarded to the constructor
# close:: forwarded to the constructor as its +close+ argument
#
# The constructor's +owner+ argument is true exactly when +infos+ was given.
def self.get(info, infos = nil, close = false)
  SegmentReader.new(info.directory, info, infos, close, !infos.nil?)
end
|
.has_deletions?(si) ⇒ Boolean
85
86
87
|
# File 'lib/ferret/index/segment_reader.rb', line 85
# True when the segment's directory contains a "<name>.del" file.
#
# si:: segment descriptor responding to +directory+ and +name+
def self.has_deletions?(si)
  si.directory.exists?(si.name + ".del")
end
|
.has_separate_norms?(si) ⇒ Boolean
98
99
100
101
|
# File 'lib/ferret/index/segment_reader.rb', line 98
# True when the segment's directory lists at least one file whose name
# starts with "<name>.s".
#
# The segment name is compared as a literal prefix. Interpolating it into a
# regular expression would let characters such as "." or "+" in the name act
# as regex operators, and "^" would also match after an embedded newline.
#
# si:: segment descriptor responding to +directory+ (which yields file names
#      from +each+) and +name+
def self.has_separate_norms?(si)
  prefix = si.name + ".s"
  si.directory.each { |f| return true if f.start_with?(prefix) }
  false
end
|
.uses_compound_file?(si) ⇒ Boolean
94
95
96
|
# File 'lib/ferret/index/segment_reader.rb', line 94
# True when the segment's directory contains a "<name>.cfs" file.
#
# si:: segment descriptor responding to +directory+ and +name+
def self.uses_compound_file?(si)
  si.directory.exists?(si.name + ".cfs")
end
|
Instance Method Details
#close_norms ⇒ Object
292
293
294
295
296
|
# File 'lib/ferret/index/segment_reader.rb', line 292
# Closes the input stream (+is+) of every entry in @norms while holding the
# @norms monitor.
def close_norms
  @norms.synchronize do
    @norms.each_value do |entry|
      stream = entry.is
      stream.close
    end
  end
end
|
#deleted?(n) ⇒ Boolean
160
161
162
163
164
|
# File 'lib/ferret/index/segment_reader.rb', line 160
# Reports whether document +n+ is marked in @deleted_docs.
# Returns false when there is no @deleted_docs; otherwise the result of
# @deleted_docs.get(n). Runs inside +synchronize+.
def deleted?(n)
  synchronize do
    return false if @deleted_docs.nil?
    return @deleted_docs.get(n)
  end
end
|
#dir ⇒ Object
359
360
361
|
# File 'lib/ferret/index/segment_reader.rb', line 359
# Returns the directory stored in @directory.
def dir; @directory; end
|
#do_close ⇒ Object
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
# File 'lib/ferret/index/segment_reader.rb', line 69
# Releases everything this reader opened: the fields reader, term infos,
# frequency/proximity streams (when present), the norm streams, the
# term-vectors reader and the compound-file reader (when present).
def do_close
  # NOTE(review): this clears a Thread.current[...] key, whereas
  # get_term_vectors_reader stores its clone via Thread#set_local(self, ...);
  # confirm the two refer to the same thread-local slot.
  tv_key = "#{object_id}-#{@segment}-tv_reader"
  Thread.current[tv_key] = nil

  @fields_reader.close
  @term_infos.close
  [@freq_stream, @prox_stream].each { |stream| stream.close if stream }
  close_norms
  @tv_reader_orig.close if @tv_reader_orig
  @cfs_reader.close if @cfs_reader
end
|
#do_commit ⇒ Object
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
# File 'lib/ferret/index/segment_reader.rb', line 49
# Writes pending changes for this segment to @directory and clears the
# dirty flags.
#
# * If deletions are dirty, @deleted_docs is written to "<segment>.tmp" and
#   that file is then renamed to "<segment>.del".
# * If an undelete-all is pending and "<segment>.del" exists, it is deleted.
# * If norms are dirty, every norm reporting dirty? is rewritten.
def do_commit
  if @deleted_docs_dirty
    # Write to a temporary name first, then rename over the .del file.
    @deleted_docs.write(@directory, @segment + '.tmp')
    @directory.rename(@segment + '.tmp', @segment + '.del')
  end

  if @undelete_all && @directory.exists?(@segment + '.del')
    @directory.delete(@segment + '.del')
  end

  if @norms_dirty
    @norms.each_value do |norm|
      norm.re_write(@directory, @segment, max_doc, @cfs_reader) if norm.dirty?
    end
  end

  @deleted_docs_dirty = false
  @norms_dirty = false
  @undelete_all = false
end
|
#do_delete(doc_num) ⇒ Object
103
104
105
106
107
108
109
110
|
# File 'lib/ferret/index/segment_reader.rb', line 103
# Marks document +doc_num+ in @deleted_docs, creating the bit vector on first
# use. Flags deletions as dirty and cancels any pending undelete-all.
def do_delete(doc_num)
  @deleted_docs = Ferret::Utils::BitVector.new if @deleted_docs.nil?
  @deleted_docs_dirty = true
  @undelete_all = false
  @deleted_docs.set(doc_num)
end
|
#do_set_norm(doc, field, value) ⇒ Object
245
246
247
248
249
250
251
252
253
254
|
# File 'lib/ferret/index/segment_reader.rb', line 245
# Stores +value+ as the norm of document +doc+ for +field+.
#
# Does nothing (returns nil) when @norms has no entry for +field+. Otherwise
# marks that norm and the reader's norms as dirty, then writes +value+ at
# index +doc+ of the byte buffer returned by get_norms(field).
def do_set_norm(doc, field, value)
  norm = @norms[field]
  return if norm.nil?

  norm.dirty = true
  @norms_dirty = true
  get_norms(field)[doc] = value
end
|
#do_undelete_all ⇒ Object
112
113
114
115
116
|
# File 'lib/ferret/index/segment_reader.rb', line 112
# Drops the in-memory deletions (@deleted_docs becomes nil, no longer dirty)
# and sets @undelete_all so that do_commit can act on it.
def do_undelete_all
  @deleted_docs_dirty = false
  @deleted_docs = nil
  @undelete_all = true
end
|
#doc_freq(t) ⇒ Object
174
175
176
177
178
179
180
181
|
# File 'lib/ferret/index/segment_reader.rb', line 174
# Returns the doc_freq of the term info that @term_infos holds for +t+, or 0
# when there is no term info for it.
def doc_freq(t)
  term_info = @term_infos.get_term_info(t)
  term_info.nil? ? 0 : term_info.doc_freq
end
|
#file_names ⇒ Object
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
|
# File 'lib/ferret/index/segment_reader.rb', line 118
# Lists the files belonging to this segment that exist in @directory.
#
# First every "<segment>.<ext>" for the extensions in
# IndexFileNames::INDEX_EXTENSIONS, then, for each indexed field, its norm
# file: "<segment>.f<i>" when there is no compound-file reader,
# "<segment>.s<i>" otherwise.
def file_names
  found = []

  IndexFileNames::INDEX_EXTENSIONS.each do |ext|
    candidate = @segment + "." + ext
    found << candidate if @directory.exists?(candidate)
  end

  norm_ext = @cfs_reader.nil? ? ".f" : ".s"
  @field_infos.each_with_index do |fi, i|
    next unless fi.indexed?

    candidate = @segment + norm_ext + i.to_s
    found << candidate if @directory.exists?(candidate)
  end

  found
end
|
#get_document(n) ⇒ Object
151
152
153
154
155
156
157
158
|
# File 'lib/ferret/index/segment_reader.rb', line 151
# Returns document +n+ from the fields reader.
# Raises ArgumentError when deleted?(n) is true. Runs inside +synchronize+.
def get_document(n)
  synchronize do
    raise ArgumentError, "attempt to access a deleted document" if deleted?(n)

    return @fields_reader.doc(n)
  end
end
|
#get_field_names(field_option = IndexReader::FieldOption::ALL) ⇒ Object
See IndexReader#get_field_names
#get_norms(field) ⇒ Object
230
231
232
233
234
235
236
237
238
239
240
241
242
243
|
# File 'lib/ferret/index/segment_reader.rb', line 230
# Returns the norm bytes for +field+, or nil when @norms has no entry for it.
#
# On first access the bytes are read via get_norms_into into a buffer of
# max_doc characters and stored on the norm entry; later calls return that
# stored buffer. Runs inside +synchronize+.
def get_norms(field)
  synchronize do
    norm = @norms[field]
    return nil if norm.nil?

    if norm.bytes.nil?
      # Not loaded yet: fill a fresh buffer and keep it on the entry.
      bytes = " " * max_doc
      get_norms_into(field, bytes, 0)
      norm.bytes = bytes
    end
    return norm.bytes
  end
end
|
#get_norms_into(field, bytes, offset) ⇒ Object
Read norms into a pre-allocated array.
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
|
# File 'lib/ferret/index/segment_reader.rb', line 257
# Copies max_doc norm bytes for +field+ into +bytes+, starting at +offset+.
#
# * No entry in @norms for +field+: +bytes+ is left untouched.
# * The entry already holds its bytes: they are copied from there.
# * Otherwise they are read from a clone of the entry's input stream, which
#   is closed afterwards even if reading raises.
#
# Runs inside +synchronize+.
def get_norms_into(field, bytes, offset)
  synchronize do
    norm = @norms[field]
    return if norm.nil?

    unless norm.bytes.nil?
      bytes[offset, max_doc] = norm.bytes[0, max_doc]
      return
    end

    # Work on a clone so the entry's own stream is not repositioned.
    norm_stream = norm.is.clone
    begin
      norm_stream.seek(0)
      norm_stream.read_bytes(bytes, offset, max_doc)
    ensure
      norm_stream.close
    end
  end
end
|
#get_term_vector(doc_number, field) ⇒ Object
Return a term frequency vector for the specified document and field. The vector returned contains term numbers and frequencies for all terms in the specified field of this document, if the field had storeTermVector flag set. If the flag was not set, the method returns nil.
- raises
-
IOException
327
328
329
330
331
332
333
334
335
336
337
338
339
|
# File 'lib/ferret/index/segment_reader.rb', line 327
# Returns the term vector of +field+ in document +doc_number+.
# Returns nil when the field is unknown, does not store term vectors, this
# segment has no term-vectors reader, or no per-thread reader is available.
def get_term_vector(doc_number, field)
  fi = @field_infos[field]
  return nil if fi.nil? || !fi.store_term_vector? || @tv_reader_orig.nil?

  reader = get_term_vectors_reader
  return nil if reader.nil?

  reader.get_field_tv(doc_number, field)
end
|
#get_term_vectors(doc_number) ⇒ Object
Return an array of term frequency vectors for the specified document. The array contains a vector for each vectorized field in the document. Each vector contains term numbers and frequencies for all terms in a given vectorized field. If no such fields existed, the method returns nil.
- raises
-
IOException
348
349
350
351
352
353
354
355
356
357
|
# File 'lib/ferret/index/segment_reader.rb', line 348
# Returns the term vectors of document +doc_number+ as produced by the
# term-vectors reader's get_tv, or nil when this segment has no term-vectors
# reader or no per-thread reader is available.
def get_term_vectors(doc_number)
  return nil if @tv_reader_orig.nil?

  reader = get_term_vectors_reader
  return nil if reader.nil?

  reader.get_tv(doc_number)
end
|
#get_term_vectors_reader ⇒ Object
Create a clone from the initial TermVectorsReader and store it in the Thread
- returns
-
TermVectorsReader
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
|
# File 'lib/ferret/index/segment_reader.rb', line 301
# Returns the term-vectors reader kept for the current thread under this
# reader's key. When the thread has none yet, @tv_reader_orig is cloned, the
# clone is stored for the thread, and the clone is returned.
def get_term_vectors_reader
  thread = Thread.current
  cached = thread.get_local(self)
  return cached unless cached.nil?

  fresh = @tv_reader_orig.clone
  thread.set_local(self, fresh)
  fresh
end
|
#has_deletions? ⇒ Boolean
89
90
91
|
# File 'lib/ferret/index/segment_reader.rb', line 89
# True when this reader currently holds a @deleted_docs object.
def has_deletions?
  !@deleted_docs.nil?
end
|
#max_doc ⇒ Object
191
192
193
|
# File 'lib/ferret/index/segment_reader.rb', line 191
# Returns the size reported by the fields reader.
def max_doc
  @fields_reader.size
end
|
#num_docs ⇒ Object
183
184
185
186
187
188
189
|
# File 'lib/ferret/index/segment_reader.rb', line 183
# Returns max_doc minus the count reported by @deleted_docs, or just max_doc
# when there is no @deleted_docs.
def num_docs
  total = max_doc
  return total if @deleted_docs.nil?

  total - @deleted_docs.count
end
|
#open_norms(cfs_dir) ⇒ Object
277
278
279
280
281
282
283
284
285
286
287
288
289
290
|
# File 'lib/ferret/index/segment_reader.rb', line 277
# Opens one norm input per indexed field and registers it in @norms under
# the field's name.
#
# "<segment>.s<number>" is opened from @directory when it exists there;
# otherwise "<segment>.f<number>" is opened from +cfs_dir+.
def open_norms(cfs_dir)
  @field_infos.each do |fi|
    next unless fi.indexed?

    separate_name = @segment + ".s" + fi.number.to_s
    if @directory.exists?(separate_name)
      source = @directory
      file_name = separate_name
    else
      source = cfs_dir
      file_name = @segment + ".f" + fi.number.to_s
    end
    @norms[fi.name] = Norm.new(source.open_input(file_name), fi.number)
  end
end
|
#term_docs ⇒ Object
166
167
168
|
# File 'lib/ferret/index/segment_reader.rb', line 166
# Returns a new SegmentTermDocEnum constructed over this reader.
def term_docs
  SegmentTermDocEnum.new(self)
end
|
#term_positions ⇒ Object
170
171
172
|
# File 'lib/ferret/index/segment_reader.rb', line 170
# Returns a new SegmentTermDocPosEnum constructed over this reader.
def term_positions
  SegmentTermDocPosEnum.new(self)
end
|
#terms ⇒ Object
143
144
145
|
# File 'lib/ferret/index/segment_reader.rb', line 143
# Returns whatever @term_infos.terms returns.
def terms
  @term_infos.terms
end
|
#terms_from(t) ⇒ Object
147
148
149
|
# File 'lib/ferret/index/segment_reader.rb', line 147
# Returns whatever @term_infos.terms_from(t) returns.
def terms_from(t)
  @term_infos.terms_from(t)
end
|