Class: Ferret::Search::TermScorer

Inherits:
Scorer
  • Object
show all
Defined in:
lib/ferret/search/term_scorer.rb

Overview

Expert: A Scorer for documents matching a Term.

Constant Summary collapse

SCORE_CACHE_SIZE =
32

Constants inherited from Scorer

Scorer::MAX_DOCS

Instance Attribute Summary collapse

Attributes inherited from Scorer

#similarity

Instance Method Summary collapse

Constructor Details

#initialize(weight, td, similarity, norms) ⇒ TermScorer

Construct a TermScorer.

weight

The weight of the Term in the query.

td

An iterator over the documents matching the Term.

similarity

The Similarity implementation to be used for score

computations.

norms

The field norms of the document fields for the Term.



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/ferret/search/term_scorer.rb', line 16

# Construct a TermScorer.
#
# weight::     The weight of the Term in the query. Its +value+ is read once
#              here and folded into every score.
# td::         An iterator over the documents matching the Term.
# similarity:: The Similarity implementation used for score computations;
#              it is handed to the superclass initializer.
# norms::      The field norms, looked up by document number when scoring.
def initialize(weight, td, similarity, norms)
  super(similarity)

  @weight       = weight
  @weight_value = weight.value
  @term_docs    = td
  @norms        = norms

  # Current document, plus the buffers that #next? refills from the iterator.
  @doc   = 0
  @docs  = Array.new(SCORE_CACHE_SIZE, 0) # buffered doc numbers
  @freqs = Array.new(SCORE_CACHE_SIZE, 0) # buffered term freqs

  # Read position inside the buffers and the number of valid entries in them.
  @pointer     = 0
  @pointer_max = 0

  # Pre-compute tf(freq) * weight for every frequency below SCORE_CACHE_SIZE
  # so the scoring methods can skip the tf() call for small frequencies.
  @score_cache = Array.new(SCORE_CACHE_SIZE) do |freq|
    similarity().tf(freq) * @weight_value
  end
end

Instance Attribute Details

#docObject (readonly)

Returns the current document number matching the query. The value is not valid until #next? has been called for the first time.



8
9
10
# File 'lib/ferret/search/term_scorer.rb', line 8

# Reader for the current document number. It holds whatever was last stored
# in @doc, so it is only meaningful once the scorer has been advanced.
def doc
  return @doc
end

Instance Method Details

#each_hitObject

Expert: Iterates over all matching documents, yielding the document number and the score.

returns

nil once every matching document has been yielded (the loop only ends when #next? returns false, so no matching documents remain).



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/ferret/search/term_scorer.rb', line 39

# Expert: advances through every remaining matching document with #next?
# and yields the document number together with its score.
#
# The score is tf(freq) * weight, taken from @score_cache when the
# frequency is below SCORE_CACHE_SIZE, multiplied by the decoded field norm
# of the document.
#
# Returns nil once #next? reports that no documents are left.
def each_hit # :yields: doc, score
  scorer_sim = similarity() # look the similarity up once, outside the loop
  while next?
    freq = @freqs[@pointer]

    # tf(freq) * weight: cached for small frequencies, computed otherwise
    weighted_tf =
      if freq < SCORE_CACHE_SIZE
        @score_cache[freq]
      else
        scorer_sim.tf(freq) * @weight_value
      end

    # normalize for the field and hand the hit to the caller
    yield(@doc, weighted_tf * scorer_sim.decode_norm(@norms[@doc]))
  end
end

#each_hit_up_to(max = MAX_DOCS) ⇒ Object

Expert: Iterates over matching documents in a range.

NOTE: #next? needs to be called first.

max

Do not score documents past this. Default will search all documents

available.

returns

true if more matching documents may remain.



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/ferret/search/term_scorer.rb', line 64

# Expert: yields the document number and score of each matching document
# whose number is below +max+, starting with the current document.
#
# The scorer must already be positioned on a document (call #next? first),
# because the current document is scored before #next? is invoked again.
#
# max:: Documents numbered +max+ or higher are not scored. Defaults to
#       MAX_DOCS.
#
# Returns false as soon as #next? reports that the documents are exhausted,
# and true when the loop stops because the current document reached +max+.
def each_hit_up_to(max = MAX_DOCS) # :yields: doc, score
  scorer_sim = similarity() # look the similarity up once, outside the loop
  until @doc >= max # stay inside the requested window
    freq = @freqs[@pointer]

    # tf(freq) * weight: cached for small frequencies, computed otherwise
    weighted_tf =
      if freq < SCORE_CACHE_SIZE
        @score_cache[freq]
      else
        scorer_sim.tf(freq) * @weight_value
      end

    # normalize for the field and hand the hit to the caller
    yield(@doc, weighted_tf * scorer_sim.decode_norm(@norms[@doc]))

    return false unless next? # ran out of matching documents
  end
  true
end

#explain(doc) ⇒ Object

Returns an explanation of the score for a document.

When this method is used, the #next? method and the #score method should not be used.

doc

The document number for the explanation.

TODO: Modify to make use of TermDocEnum#skip_to(int).



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/ferret/search/term_scorer.rb', line 156

# Returns an Explanation of the term-frequency factor for +doc+.
#
# The rest of the doc/freq buffer is consumed first; if no non-zero
# frequency was found there, the remainder of the term-docs stream is
# scanned. The stream is closed afterwards in every case, so the scorer
# should not be advanced or scored after calling this method.
#
# doc:: The document number for the explanation.
#
# TODO: Modify to make use of TermDocEnum#skip_to(int).
def explain(doc)
  term_query = @weight.query
  explanation = Explanation.new
  freq = 0

  # Drain what is left of the buffer, remembering the frequency for +doc+.
  until @pointer >= @pointer_max
    freq = @freqs[@pointer] if @docs[@pointer] == doc
    @pointer += 1
  end

  # Nothing buffered for +doc+: walk the rest of the underlying stream.
  if freq == 0
    while @term_docs.next?
      freq = @term_docs.freq if @term_docs.doc == doc
    end
  end
  @term_docs.close

  explanation.value = similarity().tf(freq)
  explanation.description = "tf(term_freq(#{term_query.term})=#{freq})"

  explanation
end

#next?Boolean

Advances to the next document matching the query.

The iterator over the matching documents is buffered using TermDocEnum#read(int[],int).

returns

true iff there is another document matching the query.

Returns:

  • (Boolean)


93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/ferret/search/term_scorer.rb', line 93

# Advances to the next document matching the query.
#
# Documents are served from the @docs/@freqs buffers. When the buffer is
# used up it is refilled through @term_docs.read; a refill that delivers
# nothing closes the stream and parks @doc on the MAX_DOCS sentinel.
#
# Returns true iff there is another document matching the query.
def next?
  @pointer += 1

  if @pointer >= @pointer_max
    @pointer_max = @term_docs.read(@docs, @freqs) # refill buffer

    if @pointer_max == 0
      @term_docs.close # nothing left: close stream
      @doc = MAX_DOCS  # sentinel value
      return false
    end

    @pointer = 0 # restart at the head of the fresh buffer
  end

  @doc = @docs[@pointer]
  true
end

#scoreObject



109
110
111
112
113
114
115
116
117
118
119
# File 'lib/ferret/search/term_scorer.rb', line 109

# Returns the score of the current document: tf(freq) * weight, multiplied
# by the decoded field norm of the document.
#
# The tf(freq) * weight part comes from @score_cache when the frequency is
# below SCORE_CACHE_SIZE and is computed on demand otherwise.
def score
  freq = @freqs[@pointer]

  weighted_tf =
    if freq < SCORE_CACHE_SIZE
      @score_cache[freq]                    # cache hit
    else
      similarity().tf(freq) * @weight_value # cache miss
    end

  # normalize for field
  weighted_tf * Similarity.decode_norm(@norms[@doc])
end

#skip_to(target) ⇒ Object

Skips to the first match beyond the current whose document number is greater than or equal to a given target.

The implementation uses TermDocEnum#skip_to(int).

target

The target document number.

returns

true iff there is such a match.



127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/ferret/search/term_scorer.rb', line 127

# Skips to the first match beyond the current one whose document number is
# greater than or equal to +target+.
#
# The buffered documents after the current position are checked first. If
# none qualifies, the search is delegated to @term_docs.skip_to; on success
# the buffer is reset to hold just that one document, otherwise @doc is set
# to the MAX_DOCS sentinel.
#
# target:: The target document number.
#
# Returns true when a buffered document qualifies, otherwise the result of
# @term_docs.skip_to.
def skip_to(target)
  # first scan in cache, starting just past the current entry
  @pointer += 1
  while @pointer < @pointer_max
    candidate = @docs[@pointer]
    if candidate >= target
      @doc = candidate
      return true
    end
    @pointer += 1
  end

  # not found in cache, seek underlying stream
  found = @term_docs.skip_to(target)
  if found
    @pointer_max = 1
    @pointer = 0
    @doc = @term_docs.doc
    @docs[@pointer] = @doc
    @freqs[@pointer] = @term_docs.freq
  else
    @doc = MAX_DOCS
  end
  found
end

#to_sObject

Returns a string representation of this TermScorer.



181
# File 'lib/ferret/search/term_scorer.rb', line 181

# Returns a string representation of this TermScorer in the form
# "scorer(<weight>)".
#
# The weight is interpolated, which calls its #to_s. Concatenating it with
# String#+ raised TypeError whenever the weight was not itself a String.
def to_s
  "scorer(#{@weight})"
end