Class: Ferret::Search::TermScorer

Inherits:
Scorer
  • Object
show all
Defined in:
lib/ferret/search/term_scorer.rb

Overview

Expert: A Scorer for documents matching a Term.

Constant Summary collapse

SCORE_CACHE_SIZE =
32

Constants inherited from Scorer

Scorer::MAX_DOCS

Instance Attribute Summary collapse

Attributes inherited from Scorer

#similarity

Instance Method Summary collapse

Constructor Details

#initialize(weight, td, similarity, norms) ⇒ TermScorer

Construct a TermScorer.

weight

The weight of the Term in the query.

td

An iterator over the documents matching the Term.

similarity

The Similarity implementation to be used for score

computations.

norms

The field norms of the document fields for the Term.



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/ferret/search/term_scorer.rb', line 16

# Construct a TermScorer.
#
# weight::     The weight of the Term in the query.
# td::         An iterator over the documents matching the Term.
# similarity:: The Similarity implementation to be used for score
#              computations.
# norms::      The field norms of the document fields for the Term.
def initialize(weight, td, similarity, norms)
  super(similarity)

  # Current document plus the buffers that #next? refills from the term docs.
  @doc = 0
  @docs = Array.new(32, 0)  # buffered doc numbers
  @freqs = Array.new(32, 0) # buffered term freqs
  @pointer = 0
  @pointer_max = 0

  @score_cache = Array.new(SCORE_CACHE_SIZE)

  @weight = weight
  @term_docs = td
  @norms = norms
  # XXX
  @norms_size = @norms.size
  @weight_value = weight.value

  # Precompute tf(freq) * weight for every frequency below SCORE_CACHE_SIZE.
  # The parentheses on similarity() are required: they call the inherited
  # reader instead of reading the +similarity+ parameter.
  @score_cache.each_index do |freq|
    @score_cache[freq] = similarity().tf(freq) * @weight_value
  end
end

Instance Attribute Details

#doc ⇒ Object (readonly)

Returns the current document number matching the query. Initially invalid, until #next? is called for the first time.



8
9
10
# File 'lib/ferret/search/term_scorer.rb', line 8

# Returns the current document number held in @doc.
def doc()
  return @doc
end

Instance Method Details

#each_hit ⇒ Object

Expert: Iterates over all matching documents, yielding the document number and the score.

returns

true if more matching documents may remain.



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/ferret/search/term_scorer.rb', line 41

# Expert: Advances with #next? until it reports no more documents, yielding
# each document number together with its score.
#
# The score is tf(freq) * weight, taken from @score_cache when freq is below
# SCORE_CACHE_SIZE, multiplied by the decoded field norm of the document.
#
# The method's value is that of the +while+ loop, i.e. nil.
def each_hit # :yields: doc, score
  scoring = similarity # keep the similarity in a local for the loop

  while next?
    freq = @freqs[@pointer]

    # Cache hit for small frequencies, otherwise compute tf(freq) * weight.
    raw = freq < SCORE_CACHE_SIZE ? @score_cache[freq] : scoring.tf(freq) * @weight_value

    # Normalize for the field, then hand the hit to the caller.
    yield @doc, raw * scoring.decode_norm(@norms[@doc])
  end
end

#each_hit_up_to(max = MAX_DOCS) ⇒ Object

Expert: Iterates over matching documents in a range.

NOTE: #next? needs to be called first.

max

Do not score documents past this. Default will search all documents

available.

returns

true if more matching documents may remain.



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/ferret/search/term_scorer.rb', line 66

# Expert: Yields document number and score for the current document and the
# ones after it, for as long as the current document number is below +max+.
#
# The current document is scored before #next? is called, so #next? must
# already have positioned the scorer on a document.
#
# max:: Documents numbered +max+ or higher are not scored. Defaults to
#       MAX_DOCS.
#
# Returns false as soon as #next? reports that no documents remain, and true
# when the loop stops because the current document number reached +max+.
def each_hit_up_to(max = MAX_DOCS) # :yields: doc, score
  scoring = similarity # keep the similarity in a local for the loop

  while @doc < max
    freq = @freqs[@pointer]

    # Cache hit for small frequencies, otherwise compute tf(freq) * weight.
    raw = freq < SCORE_CACHE_SIZE ? @score_cache[freq] : scoring.tf(freq) * @weight_value

    # Normalize for the field, then hand the hit to the caller.
    yield @doc, raw * scoring.decode_norm(@norms[@doc])

    return false unless next?
  end

  true
end

#explain(doc) ⇒ Object

Returns an explanation of the score for a document.

When this method is used, the #next? method and the #score method should not be used.

doc

The document number for the explanation.

TODO: Modify to make use of TermDocEnum#skip_to(int).



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/ferret/search/term_scorer.rb', line 158

# Returns an Explanation of the term-frequency part of the score for +doc+.
#
# The remaining buffered entries are scanned first; if none of them yields a
# non-zero frequency for +doc+, the rest of @term_docs is read to the end.
# @term_docs is closed afterwards and @pointer is left at @pointer_max, so
# the scorer should not be iterated after calling this method.
#
# doc:: The document number for the explanation.
#
# TODO: Modify to make use of TermDocEnum#skip_to(int).
def explain(doc)
  term_query = @weight.query
  explanation = Explanation.new
  term_freq = 0

  # Look through whatever is still buffered.
  while @pointer < @pointer_max
    term_freq = @freqs[@pointer] if @docs[@pointer] == doc
    @pointer += 1
  end

  # Not found in the buffer: drain the underlying enumerator.
  if term_freq == 0
    while @term_docs.next?
      term_freq = @term_docs.freq if @term_docs.doc == doc
    end
  end
  @term_docs.close

  explanation.value = similarity.tf(term_freq)
  explanation.description = "tf(term_freq(#{term_query.term})=#{term_freq})"

  explanation
end

#next? ⇒ Boolean

Advances to the next document matching the query.

The iterator over the matching documents is buffered using TermDocEnum#read(int[],int).

returns

true iff there is another document matching the query.



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/ferret/search/term_scorer.rb', line 95

# Advances to the next document matching the query.
#
# Documents are served from the @docs/@freqs buffers; once the buffer is
# used up it is refilled through @term_docs.read.
#
# Returns true and sets @doc when another document is available. When the
# refill returns 0, closes @term_docs, sets @doc to MAX_DOCS and returns
# false.
def next?
  @pointer += 1

  if @pointer >= @pointer_max
    @pointer_max = @term_docs.read(@docs, @freqs) # refill buffer

    if @pointer_max == 0
      @term_docs.close # close stream
      @doc = MAX_DOCS  # set to sentinel value
      return false
    end

    @pointer = 0
  end

  @doc = @docs[@pointer]
  true
end

#score ⇒ Object



111
112
113
114
115
116
117
118
119
120
121
# File 'lib/ferret/search/term_scorer.rb', line 111

# Returns the score of the current document: tf(freq) * weight, read from
# @score_cache when freq is below SCORE_CACHE_SIZE, multiplied by the decoded
# field norm for @doc.
def score
  freq = @freqs[@pointer]

  # Cache hit for small frequencies, otherwise compute tf(freq) * weight.
  raw =
    if freq < SCORE_CACHE_SIZE
      @score_cache[freq]
    else
      similarity.tf(freq) * @weight_value
    end

  # Normalize for the field.
  raw * Similarity.decode_norm(@norms[@doc])
end

#skip_to(target) ⇒ Object

Skips to the first match beyond the current whose document number is greater than or equal to a given target.

The implementation uses TermDocEnum#skip_to(int).

target

The target document number.

returns

true iff there is such a match.



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/ferret/search/term_scorer.rb', line 129

# Skips to the first match beyond the current one whose document number is
# greater than or equal to +target+.
#
# The buffered documents after the current position are checked first. If
# none qualifies, @term_docs.skip_to is used and, on success, the buffer is
# reset to hold just that one document.
#
# target:: The target document number.
#
# Returns true when a buffered document matches, otherwise whatever
# @term_docs.skip_to returned. On failure @doc is set to MAX_DOCS.
def skip_to(target)
  # First scan the buffer, starting just past the current entry.
  @pointer += 1
  while @pointer < @pointer_max
    candidate = @docs[@pointer]
    if candidate >= target
      @doc = candidate
      return true
    end
    @pointer += 1
  end

  # Not found in the buffer, seek the underlying stream.
  found = @term_docs.skip_to(target)
  unless found
    @doc = MAX_DOCS
    return found
  end

  @pointer_max = 1
  @pointer = 0
  @doc = @term_docs.doc
  @docs[@pointer] = @doc
  @freqs[@pointer] = @term_docs.freq
  found
end

#to_s ⇒ Object

Returns a string representation of this TermScorer.



183
# File 'lib/ferret/search/term_scorer.rb', line 183

# Returns a string representation of this TermScorer, in the form
# "scorer(<weight>)".
#
# The weight is interpolated, which calls its #to_s. Concatenating it with
# String#+ raises a TypeError for any weight that is not itself a String.
def to_s
  "scorer(#{@weight})"
end