Class: Elastic::Stats::NaiveBayes::TokenStats

Inherits:
Object
  • Object
show all
Defined in:
lib/elastic/stats/naive-bayes/token_stats.rb

Overview

Provide statistics about a token in a specific set of data

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(token, set) ⇒ TokenStats

Returns a new instance of TokenStats.



8
9
10
11
# File 'lib/elastic/stats/naive-bayes/token_stats.rb', line 8

# Builds a stats object for one token within one data set.
#
# @param token [Object] the token whose statistics are queried; it is used
#   as the lookup key by #count and #categories
# @param set [Object] the data set the token is looked up in; other methods
#   of this class call #tokens, #token_categories, #categories and #count on it
def initialize(token, set)
  @token = token
  @set = set
end

Instance Attribute Details

#set ⇒ Object (readonly)

Returns the value of attribute set.



6
7
8
# File 'lib/elastic/stats/naive-bayes/token_stats.rb', line 6

# Read-only accessor for the data set handed to the constructor.
#
# @return [Object] the value stored in @set
def set
  @set
end

#token ⇒ Object (readonly)

Returns the value of attribute token.



6
7
8
# File 'lib/elastic/stats/naive-bayes/token_stats.rb', line 6

# Read-only accessor for the token handed to the constructor.
#
# @return [Object] the value stored in @token
def token
  @token
end

Instance Method Details

#bayes(category) ⇒ Object



38
39
40
41
42
43
44
45
46
# File 'lib/elastic/stats/naive-bayes/token_stats.rb', line 38

# Scores the token against +category+.
#
# With p = probability(category) and q = inverse(category), the ratio
# p / (p + q) is passed through log_protect and then adjust; for the
# resulting value x the method returns log(1 - x) - log(x).
#
# @param category [Object] key identifying the category
# @return [Integer, Float] 0 when the token count is zero or when p + q is
#   zero; otherwise the logarithmic score described above
def bayes(category)
  return 0 if count == 0

  # Each probability is computed once and reused for the guard and the ratio.
  in_category = probability(category)
  total = in_category + inverse(category)
  return 0 if total == 0

  # Floats are immutable, so the value returned by adjust must be captured;
  # calling it only for effect leaves the probability unadjusted.
  # NOTE(review): assumes adjust returns the adjusted probability -- confirm.
  calculated = adjust(log_protect(in_category / total))
  Math.log(1 - calculated) - Math.log(calculated)
end

#categories ⇒ Object

Returns the categories associated with the token in the set as a Hash



19
20
21
# File 'lib/elastic/stats/naive-bayes/token_stats.rb', line 19

# Looks up this token's entry in the set's token-to-categories table.
#
# @return [Object] whatever set.token_categories holds under the token
def categories
  by_token = set.token_categories
  by_token[token]
end

#count ⇒ Object

Returns the number of documents that contain the token



14
15
16
# File 'lib/elastic/stats/naive-bayes/token_stats.rb', line 14

# Looks up this token's entry in the set's token table.
#
# @return [Object] whatever set.tokens holds under the token
def count
  per_token = set.tokens
  per_token[token]
end

#inverse(category) ⇒ Object

Returns the inverse probability that a token is in the category



31
32
33
34
35
36
# File 'lib/elastic/stats/naive-bayes/token_stats.rb', line 31

# Ratio of the token's occurrences outside +category+ to the set's total
# outside +category+.
#
# @param category [Object] key identifying the category
# @return [Integer, Float] 0 when the token has no entry for the category or
#   when the set has nothing outside the category; otherwise a Float ratio
def inverse(category)
  return 0 unless categories.has_key?(category)

  outside_total = set.count - set.categories[category]
  return 0 if outside_total == 0

  outside_hits = count - categories[category]
  outside_hits / outside_total.to_f
end

#probability(category) ⇒ Object

Returns the probability that a token is in the specified category



24
25
26
27
28
# File 'lib/elastic/stats/naive-bayes/token_stats.rb', line 24

# Ratio of the token's occurrences in +category+ to the set's total for
# that category.
#
# @param category [Object] key identifying the category
# @return [Integer, Float] 0 when the token has no entry for the category or
#   when the set's total for the category is zero; otherwise a Float ratio
def probability(category)
  return 0 unless categories.has_key?(category)

  category_total = set.categories[category]
  return 0 if category_total == 0

  categories[category] / category_total.to_f
end