Class: StuffClassifier::Bayes

Inherits:
Base
  • Object
show all
Defined in:
lib/stuff-classifier/bayes.rb

Instance Attribute Summary collapse

Attributes inherited from Base

#name

Attributes included from Tokenizer

#stemming

Instance Method Summary collapse

Methods inherited from Base

#cat_count, #categories, #incr_cat, #incr_word, open, #save_state, #total_count, #train, #word_count, #word_prob, #word_weighted_average

Methods included from Tokenizer

#each_word, #ignore_words, #ignore_words=, #stemming?

Constructor Details

#initialize(name, opts = {}) ⇒ Bayes

Returns a new instance of Bayes.



7
8
9
10
# File 'lib/stuff-classifier/bayes.rb', line 7

# Builds a Bayes classifier.
#
# Both arguments are handed unchanged to the parent constructor; this class
# only adds an initially empty per-category threshold table.
#
# @param name forwarded to the parent constructor
# @param opts forwarded to the parent constructor (defaults to an empty Hash)
def initialize(name, opts = {})
  # Bare +super+ forwards this method's own arguments (name, opts) as-is.
  super
  @thresholds = {}
end

Instance Attribute Details

#thresholds=(value) ⇒ Object (writeonly)



5
6
7
# File 'lib/stuff-classifier/bayes.rb', line 5

# Replaces the whole threshold table with +value+.
#
# The table is read by #classify as <tt>@thresholds[category]</tt>, so
# +value+ must respond to <tt>[]</tt>; the constructor seeds it with an
# empty Hash. No validation or copying is performed here.
#
# @param value the new threshold table (presumably a Hash of
#   category => numeric multiplier; confirm against callers)
def thresholds=(value)
  @thresholds = value
end

Instance Method Details

#cat_scores(text) ⇒ Object



24
25
26
27
28
29
30
# File 'lib/stuff-classifier/bayes.rb', line 24

# Scores +text+ against every known category.
#
# Each category returned by +categories+ is paired with the value of
# +text_prob+ for it, and the pairs are ordered from highest score to lowest.
#
# @param text the text to score (passed through to +text_prob+)
# @return [Array<Array>] +[category, score]+ pairs, highest score first
def cat_scores(text)
  by_category = categories.each_with_object({}) do |category, table|
    table[category] = text_prob(text, category)
  end

  # Compare right-to-left so the ordering is descending by score.
  by_category.to_a.sort { |(_, left), (_, right)| right <=> left }
end

#classify(text, default = nil) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/stuff-classifier/bayes.rb', line 32

# Picks the most probable category for +text+, or +default+ when no category
# wins convincingly.
#
# A winner must have a score strictly greater than zero. It is then rejected
# in favour of +default+ if any other category's score, multiplied by the
# winner's threshold (1.0 when none is configured for it), is strictly
# greater than the winning score.
#
# @param text the text to classify (passed through to +cat_scores+)
# @param default value returned when no category qualifies
# @return the winning category, or +default+
def classify(text, default = nil)
  ranked = cat_scores(text)

  # Keep the first pair whose score strictly beats everything seen so far;
  # the 0.0 seed means non-positive scores can never win.
  best, max_prob = ranked.inject([nil, 0.0]) do |(leader, top), (cat, prob)|
    prob > top ? [cat, prob] : [leader, top]
  end
  return default unless best

  threshold = @thresholds[best] || 1.0

  # The winner is contested when a rival's scaled score overtakes it.
  contested = ranked.any? do |cat, prob|
    next false if cat == best

    prob * threshold > max_prob
  end

  contested ? default : best
end

#doc_prob(text, category) ⇒ Object



12
13
14
15
16
# File 'lib/stuff-classifier/bayes.rb', line 12

# Multiplies together the weighted average of every word in +text+ for
# +category+.
#
# The product is seeded with the Integer 1, so a text that yields no words
# returns 1.
#
# @param text the text whose words are enumerated via +each_word+
# @param category the category passed to +word_weighted_average+
# @return [Numeric] the product of the per-word weighted averages
def doc_prob(text, category)
  weights = each_word(text).map do |word|
    word_weighted_average(word, category)
  end

  weights.inject(1) { |product, weight| product * weight }
end

#text_prob(text, category) ⇒ Object



18
19
20
21
22
# File 'lib/stuff-classifier/bayes.rb', line 18

# Scores +text+ for +category+: the category's share of the total count
# multiplied by the document probability from +doc_prob+.
#
# @param text the text being scored (passed through to +doc_prob+)
# @param category the category being scored
# @return [Numeric] share of +category+ in the total count, times +doc_prob+
def text_prob(text, category)
  category_share = cat_count(category) / total_count
  category_share * doc_prob(text, category)
end