Class: StuffClassifier::Bayes
Instance Attribute Summary collapse
Attributes inherited from Base
#category_list, #language, #min_prob, #name, #thresholds, #tokenizer, #training_count, #word_list
Instance Method Summary
collapse
storable, to_store
Methods inherited from Base
#cat_count, #categories, #categories_with_word_count, #classify, #incr_cat, #incr_word, open, #save_state, #total_cat_count, #total_categories, #total_word_count, #total_word_count_in_cat, #train, #word_count
Constructor Details
#initialize(name, opts = {}) ⇒ Bayes
Returns a new instance of Bayes.
12
13
14
15
16
|
# File 'lib/stuff-classifier/bayes.rb', line 12
# Builds a Bayes classifier.
#
# +name+ and +opts+ are forwarded unchanged to the superclass initializer.
# Two extra options are read here:
#   :weight       - stored in @weight, falls back to 1.0
#   :assumed_prob - stored in @assumed_prob, falls back to 0.1
# A nil or false option value also triggers the fallback.
def initialize(name, opts = {})
  super(name, opts)
  configured_weight = opts[:weight]
  configured_prior = opts[:assumed_prob]
  @weight = configured_weight || 1.0
  @assumed_prob = configured_prior || 0.1
end
|
Instance Attribute Details
#assumed_prob ⇒ Object
Returns the value of attribute assumed_prob.
5
6
7
|
# File 'lib/stuff-classifier/bayes.rb', line 5
# Reader for @assumed_prob, the probability assigned in #initialize from
# opts[:assumed_prob] (0.1 when not supplied) and blended with observed
# word frequencies in #word_weighted_average.
def assumed_prob
@assumed_prob
end
|
#weight ⇒ Object
Returns the value of attribute weight.
4
5
6
|
# File 'lib/stuff-classifier/bayes.rb', line 4
# Reader for @weight, assigned in #initialize from opts[:weight] (1.0 when
# not supplied). In #word_weighted_average it scales @assumed_prob against
# the observed word total.
def weight
@weight
end
|
Instance Method Details
#cat_scores(text) ⇒ Object
51
52
53
54
55
56
57
|
# File 'lib/stuff-classifier/bayes.rb', line 51
# Scores +text+ against every entry of +categories+.
#
# text_prob is called once per category; the result is an Array of
# [category, score] pairs ordered from the highest score to the lowest.
def cat_scores(text)
  scored = categories.each_with_object({}) do |category, acc|
    acc[category] = text_prob(text, category)
  end
  scored.to_a.sort { |left, right| right.last <=> left.last }
end
|
#doc_prob(text, category) ⇒ Object
39
40
41
42
43
|
# File 'lib/stuff-classifier/bayes.rb', line 39
# Multiplies together the weighted-average probability of every word that
# @tokenizer.each_word produces for +text+, all evaluated against +category+.
#
# The product is seeded with 1, so a text that yields no words returns 1.
def doc_prob(text, category)
  per_word = @tokenizer.each_word(text).map do |token|
    word_weighted_average(token, category)
  end
  per_word.inject(1, :*)
end
|
#text_prob(text, category) ⇒ Object
45
46
47
48
49
|
# File 'lib/stuff-classifier/bayes.rb', line 45
# Unnormalised score of +text+ for +category+: the category's share of all
# trained categories (cat_count / total_cat_count) multiplied by
# doc_prob(text, category).
#
# Returns 0.0 when total_cat_count is zero, mirroring the zero-total guard
# in #word_prob, instead of dividing by zero.
def text_prob(text, category)
  total = total_cat_count
  return 0.0 if total == 0

  # to_f forces float division so the share is not truncated to 0 should the
  # inherited counters return Integers; it is a no-op if they return Floats.
  share = cat_count(category).to_f / total
  share * doc_prob(text, category)
end
|
#word_classification_detail(word) ⇒ Object
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
# File 'lib/stuff-classifier/bayes.rb', line 60
# Debug helper: prints, via Kernel#p, a per-category breakdown of +word+ for
# each scoring stage in turn - word_prob, word_weighted_average, doc_prob and
# text_prob. Each stage prints its name followed by a Hash of
# category => value. doc_prob and text_prob receive +word+ as their text.
#
# Returns the Hash printed for the final stage (text_prob).
def word_classification_detail(word)
  report = nil
  %w[word_prob word_weighted_average doc_prob text_prob].each do |stage|
    p stage
    report = categories.each_with_object({}) do |cat, by_cat|
      by_cat[cat] = send(stage, word, cat)
    end
    p report
  end
  report
end
|
#word_prob(word, cat) ⇒ Object
18
19
20
21
22
|
# File 'lib/stuff-classifier/bayes.rb', line 18
# Fraction of the words counted for +cat+ that are +word+:
# word_count(word, cat) divided by total_word_count_in_cat(cat).
#
# Returns 0.0 when the category's word total equals 0.
def word_prob(word, cat)
  denominator = total_word_count_in_cat(cat)
  if denominator == 0
    0.0
  else
    word_count(word, cat).to_f / denominator
  end
end
|
#word_weighted_average(word, cat, opts = {}) ⇒ Object
25
26
27
28
29
30
31
32
33
34
35
36
37
|
# File 'lib/stuff-classifier/bayes.rb', line 25
# Blends the observed probability of +word+ in +cat+ with @assumed_prob.
#
# The observed probability comes from opts[:func].call(word, cat) when a
# :func option is given, otherwise from word_prob(word, cat). The result is
#   (@weight * @assumed_prob + n * observed) / (@weight + n)
# where n is total_word_count(word).
def word_weighted_average(word, cat, opts = {})
  custom = opts[:func]
  observed =
    if custom
      custom.call(word, cat)
    else
      word_prob(word, cat)
    end
  occurrences = total_word_count(word)

  numerator = @weight * @assumed_prob + occurrences * observed
  denominator = @weight + occurrences
  numerator / denominator
end
|