Class: StuffClassifier::TfIdf
Instance Attribute Summary
Attributes inherited from Base
#category_list, #language, #min_prob, #name, #thresholds, #tokenizer, #training_count, #word_list
Instance Method Summary
collapse
storable, to_store
Methods inherited from Base
#cat_count, #categories, #categories_with_word_count, #classify, #incr_cat, #incr_word, open, #save_state, #total_cat_count, #total_categories, #total_word_count, #total_word_count_in_cat, #train, #word_count
Constructor Details
#initialize(name, opts = {}) ⇒ TfIdf
Returns a new instance of TfIdf.
5
6
7
|
# File 'lib/stuff-classifier/tf-idf.rb', line 5
# Creates a TfIdf classifier. Both arguments are passed through to the
# parent class constructor unchanged.
#
# @param name [Object] forwarded to the parent constructor
# @param opts [Hash] forwarded to the parent constructor
def initialize(name, opts = {})
  # Bare `super` re-sends this method's own arguments (name, opts).
  super
end
|
Instance Method Details
#cat_scores(text) ⇒ Object
24
25
26
27
28
29
30
31
|
# File 'lib/stuff-classifier/tf-idf.rb', line 24
# Scores +text+ against every category and ranks the results.
#
# @param text [Object] passed as-is to #text_prob
# @return [Array<Array>] [category, score] pairs, highest score first
def cat_scores(text)
  scores = categories.each_with_object({}) do |cat, acc|
    acc[cat] = text_prob(text, cat)
  end
  # Compare right-to-left so the largest score comes first.
  scores.to_a.sort { |left, right| right.last <=> left.last }
end
|
#text_prob(text, cat) ⇒ Object
20
21
22
|
# File 'lib/stuff-classifier/tf-idf.rb', line 20
# Adds up the #word_prob value of every word the tokenizer yields for
# +text+, all scored against +cat+.
#
# @param text [Object] handed to @tokenizer.each_word
# @param cat [Object] category each word is scored against
# @return [Numeric] the sum of the per-word scores; 0 when there are no words
def text_prob(text, cat)
  word_scores = @tokenizer.each_word(text).map { |token| word_prob(token, cat) }
  word_scores.reduce(0) { |total, score| total + score }
end
|
#word_classification_detail(word) ⇒ Object
33
34
35
36
37
38
39
40
41
42
43
|
# File 'lib/stuff-classifier/tf-idf.rb', line 33
# Debug helper: prints, for every category, the #word_prob and the
# #text_prob computed for +word+. Labels go through Kernel#p and the
# per-category hashes through `ap`.
#
# @param word [Object] value scored against each category
# @return [Object] whatever the final `ap` call returns
def word_classification_detail(word)
  p "tf_idf"
  tf_idf_by_cat = categories.each_with_object({}) do |cat, acc|
    acc[cat] = word_prob(word, cat)
  end
  ap tf_idf_by_cat

  p "text_prob"
  text_prob_by_cat = categories.each_with_object({}) do |cat, acc|
    acc[cat] = text_prob(word, cat)
  end
  ap text_prob_by_cat
end
|
#word_prob(word, cat) ⇒ Object
10
11
12
13
14
15
16
17
18
|
# File 'lib/stuff-classifier/tf-idf.rb', line 10
# Computes the TF-IDF weight of +word+ within +cat+:
#
#   tf  = word_count(word, cat) / cat_count(cat)
#   idf = log10((total_categories + 2) / (categories_with_word_count(word) + 1.0))
#
# @param word [Object] word to score
# @param cat [Object] category the word is scored against
# @return [Float] tf * idf, or 0.0 when cat_count(cat) is zero
def word_prob(word, cat)
  word_cat_nr = word_count(word, cat)
  cat_nr = cat_count(cat)
  # Without this guard a zero category count makes tf NaN (0.0 / 0) or
  # Infinity, which then poisons summed scores and breaks <=> when sorting.
  return 0.0 if cat_nr.zero?

  tf = 1.0 * word_cat_nr / cat_nr
  # The +2 / +1.0 offsets keep the ratio above 1, so idf stays positive
  # even for a word that appears in every category.
  idf = Math.log10((total_categories + 2) / (categories_with_word_count(word) + 1.0))
  tf * idf
end
|