Class: StuffClassifier::Bayes

Inherits:
Base
  • Object
show all
Extended by:
Storage::ActAsStorable
Defined in:
lib/stuff-classifier/bayes.rb

Instance Attribute Summary collapse

Attributes inherited from Base

#category_list, #language, #min_prob, #name, #thresholds, #tokenizer, #training_count, #word_list

Instance Method Summary collapse

Methods included from Storage::ActAsStorable

storable, to_store

Methods inherited from Base

#cat_count, #categories, #categories_with_word_count, #classify, #incr_cat, #incr_word, open, #save_state, #total_cat_count, #total_categories, #total_word_count, #total_word_count_in_cat, #train, #word_count

Constructor Details

#initialize(name, opts = {}) ⇒ Bayes

Returns a new instance of Bayes.



12
13
14
15
16
# File 'lib/stuff-classifier/bayes.rb', line 12

# Builds a Bayes classifier. +name+ and +opts+ are forwarded to the parent
# initializer first; the Bayes-specific options are read afterwards.
#
# @param name [Object] passed unchanged to the superclass initializer
# @param opts [Hash] options; +:weight+ and +:assumed_prob+ are read here and
#   fall back to 1.0 and 0.1 respectively when the stored value is nil or false
def initialize(name, opts={})
  super(name, opts)

  requested_weight = opts[:weight]
  @weight = requested_weight ? requested_weight : 1.0

  requested_prior = opts[:assumed_prob]
  @assumed_prob = requested_prior ? requested_prior : 0.1
end

Instance Attribute Details

#assumed_prob ⇒ Object

Returns the value of attribute assumed_prob.



5
6
7
# File 'lib/stuff-classifier/bayes.rb', line 5

# Reader for the +@assumed_prob+ instance variable (set in #initialize, where
# it defaults to 0.1).
#
# @return [Object] the current value of +@assumed_prob+
def assumed_prob; @assumed_prob; end

#weight ⇒ Object

Returns the value of attribute weight.



4
5
6
# File 'lib/stuff-classifier/bayes.rb', line 4

# Reader for the +@weight+ instance variable (set in #initialize, where it
# defaults to 1.0).
#
# @return [Object] the current value of +@weight+
def weight; @weight; end

Instance Method Details

#cat_scores(text) ⇒ Object



51
52
53
54
55
56
57
# File 'lib/stuff-classifier/bayes.rb', line 51

# Scores +text+ against every entry returned by #categories.
#
# @param text [Object] passed unchanged to #text_prob
# @return [Array<Array>] +[category, score]+ pairs, ordered from the highest
#   score to the lowest
def cat_scores(text)
  score_by_category = categories.each_with_object({}) do |category, table|
    table[category] = text_prob(text, category)
  end

  # Comparing right-to-left yields descending order.
  score_by_category.to_a.sort { |(_, left), (_, right)| right <=> left }
end

#doc_prob(text, category) ⇒ Object



39
40
41
42
43
# File 'lib/stuff-classifier/bayes.rb', line 39

# Multiplies together the weighted-average probability of every word the
# tokenizer yields for +text+.
#
# @param text [Object] handed to +@tokenizer.each_word+
# @param category [Object] category the words are scored against
# @return [Numeric] the product of the per-word values; the seed value 1 is
#   returned as-is when the tokenizer yields no words
def doc_prob(text, category)
  per_word = @tokenizer.each_word(text).map do |token|
    word_weighted_average(token, category)
  end
  per_word.reduce(1, :*)
end

#text_prob(text, category) ⇒ Object



45
46
47
48
49
# File 'lib/stuff-classifier/bayes.rb', line 45

# Probability score of +text+ for +category+: the category prior
# (category count divided by the total count over all categories) multiplied
# by #doc_prob for the text.
#
# The prior is always computed in floating point, so the result does not
# depend on whether #cat_count / #total_cat_count return Integers or Floats.
# When nothing has been counted yet (total of zero) the score is 0.0, which
# mirrors the zero-denominator guard in #word_prob.
#
# @param text [Object] passed unchanged to #doc_prob
# @param category [Object] category to score against
# @return [Float] prior * document probability, or 0.0 when the total
#   category count is zero
def text_prob(text, category)
  total = total_cat_count
  return 0.0 if total == 0

  prior = cat_count(category).to_f / total
  prior * doc_prob(text, category)
end

#word_classification_detail(word) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/stuff-classifier/bayes.rb', line 60

# Debugging aid: for each scoring stage (#word_prob, #word_weighted_average,
# #doc_prob, #text_prob) prints the stage name followed by a hash mapping
# every category to that stage's value for +word+. Output goes through
# Kernel#p.
#
# @param word [Object] the word to inspect; it is also passed as the text
#   argument of #doc_prob and #text_prob
# @return [Hash] the category => value hash of the last stage (text_prob)
def word_classification_detail(word)
  stages = [
    ["word_prob",             lambda { |cat| word_prob(word, cat) }],
    ["word_weighted_average", lambda { |cat| word_weighted_average(word, cat) }],
    ["doc_prob",              lambda { |cat| doc_prob(word, cat) }],
    ["text_prob",             lambda { |cat| text_prob(word, cat) }]
  ]

  last_table = nil
  stages.each do |label, scorer|
    p label
    last_table = categories.each_with_object({}) do |cat, table|
      table[cat] = scorer.call(cat)
    end
    p last_table
  end
  last_table
end

#word_prob(word, cat) ⇒ Object



18
19
20
21
22
# File 'lib/stuff-classifier/bayes.rb', line 18

# Fraction of the words counted in +cat+ that are +word+.
#
# @param word [Object] word to look up
# @param cat [Object] category to look in
# @return [Float] word count divided by the category's total word count, or
#   0.0 when the category's total word count is zero
def word_prob(word, cat)
  category_total = total_word_count_in_cat(cat)

  if category_total == 0
    # Nothing counted for this category: avoid dividing by zero.
    0.0
  else
    word_count(word, cat).to_f / category_total
  end
end

#word_weighted_average(word, cat, opts = {}) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/stuff-classifier/bayes.rb', line 25

# Blends the observed probability of +word+ in +cat+ with the assumed prior:
#
#   (weight * assumed_prob + totals * basic_prob) / (weight + totals)
#
# where +totals+ is #total_word_count for the word (the number of times it
# has appeared across all categories).
#
# @param word [Object] word being scored
# @param cat [Object] category being scored against
# @param opts [Hash] optional +:func+ callable; when given it is called with
#   +(word, cat)+ and replaces #word_prob as the source of the observed
#   probability
# @return [Numeric] the weighted average
def word_weighted_average(word, cat, opts={})
  custom_scorer = opts[:func]

  # Observed probability, from the caller's function when one is supplied.
  observed =
    if custom_scorer
      custom_scorer.call(word, cat)
    else
      word_prob(word, cat)
    end

  # Occurrences of the word across all categories.
  occurrences = total_word_count(word)

  prior_mass = @weight * @assumed_prob
  (prior_mass + occurrences * observed) / (@weight + occurrences)
end