Class: TeRex::Classifier::Bayes
- Inherits:
-
Object
- Object
- TeRex::Classifier::Bayes
- Defined in:
- lib/te_rex/bayes.rb
Instance Attribute Summary collapse
-
#category_counts ⇒ Object
readonly
Returns the value of attribute category_counts.
-
#messages ⇒ Object
readonly
Returns the value of attribute messages.
-
#total_words ⇒ Object
readonly
Returns the value of attribute total_words.
Instance Method Summary collapse
- #categories ⇒ Object
- #classifications(text) ⇒ Object
- #classify(text) ⇒ Object
-
#initialize(*categories) ⇒ Bayes
constructor
categories = [{:tag => "Thing1", :msg => "Thing1 message"}, {:tag => "Thing2", :msg => "Thing2 message"}]; for example: initialize({:tag => "Refund", :msg => "You'll get a refund"}, {:tag => "Nonrefund", :msg => "You won't get a refund"}).
- #train(ctgry, text) ⇒ Object
- #training_description ⇒ Object
- #under_trained? ⇒ Boolean
Constructor Details
#initialize(*categories) ⇒ Bayes
categories = [{:tag => "Thing1", :msg => "Thing1 message"}, {:tag => "Thing2", :msg => "Thing2 message"}]; for example: initialize({:tag => "Refund", :msg => "You'll get a refund"}, {:tag => "Nonrefund", :msg => "You won't get a refund"})
12 13 14 15 16 17 18 19 |
# File 'lib/te_rex/bayes.rb', line 12
#
# Builds an untrained classifier from a list of category descriptors.
#
# @param categories [Array<Hash>] one hash per category; each is read with
#   the keys +:tag+ (category name) and +:msg+ (message stored for that tag)
def initialize(*categories)
  # Per-category word table, keyed by the formatted category term.
  @clasif = categories.each_with_object({}) do |descriptor, table|
    table[TeRex::Format.category_term(descriptor[:tag])] = {}
  end

  # Messages are keyed by the raw tag exactly as the caller supplied it.
  @messages = categories.each_with_object({}) do |descriptor, table|
    table[descriptor[:tag]] = descriptor[:msg]
  end

  # Unknown categories read as 0 so #train can increment without a guard.
  @category_counts = Hash.new(0)
  @total_words = 0
end
Instance Attribute Details
#category_counts ⇒ Object (readonly)
Returns the value of attribute category_counts.
8 9 10 |
# File 'lib/te_rex/bayes.rb', line 8
#
# Read-only access to the per-category training counters.
#
# @return [Object] the current value of @category_counts
def category_counts
  @category_counts
end
#messages ⇒ Object (readonly)
Returns the value of attribute messages.
8 9 10 |
# File 'lib/te_rex/bayes.rb', line 8
#
# Read-only access to the tag => message table.
#
# @return [Object] the current value of @messages
def messages
  @messages
end
#total_words ⇒ Object (readonly)
Returns the value of attribute total_words.
8 9 10 |
# File 'lib/te_rex/bayes.rb', line 8
#
# Read-only access to the running word total.
#
# @return [Object] the current value of @total_words
def total_words
  @total_words
end
Instance Method Details
#categories ⇒ Object
57 58 59 |
# File 'lib/te_rex/bayes.rb', line 57
#
# Lists the names of every category held in the classifier's word table.
#
# @return [Array<String>] each key of @clasif converted with +to_s+, in the
#   table's own order
def categories
  # The table is stored in @clasif (single "s"), the variable written by the
  # constructor and read by the other methods; any other spelling is an
  # unset instance variable and would fail on nil.
  @clasif.keys.map(&:to_s)
end
#classifications(text) ⇒ Object
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/te_rex/bayes.rb', line 33
#
# Scores +text+ against every category in @clasif.
#
# For each category the score is the sum of Math.log(seen / total) over the
# entries returned by BayesData.index_frequency(text), plus
# Math.log(trainings / training_count). A word missing from the category, or
# a category missing from @category_counts, contributes 0.1 in place of its
# count.
#
# NOTE(review): when a category holds no words, +total+ is 0 and each word
# term becomes Math.log(0.1 / 0.0), i.e. Infinity.
#
# @param text [Object] handed unchanged to BayesData.index_frequency
# @return [Hash] category name (via +to_s+) => numeric score
def classifications(text)
  # nil.to_f is 0.0, so an empty counter table yields 0.0 here.
  training_count = @category_counts.values.reduce(:+).to_f

  @clasif.each_with_object({}) do |(category, category_words), score|
    total = category_words.values.reduce(0, :+)

    log_likelihood = 0
    BayesData.index_frequency(text).each do |word, _count|
      seen = category_words.fetch(word, 0.1)
      log_likelihood += Math.log(seen / total.to_f)
    end

    # fetch bypasses the hash's default value, so an absent key gives 0.1.
    trainings = @category_counts.fetch(category, 0.1)
    score[category.to_s] = log_likelihood + Math.log(trainings / training_count)
  end
end
#classify(text) ⇒ Object
52 53 54 55 |
# File 'lib/te_rex/bayes.rb', line 52
#
# Picks the highest-scoring category for +text+.
#
# @param text [Object] forwarded unchanged to #classifications
# @return [Array] two elements: the winning category name, then whatever
#   @messages holds under that name
def classify(text)
  # Negating the score makes the ascending sort put the largest score first.
  ranked = classifications(text).sort_by { |_name, score| -score }
  best = ranked.first.first
  [best, @messages[best]]
end
#train(ctgry, text) ⇒ Object
21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/te_rex/bayes.rb', line 21
#
# Records one training sample of +text+ for the given category.
#
# Increments the category's entry in @category_counts, then adds every
# (word, count) pair from BayesData.index_frequency(text) to that category's
# word table and to @total_words.
#
# @param ctgry [Object] category identifier, normalised with
#   TeRex::Format.category_term
# @param text [Object] handed unchanged to BayesData.index_frequency
# @return [Object] the value returned by +each+ on the frequency collection
def train(ctgry, text)
  category = TeRex::Format.category_term(ctgry)
  @category_counts[category] += 1

  frequencies = BayesData.index_frequency(text)
  frequencies.each do |word, count|
    word_table = @clasif[category]
    word_table[word] ||= 0
    word_table[word] += count
    @total_words += count
  end
end
#training_description ⇒ Object
61 62 63 64 65 66 67 68 69 70 |
# File 'lib/te_rex/bayes.rb', line 61
#
# Reports, for every category in @clasif, how its training count compares
# with the overall training volume.
#
# threshold = @total_words / (number of keys in @category_counts)
# ratio     = @total_words / (training count of the category)
#
# Both quotients use integer division before conversion to Float, so they
# are truncated (e.g. 7 / 2 gives 3.0).
#
# A category that was never trained has a count of 0; its ratio is reported
# as Float::INFINITY instead of raising ZeroDivisionError. When nothing has
# been trained at all, the threshold is 0.0 for the same reason.
#
# @return [Array<Array>] one entry per category of the form
#   [ratio >= threshold, category, {"description" => {...}}], where the
#   description values are strings
def training_description
  counts = @category_counts
  trained_categories = counts.keys.count
  max_threshold = trained_categories.zero? ? 0.0 : (@total_words / trained_categories).to_f

  @clasif.keys.map do |term|
    cc = counts[term]
    # Guard the division: an untrained category has cc == 0.
    train_ratio = cc.zero? ? Float::INFINITY : (@total_words / cc).to_f

    [
      train_ratio >= max_threshold,
      term,
      {
        "description" => {
          "training_ratio" => train_ratio.to_s,
          "threshold" => max_threshold.to_s,
          "category_counts" => cc.to_s,
          "total_words" => @total_words.to_s
        }
      }
    ]
  end
end
#under_trained? ⇒ Boolean
72 73 74 |
# File 'lib/te_rex/bayes.rb', line 72
#
# Filters #training_description down to the entries whose leading flag is
# +true+.
#
# @return [Array] the matching entries; the result is an Array (possibly
#   empty), not a strict true/false value
def under_trained?
  training_description.select { |flag, _term, _details| flag == true }
end