Class: OmniCat::Classifiers::Bayes
- Defined in:
- lib/omnicat/classifiers/bayes.rb
Instance Attribute Summary collapse
-
#categories ⇒ Object
Returns the value of attribute categories.
-
#category_count ⇒ Object
Returns the value of attribute category_count.
-
#doc_count ⇒ Object
Returns the value of attribute doc_count.
-
#k_value ⇒ Object
Helper value (defaults to 1.0) used to work around some Bayes theorem errors.
-
#token_count ⇒ Object
Returns the value of attribute token_count.
Instance Method Summary collapse
-
#add_category(name) ⇒ Object
Adds a new classification category.
-
#classify(doc) ⇒ Object
Classify the given document.
-
#initialize(bayes_hash = {}) ⇒ Bayes
constructor
A new instance of Bayes.
-
#train(category, doc) ⇒ Object
Train the desired category with a document.
Methods inherited from Base
#add_categories, #classify_batch, #train_batch
Methods inherited from Base
Constructor Details
#initialize(bayes_hash = {}) ⇒ Bayes
8 9 10 11 12 13 14 15 16 17 18 19 |
# File 'lib/omnicat/classifiers/bayes.rb', line 8
# Build a Bayes classifier, optionally restoring state from a hash.
#
# ==== Parameters
#
# * +bayes_hash+ - Optional hash of previously exported state. Recognized
#   keys are :categories, :category_count, :doc_count, :k_value and
#   :token_count. Missing counters become 0 (via to_i) and a missing
#   :k_value becomes 1.0.
def initialize(bayes_hash = {})
  # Plain counters first; a nil entry is coerced to 0 by to_i.
  self.category_count = bayes_hash[:category_count].to_i
  self.doc_count = bayes_hash[:doc_count].to_i
  self.token_count = bayes_hash[:token_count].to_i
  self.k_value = bayes_hash[:k_value] || 1.0

  # Always start from an empty container, then rebuild each stored category.
  self.categories = ::OmniCat::Hash.new
  return unless bayes_hash.key?(:categories)

  bayes_hash[:categories].each do |name, category|
    categories[name] =
      ::OmniCat::Classifiers::BayesInternals::Category.new(category)
  end
end
Instance Attribute Details
#categories ⇒ Object
Returns the value of attribute categories.
5 6 7 |
# File 'lib/omnicat/classifiers/bayes.rb', line 5 def categories @categories end |
#category_count ⇒ Object
Returns the value of attribute category_count.
5 6 7 |
# File 'lib/omnicat/classifiers/bayes.rb', line 5 def category_count @category_count end |
#doc_count ⇒ Object
Returns the value of attribute doc_count.
5 6 7 |
# File 'lib/omnicat/classifiers/bayes.rb', line 5 def doc_count @doc_count end |
#k_value ⇒ Object
Helper value (defaults to 1.0) used to work around some Bayes theorem errors.
6 7 8 |
# File 'lib/omnicat/classifiers/bayes.rb', line 6 def k_value @k_value end |
#token_count ⇒ Object
Returns the value of attribute token_count.
5 6 7 |
# File 'lib/omnicat/classifiers/bayes.rb', line 5 def token_count @token_count end |
Instance Method Details
#add_category(name) ⇒ Object
Adds a new classification category.
Parameters
-
name- Name for category
Examples
# Create a classification category
bayes = Bayes.new
bayes.add_category("positive")
32 33 34 35 36 37 38 39 40 |
# File 'lib/omnicat/classifiers/bayes.rb', line 32
# Register a new, empty classification category.
#
# ==== Parameters
#
# * +name+ - Name for category
#
# ==== Returns
#
# * The newly created BayesInternals::Category object
#
# ==== Raises
#
# * +StandardError+ - when a category with the same name already exists
#
# ==== Examples
#
#   # Create a classification category
#   bayes = Bayes.new
#   bayes.add_category("positive")
def add_category(name)
  # Reject duplicates up front so the counter is never bumped twice.
  if category_exists?(name)
    raise StandardError,
          "Category with name '#{name}' is already exists!"
  end

  self.category_count += 1
  categories[name] = ::OmniCat::Classifiers::BayesInternals::Category.new
end
#classify(doc) ⇒ Object
Classify the given document
Parameters
-
doc- The document for classification
Returns
-
result- OmniCat::Result object
Examples
# Classify a document
bayes.classify("good documentation")
=>
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
# File 'lib/omnicat/classifiers/bayes.rb', line 86
# Classify the given document.
#
# For every category the likelihood starts at k_value and is multiplied, for
# each document token the category has already seen, by
#
#   (category occurrences of token + k_value) /
#   (category.token_count + token_count)
#
# The category score is that likelihood times the prior
# (category.doc_count / doc_count). A category that has seen none of the
# document's tokens scores 0. The first category with the highest score wins.
#
# ==== Parameters
#
# * +doc+ - The document for classification; it must respond to
#   tokenize_with_counts
#
# ==== Returns
#
# * +result+ - OmniCat::Result object carrying the per-category scores, the
#   total score, and the winning category's :name and :percentage
#
# ==== Raises
#
# * +StandardError+ - when fewer than 2 categories have been added
#
# ==== Examples
#
#   # Classify a document
#   bayes.classify("good documentation")
def classify(doc)
  if category_count < 2
    raise StandardError,
          "At least 2 categories needed for classification process!"
  end

  # Tokenization does not depend on the category, so do it once.
  # NOTE(review): assumes tokenize_with_counts is side-effect free - confirm.
  token_counts = doc.tokenize_with_counts
  result = ::OmniCat::Result.new
  best_score = -Float::INFINITY

  categories.each do |name, category|
    prior = category.doc_count / doc_count.to_f
    likelihood = k_value
    # Track matches explicitly instead of comparing the score with 1.0: a
    # genuine match can legitimately yield exactly 1.0, and an untouched
    # score only equals 1.0 when k_value happens to be 1.0.
    matched = false

    token_counts.each do |token, _count|
      next unless category.tokens.has_key?(token)

      matched = true
      # Float denominator so an Integer k_value cannot trigger integer
      # division and truncate the likelihood to 0.
      likelihood *= (category.tokens[token].to_i + k_value) /
                    (category.token_count + token_count).to_f
    end

    result.scores[name] = matched ? prior * likelihood : 0

    # Strict comparison keeps the first category on ties.
    if result.scores[name] > best_score
      result.category[:name] = name
      best_score = result.scores[name]
    end
    result.total_score += result.scores[name]
  end

  # Avoid dividing by zero when no category matched anything.
  result.total_score = 1 if result.total_score == 0
  result.category[:percentage] = (
    result.scores[result.category[:name]] * 100.0 / result.total_score
  ).floor
  result
end
#train(category, doc) ⇒ Object
Train the desired category with a document
Parameters
-
category- Name of the category from added categories list -
doc- Document text
Examples
# Train the desired category
bayes.train("positive", "clear documentation")
bayes.train("positive", "good, very well")
bayes.train("negative", "bad dog")
bayes.train("neutral", "how is the management gui")
56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/omnicat/classifiers/bayes.rb', line 56
# Train the desired category with a document.
#
# ==== Parameters
#
# * +category+ - Name of the category from added categories list
# * +doc+ - Document text; it must respond to tokenize_with_counts
#
# ==== Raises
#
# * +StandardError+ - when no category with the given name exists
#
# ==== Examples
#
#   # Train the desired category
#   bayes.train("positive", "clear documentation")
#   bayes.train("negative", "bad dog")
def train(category, doc)
  unless category_exists?(category)
    raise StandardError,
          "Category with name '#{category}' does not exist!"
  end

  trained = categories[category]

  # One more document, both classifier-wide and for this category.
  self.doc_count += 1
  trained.doc_count += 1

  doc.tokenize_with_counts.each do |token, count|
    self.token_count += count
    # to_i turns a not-yet-seen token (nil) into 0 before adding.
    trained.tokens[token] = trained.tokens[token].to_i + count
    trained.token_count += count
  end
end