Class: OmniCat::Classifiers::Bayes
- Inherits:
-
Strategy
- Object
- Strategy
- OmniCat::Classifiers::Bayes
- Defined in:
- lib/omnicat/classifiers/bayes.rb
Instance Attribute Summary collapse
-
#k_value ⇒ Object
Numeric - Helper value for skipping some Bayes algorithm errors (defaults to 1.0).
Instance Method Summary collapse
-
#add_category(category_name) ⇒ Object
Allows adding a new classification category.
-
#classify(doc_content) ⇒ Object
Classify the given document.
-
#initialize(bayes_hash = {}) ⇒ Bayes
constructor
A new instance of Bayes.
-
#train(category_name, doc_content) ⇒ Object
Train the desired category with a document.
-
#untrain(category_name, doc_content) ⇒ Object
Untrain the desired category with a document.
Constructor Details
#initialize(bayes_hash = {}) ⇒ Bayes
Returns a new instance of Bayes.
9 10 11 12 13 14 15 16 17 |
# File 'lib/omnicat/classifiers/bayes.rb', line 9
# Builds a Bayes classifier, optionally restoring it from a hash.
#
# Parameters
#   bayes_hash - Hash of stored classifier state (default: {}). It is passed
#                unchanged to the parent initializer. When it has a
#                :categories key, every name/value pair under that key is
#                wrapped in a BayesInternals::Category and stored in
#                @categories. :k_value, when present and truthy, becomes the
#                k value; otherwise 1.0 is used.
#
# NOTE(review): @categories is presumably created by the parent initializer;
# it is not assigned here - confirm against Strategy.
def initialize(bayes_hash = {})
  super(bayes_hash)
  stored_categories = bayes_hash.key?(:categories) ? bayes_hash[:categories] : {}
  stored_categories.each do |name, attributes|
    @categories[name] = ::OmniCat::Classifiers::BayesInternals::Category.new(attributes)
  end
  @k_value = bayes_hash[:k_value] || 1.0
end
Instance Attribute Details
#k_value ⇒ Object
Numeric - Helper value for skipping some Bayes algorithm errors (defaults to 1.0)
7 8 9 |
# File 'lib/omnicat/classifiers/bayes.rb', line 7
# Reader for the helper value used for skipping some Bayes algorithm errors.
#
# Returns
#   The current value of @k_value.
def k_value
  @k_value
end
Instance Method Details
#add_category(category_name) ⇒ Object
30 31 32 33 34 35 36 37 38 |
# File 'lib/omnicat/classifiers/bayes.rb', line 30
# Allows adding a new classification category.
#
# Parameters
#   category_name - Name under which the new, empty category is stored
#
# Returns
#   The newly created BayesInternals::Category.
#
# Raises
#   StandardError when category_exists?(category_name) is true.
def add_category(category_name)
  if category_exists?(category_name)
    raise StandardError,
          "Category with name '#{category_name}' is already exists!"
  end

  increment_category_count
  @categories[category_name] = ::OmniCat::Classifiers::BayesInternals::Category.new
end
#classify(doc_content) ⇒ Object
Classify the given document
Parameters
-
doc_content
- The document for classification
Returns
-
result
- OmniCat::Result object
Examples
# Classify a document
bayes.classify("good documentation")
=>
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'lib/omnicat/classifiers/bayes.rb', line 126
# Classify the given document.
#
# Parameters
#   doc_content - The document for classification
#
# Returns
#   An ::OmniCat::Result in which scores maps every category name to its
#   doc_probability, category[:name] is the highest-scoring category (the
#   first one wins a tie), and category[:percentage] is that score's share
#   of total_score, floored. Returns nil when classifiable? is false.
def classify(doc_content)
  return unless classifiable?

  # Start below every finite score. A fixed floor such as -1_000_000 would
  # leave no category selected when all scores fall at or under it, and the
  # percentage computation below would then fail on a nil score.
  best_score = -Float::INFINITY
  result = ::OmniCat::Result.new
  @categories.each do |category_name, category|
    probability = doc_probability(category, doc_content)
    result.scores[category_name] = probability
    if probability > best_score
      result.category[:name] = category_name
      best_score = probability
    end
    result.total_score += probability
  end
  # Guard the division below against a zero total.
  result.total_score = 1 if result.total_score == 0
  result.category[:percentage] = (
    result.scores[result.category[:name]] * 100.0 / result.total_score
  ).floor
  result
end
#train(category_name, doc_content) ⇒ Object
Train the desired category with a document
Parameters
-
category_name
- Name of the category from added categories list -
doc_content
- Document text
Examples
# Train the desired category
bayes.train("positive", "clear documentation")
bayes.train("positive", "good, very well")
bayes.train("negative", "bad dog")
bayes.train("neutral", "how is the management gui")
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/omnicat/classifiers/bayes.rb', line 54
# Train the desired category with a document.
#
# Parameters
#   category_name - Name of the category from the added categories list
#   doc_content   - Document text
#
# Documents are keyed by the MD5 hex digest of their content. Training with
# content already stored in the category increments that document's count
# instead of creating a new OmniCat::Doc. In both cases the document's token
# counts are added to the category's token counts.
#
# Returns
#   The tokens of the trained document.
#
# Raises
#   StandardError when the category does not exist.
def train(category_name, doc_content)
  unless category_exists?(category_name)
    raise StandardError,
          "Category with name '#{category_name}' does not exist!"
  end

  increment_doc_counts(category_name)
  update_priors

  category = @categories[category_name]
  doc_key = Digest::MD5.hexdigest(doc_content)
  doc = category.docs[doc_key]
  if doc
    doc.increment_count
  else
    doc = OmniCat::Doc.new(content: doc_content)
  end
  category.docs[doc_key] = doc

  doc.tokens.each do |token, count|
    increment_token_counts(category_name, token, count)
    category.tokens[token] = category.tokens[token].to_i + count
  end
end
#untrain(category_name, doc_content) ⇒ Object
Untrain the desired category with a document
Parameters
-
category_name
- Name of the category from added categories list -
doc_content
- Document text
Examples
# Untrain the desired category
bayes.untrain("positive", "clear documentation")
bayes.untrain("positive", "good, very well")
bayes.untrain("negative", "bad dog")
bayes.untrain("neutral", "how is the management gui")
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# File 'lib/omnicat/classifiers/bayes.rb', line 89
# Untrain the desired category with a document.
#
# Parameters
#   category_name - Name of the category from the added categories list
#   doc_content   - Document text
#
# The document is looked up by the MD5 hex digest of its content. Its count
# is decremented, its token counts are subtracted from the category's token
# counts, and the document is removed from the category once its count
# reaches zero.
#
# Returns
#   Whatever update_priors returns.
#
# Raises
#   StandardError when the category does not exist, or when the document is
#   not stored in that category.
def untrain(category_name, doc_content)
  unless category_exists?(category_name)
    raise StandardError,
          "Category with name '#{category_name}' does not exist!"
  end

  category = @categories[category_name]
  doc_key = Digest::MD5.hexdigest(doc_content)
  doc = category.docs[doc_key]
  unless doc
    raise StandardError,
          "Document is not found in #{category_name} documents!"
  end

  doc.decrement_count
  doc.tokens.each do |token, count|
    decrement_token_counts(category_name, token, count)
    category.tokens[token] = category.tokens[token].to_i - count
  end
  # Drop the document once no trained copies of it remain.
  category.docs.delete(doc_key) if doc.count == 0

  decrement_doc_counts(category_name)
  update_priors
end