Class: OmniCat::Classifiers::Bayes

Inherits:
Strategy
  • Object
show all
Defined in:
lib/omnicat/classifiers/bayes.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(bayes_hash = {}) ⇒ Bayes

Returns a new instance of Bayes.



9
10
11
12
13
14
15
16
17
# File 'lib/omnicat/classifiers/bayes.rb', line 9

# Builds a Bayes classifier, optionally restoring it from a hash.
#
# bayes_hash - Hash of previously exported state (default: {}). It is passed
#              to the parent initializer unchanged. Keys read here:
#              :categories - name => category data pairs; each value is
#                            wrapped in a BayesInternals::Category.
#              :k_value    - helper value; 1.0 is used when it is nil/false.
#
# NOTE(review): @categories is not created in this method — it is presumably
# set up by the parent initializer; confirm against Strategy#initialize.
def initialize(bayes_hash = {})
  super(bayes_hash)
  # Falling back to an empty hash makes the loop a no-op when the key is absent.
  bayes_hash.fetch(:categories, {}).each do |name, category_data|
    @categories[name] =
      ::OmniCat::Classifiers::BayesInternals::Category.new(category_data)
  end
  @k_value = bayes_hash[:k_value] || 1.0
end

Instance Attribute Details

#k_value ⇒ Object

Numeric - Helper value for skipping some Bayes algorithm errors (defaults to 1.0 when not supplied)



7
8
9
# File 'lib/omnicat/classifiers/bayes.rb', line 7

# Returns the value stored in @k_value, which #initialize sets to
# bayes_hash[:k_value] or, when that is nil/false, to 1.0.
#
# NOTE(review): presumably a smoothing constant consumed by the probability
# calculation — confirm against the code that reads it, which is not shown here.
def k_value
  @k_value
end

Instance Method Details

#add_category(category_name) ⇒ Object

Adds a new classification category. Raises StandardError if a category with the same name already exists.

Parameters

  • category_name - Name for category

Examples

# Create a classification category
bayes = Bayes.new
bayes.add_category("positive")


30
31
32
33
34
35
36
37
38
# File 'lib/omnicat/classifiers/bayes.rb', line 30

# Registers a new, empty classification category.
#
# category_name - Name for the category.
#
# Examples
#
#   bayes = Bayes.new
#   bayes.add_category("positive")
#
# Returns the newly created BayesInternals::Category.
# Raises StandardError if a category with that name already exists.
def add_category(category_name)
  already_present = category_exists?(category_name)
  if already_present
    # Message text is kept verbatim; callers may match on it.
    raise StandardError,
          "Category with name '#{category_name}' is already exists!"
  end

  increment_category_count
  @categories[category_name] = ::OmniCat::Classifiers::BayesInternals::Category.new
end

#classify(doc_content) ⇒ Object

Classify the given document

Parameters

  • doc_content - The document for classification

Returns

  • result - OmniCat::Result object

Examples

# Classify a document
bayes.classify("good documentation")
# => an OmniCat::Result (or nil when the classifier is not classifiable)


126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/omnicat/classifiers/bayes.rb', line 126

# Classify the given document.
#
# doc_content - The document for classification.
#
# Every category is scored with doc_probability. The category with the
# highest score wins; on a tie the category iterated first is kept, because
# the comparison is a strict ">".
#
# Examples
#
#   bayes.classify("good documentation")
#
# Returns an OmniCat::Result whose scores hold the per-category score,
# total_score holds their sum (replaced by 1 when the sum is 0), and category
# holds the winner's :name and its floored :percentage of the total.
# Returns nil when classifiable? is falsy.
def classify(doc_content)
  return unless classifiable?

  result = ::OmniCat::Result.new
  # nil means "no score seen yet". A numeric sentinel such as -1000000 would
  # leave no category selected whenever every score is at or below it.
  best_score = nil
  @categories.each do |category_name, category|
    score = doc_probability(category, doc_content)
    result.scores[category_name] = score
    result.total_score += score
    if best_score.nil? || score > best_score
      best_score = score
      result.category[:name] = category_name
    end
  end
  # Avoid dividing by zero when every category scored 0.
  result.total_score = 1 if result.total_score == 0
  result.category[:percentage] =
    (best_score * 100.0 / result.total_score).floor
  result
end

#train(category_name, doc_content) ⇒ Object

Train the desired category with a document

Parameters

  • category_name - Name of the category from added categories list

  • doc_content - Document text

Examples

# Train the desired category
bayes.train("positive", "clear documentation")
bayes.train("positive", "good, very well")
bayes.train("negative", "bad dog")
bayes.train("neutral", "how is the management gui")


54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/omnicat/classifiers/bayes.rb', line 54

# Train the desired category with a document.
#
# category_name - Name of the category from the added categories list.
# doc_content   - Document text.
#
# Document and prior counters are updated first. The document is then stored
# in the category under the MD5 hex digest of its content: a repeated document
# has its count incremented, a new one is wrapped in OmniCat::Doc. Finally each
# of the document's token counts is added to the category's token counts.
#
# Examples
#
#   bayes.train("positive", "clear documentation")
#
# Returns the trained document's tokens collection.
# Raises StandardError if the category does not exist.
def train(category_name, doc_content)
  unless category_exists?(category_name)
    raise StandardError,
          "Category with name '#{category_name}' does not exist!"
  end

  increment_doc_counts(category_name)
  update_priors

  digest = Digest::MD5.hexdigest(doc_content)
  target = @categories[category_name]
  trained_doc = target.docs[digest]
  if trained_doc
    trained_doc.increment_count
  else
    trained_doc = OmniCat::Doc.new(content: doc_content)
  end
  target.docs[digest] = trained_doc

  trained_doc.tokens.each do |token, count|
    increment_token_counts(category_name, token, count)
    # to_i turns a missing (nil) entry into 0 before adding.
    target.tokens[token] = target.tokens[token].to_i + count
  end
end

#untrain(category_name, doc_content) ⇒ Object

Untrain the desired category with a document

Parameters

  • category_name - Name of the category from added categories list

  • doc_content - Document text

Examples

# Untrain the desired category
bayes.untrain("positive", "clear documentation")
bayes.untrain("positive", "good, very well")
bayes.untrain("negative", "bad dog")
bayes.untrain("neutral", "how is the management gui")


89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/omnicat/classifiers/bayes.rb', line 89

# Untrain the desired category with a document.
#
# category_name - Name of the category from the added categories list.
# doc_content   - Document text.
#
# The document is looked up in the category by the MD5 hex digest of its
# content and its count is decremented. Each of its token counts is then
# subtracted from the category's token counts, the document is dropped from
# the category once its count reaches 0, and finally the document counters
# and priors are updated.
#
# Examples
#
#   bayes.untrain("positive", "clear documentation")
#
# Returns whatever update_priors returns.
# Raises StandardError if the category does not exist or the document was
# never trained into it.
def untrain(category_name, doc_content)
  unless category_exists?(category_name)
    raise StandardError,
          "Category with name '#{category_name}' does not exist!"
  end

  target = @categories[category_name]
  digest = Digest::MD5.hexdigest(doc_content)
  trained_doc = target.docs[digest]
  unless trained_doc
    raise StandardError,
          "Document is not found in #{category_name} documents!"
  end

  trained_doc.decrement_count
  trained_doc.tokens.each do |token, count|
    decrement_token_counts(category_name, token, count)
    # to_i turns a missing (nil) entry into 0 before subtracting.
    target.tokens[token] = target.tokens[token].to_i - count
  end
  target.docs.delete(digest) if trained_doc.count == 0

  decrement_doc_counts(category_name)
  update_priors
end