Class: OmniCat::Classifiers::Bayes

Inherits:
Base
  • Object
show all
Defined in:
lib/omnicat/classifiers/bayes.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#add_categories, #classify_batch, #train_batch

Methods inherited from Base

#to_hash

Constructor Details

#initialize(bayes_hash = {}) ⇒ Bayes



8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/omnicat/classifiers/bayes.rb', line 8

# Builds a Bayes classifier, optionally restoring state from a hash.
#
# ==== Parameters
#
# * +bayes_hash+ - Optional hash of previously stored state. Keys read:
#   +:categories+ (name => category data, each value is passed to
#   BayesInternals::Category.new), +:category_count+, +:doc_count+,
#   +:k_value+ and +:token_count+.
#
# Missing counters become 0 (via +to_i+); a missing +:k_value+ becomes 1.0.
def initialize(bayes_hash = {})
  self.categories = ::OmniCat::Hash.new
  # No :categories key means there is nothing to restore.
  bayes_hash.fetch(:categories, {}).each do |category_name, category_data|
    categories[category_name] =
      ::OmniCat::Classifiers::BayesInternals::Category.new(category_data)
  end

  self.category_count = bayes_hash[:category_count].to_i
  self.doc_count = bayes_hash[:doc_count].to_i
  self.token_count = bayes_hash[:token_count].to_i
  self.k_value = bayes_hash[:k_value] || 1.0
end

Instance Attribute Details

#categories ⇒ Object

Returns the value of attribute categories.



5
6
7
# File 'lib/omnicat/classifiers/bayes.rb', line 5

# Reader for the +categories+ attribute: returns the current value of
# @categories. The constructor sets it to an ::OmniCat::Hash that is keyed
# by category name.
def categories
  @categories
end

#category_count ⇒ Object

Returns the value of attribute category_count.



5
6
7
# File 'lib/omnicat/classifiers/bayes.rb', line 5

# Reader for the +category_count+ attribute: returns the current value of
# @category_count. add_category increments it by one per new category, and
# classify requires it to be at least 2.
def category_count
  @category_count
end

#doc_count ⇒ Object

Returns the value of attribute doc_count.



5
6
7
# File 'lib/omnicat/classifiers/bayes.rb', line 5

# Reader for the +doc_count+ attribute: returns the current value of
# @doc_count. train increments it by one per trained document, and classify
# uses it as the denominator of each category's prior.
def doc_count
  @doc_count
end

#k_value ⇒ Object

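Helper value (1.0 unless supplied to the constructor) that is added to token counts during classification, used for skipping some Bayes theorem errors.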



6
7
8
# File 'lib/omnicat/classifiers/bayes.rb', line 6

# Reader for the +k_value+ attribute: returns the current value of @k_value.
# The constructor defaults it to 1.0; classify adds it to each matched
# token's count when computing a category's score.
def k_value
  @k_value
end

#token_count ⇒ Object

Returns the value of attribute token_count.



5
6
7
# File 'lib/omnicat/classifiers/bayes.rb', line 5

# Reader for the +token_count+ attribute: returns the current value of
# @token_count. train adds every trained token's count to it, across all
# categories.
def token_count
  @token_count
end

Instance Method Details

#add_category(name) ⇒ Object

Adds a new classification category.

Parameters

  • name - Name for category

Examples

# Create a classification category
bayes = Bayes.new
bayes.add_category("positive")


32
33
34
35
36
37
38
39
40
# File 'lib/omnicat/classifiers/bayes.rb', line 32

# Registers a new, empty classification category.
#
# ==== Parameters
#
# * +name+ - Name for category
#
# ==== Returns
#
# * The newly created BayesInternals::Category stored under +name+
#
# ==== Raises
#
# * StandardError - when a category with this name already exists
#
# ==== Examples
#
#   bayes = Bayes.new
#   bayes.add_category("positive")
def add_category(name)
  # Reject duplicates before touching any state.
  if category_exists?(name)
    raise StandardError,
          "Category with name '#{name}' is already exists!"
  end

  self.category_count += 1
  categories[name] = ::OmniCat::Classifiers::BayesInternals::Category.new
end

#classify(doc) ⇒ Object

Classify the given document

Parameters

  • doc - The document for classification

Returns

  • result - OmniCat::Result object

Examples

# Classify a document
bayes.classify("good documentation")
=> an OmniCat::Result object


86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/omnicat/classifiers/bayes.rb', line 86

# Classify the given document against the trained categories.
#
# A category's score is its prior (its share of all trained documents)
# multiplied by k_value and by one smoothed factor per document token that
# the category has seen: (token count in category + k_value) /
# (category token count + overall token count). A category that shares no
# token with the document scores 0.
#
# ==== Parameters
#
# * +doc+ - The document for classification; must respond to
#   +tokenize_with_counts+, yielding token/count pairs
#
# ==== Returns
#
# * +result+ - OmniCat::Result object holding the per-category +scores+,
#   their +total_score+, and the best +category+ (+:name+, +:percentage+)
#
# ==== Raises
#
# * StandardError - when fewer than 2 categories have been added
#
# ==== Examples
#
#   bayes.classify("good documentation")
def classify(doc)
  if category_count < 2
    raise StandardError,
          "At least 2 categories needed for classification process!"
  end

  # Tokenize once: the token list does not depend on the category scored.
  doc_tokens = doc.tokenize_with_counts
  best_score = -Float::INFINITY
  result = ::OmniCat::Result.new

  categories.each do |name, category|
    prior = category.doc_count / doc_count.to_f
    likelihood = k_value
    matched = false
    doc_tokens.each do |token, _count|
      next unless category.tokens.has_key?(token)

      matched = true
      # Float denominator so an Integer k_value cannot cause integer division.
      likelihood *= (category.tokens[token].to_i + k_value) /
                    (category.token_count + token_count).to_f
    end

    # Track matches explicitly instead of comparing the product with 1.0:
    # a real match can legitimately multiply out to exactly 1.0, and the
    # starting value is only 1.0 when k_value is.
    result.scores[name] = matched ? prior * likelihood : 0
    if result.scores[name] > best_score
      result.category[:name] = name
      best_score = result.scores[name]
    end
    result.total_score += result.scores[name]
  end

  # Avoid dividing by zero when no category matched any token.
  result.total_score = 1 if result.total_score == 0
  result.category[:percentage] = (
    result.scores[result.category[:name]] * 100.0 /
    result.total_score
  ).floor
  result
end

#train(category, doc) ⇒ Object

Train the desired category with a document

Parameters

  • category - Name of the category from added categories list

  • doc - Document text

Examples

# Train the desired category
bayes.train("positive", "clear documentation")
bayes.train("positive", "good, very well")
bayes.train("negative", "bad dog")
bayes.train("neutral", "how is the management gui")


56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/omnicat/classifiers/bayes.rb', line 56

# Feeds one document into an existing category, updating the classifier-wide
# and per-category document and token tallies.
#
# ==== Parameters
#
# * +category+ - Name of the category from added categories list
# * +doc+ - Document text; must respond to +tokenize_with_counts+,
#   yielding token/count pairs
#
# ==== Raises
#
# * StandardError - when no category with the given name exists
#
# ==== Examples
#
#   bayes.train("positive", "clear documentation")
#   bayes.train("negative", "bad dog")
def train(category, doc)
  unless category_exists?(category)
    raise StandardError,
          "Category with name '#{category}' does not exist!"
  end

  self.doc_count += 1
  trained = categories[category]
  trained.doc_count += 1
  doc.tokenize_with_counts.each do |token, occurrences|
    self.token_count += occurrences
    # to_i turns a token the category has not seen yet (nil) into 0.
    trained.tokens[token] = trained.tokens[token].to_i + occurrences
    trained.token_count += occurrences
  end
end