Class: NaiveBayes::Classifier

Inherits:
Object
  • Object
show all
Defined in:
lib/nb/classifier.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*categories) ⇒ Classifier

Returns a new instance of Classifier.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/nb/classifier.rb', line 8

# Builds a classifier for the given categories.
#
# A trailing Hash argument is treated as options, not as a category.
# Options read here:
#   :backend - :memory (used when absent or nil) or :redis
#   :host    - Redis host, 'localhost' when absent (only read for :redis)
#   :port    - Redis port, 6379 when absent (only read for :redis)
#
# The first category is remembered as the default category.
#
# @param categories [Array] category names, optionally followed by an options Hash
# @raise [RuntimeError] when :backend names an unsupported backend
def initialize(*categories)
  options = categories.last.is_a?(Hash) ? categories.pop : {}

  # Defaults are read with `||` rather than written back with `||=`, so a
  # Hash owned by the caller is never modified (and a frozen one is accepted).
  backend_name = options[:backend] || :memory

  @backend =
    case backend_name
    when :memory
      Backend::Memory.new(categories)
    when :redis
      Backend::Redis.new(
        categories,
        host: options[:host] || 'localhost',
        port: options[:port] || 6379
      )
    else
      raise "unsupported backend: #{backend_name}"
    end

  @default_category = categories.first
end

Instance Attribute Details

#backend ⇒ Object

Returns the value of attribute backend.



6
7
8
# File 'lib/nb/classifier.rb', line 6

# Reader for the storage backend held in @backend, which the constructor
# sets to a Backend::Memory or Backend::Redis instance.
def backend
  @backend
end

#default_category ⇒ Object

Returns the value of attribute default_category.



5
6
7
# File 'lib/nb/classifier.rb', line 5

# Reader for the default category: the first category handed to the
# constructor. #classify answers with it when the best score is 0.0.
def default_category
  @default_category
end

Class Method Details

.load_yaml(yaml_file) ⇒ Object

Loads the given YAML file into a new classifier, which is always memory-backed.



117
118
119
120
121
122
123
124
# File 'lib/nb/classifier.rb', line 117

# Rebuilds a classifier from a YAML file shaped like the Hash produced by
# #data (keys :categories, :tokens_count and :categories_count).
# The resulting classifier is always memory-backed.
#
# Only plain YAML types plus Symbol are accepted from the file.
#
# @param yaml_file [String] path of the YAML file to read
# @return [Object] the newly built classifier
def load_yaml(yaml_file)
  # The stored Hash uses Symbol keys, which YAML.load_file rejects under
  # Psych 4's safe-by-default loading. Permit Symbol explicitly instead of
  # falling back to unrestricted object deserialisation.
  data = YAML.safe_load(
    File.read(yaml_file),
    permitted_classes: [Symbol],
    aliases: true,
    filename: yaml_file
  )

  # The constructor takes categories as separate arguments, so the stored
  # collection has to be splatted; passing it whole would register a single
  # Array-valued category.
  new(*data[:categories], backend: :memory).tap do |classifier|
    # #data reads both counters from the backend, so restore them there
    # unless the classifier itself offers writers.
    # NOTE(review): assumes the memory backend exposes tokens_count= and
    # categories_count= — confirm against Backend::Memory.
    target = classifier.respond_to?(:tokens_count=) ? classifier : classifier.backend
    target.tokens_count = data[:tokens_count]
    target.categories_count = data[:categories_count]
  end
end

Instance Method Details

#assumed_probability ⇒ Object

If we have only trained a little, a category may not have seen a given feature yet. Giving that feature a probability of 0 may not be accurate, so we produce an assumed probability instead, which gets smaller the more we train.



97
98
99
# File 'lib/nb/classifier.rb', line 97

# Fallback probability for a token with a zero count in a category.
# It is 0.5 divided by half of the total number of trained items, so it
# shrinks as that total grows.
def assumed_probability
  half_of_all_items = total_number_of_items.to_f / 2
  0.5 / half_of_all_items
end

#classifications(*tokens) ⇒ Object



54
55
56
57
58
59
60
# File 'lib/nb/classifier.rb', line 54

# Scores every backend category against the given tokens.
#
# A category's score is the probability of the tokens given that category
# multiplied by the probability of the category itself.
#
# @param tokens [Array] the tokens to score
# @return [Array<Array>] [category, score] pairs, highest score first
def classifications(*tokens)
  score_by_category = backend.categories.each_with_object({}) do |category, table|
    likelihood = probability_of_tokens_given_a_category(tokens, category)
    table[category] = likelihood * probability_of_a_category(category)
  end

  score_by_category.sort_by { |_category, score| -score }
end

#classify(*tokens) ⇒ Object



44
45
46
47
48
49
50
51
52
# File 'lib/nb/classifier.rb', line 44

# Picks the best-scoring category for the given tokens.
#
# Falls back to the default category with a score of 0.0 when the best
# score is 0.0, or when nothing could be scored at all.
#
# @param tokens [Array] the tokens to classify
# @return [Array] a [category, score] pair
def classify(*tokens)
  best = classifications(*tokens).first

  # `best` is nil when no category was scored; without this guard the
  # `best.last` below would raise NoMethodError.
  return [@default_category, 0.0] if best.nil? || best.last == 0.0

  best
end

#clear! ⇒ Object



40
41
42
# File 'lib/nb/classifier.rb', line 40

# Delegates to the backend's clear! and returns whatever it returns.
# NOTE(review): presumably this discards all trained counts — confirm
# against the backend implementation.
def clear!
  backend.clear!
end

#data ⇒ Object



101
102
103
104
105
106
107
# File 'lib/nb/classifier.rb', line 101

# Snapshot of the backend state as a Hash with the keys :categories,
# :tokens_count and :categories_count. This is the structure #save writes
# out as YAML.
#
# @return [Hash]
def data
  store = backend

  {
    categories: store.categories,
    tokens_count: store.tokens_count,
    categories_count: store.categories_count
  }
end

#probability_of_a_category(category) ⇒ Object



82
83
84
# File 'lib/nb/classifier.rb', line 82

# Share of all trained items that belong to the given category: the
# category's count (as a Float) divided by the total number of items.
#
# @param category [Object] the category to look up
# @return [Float]
def probability_of_a_category(category)
  items_in_category = backend.categories_count[category].to_f
  items_in_category / total_number_of_items
end

#probability_of_a_token_given_a_category(token, category) ⇒ Object



70
71
72
73
74
# File 'lib/nb/classifier.rb', line 70

# Probability of seeing the token in the given category: the token's count
# in that category divided by the category's item count.
#
# A token with no count, or a count of 0, gets the assumed probability
# instead, so that one unseen token does not zero the whole product.
#
# @param token [Object] the token to look up
# @param category [Object] the category to look in
# @return [Float]
def probability_of_a_token_given_a_category(token, category)
  occurrences = backend.tokens_count[category][token]

  # A Hash without a default value answers nil for an unseen token. Treat
  # that like a zero count rather than letting nil.to_f produce 0.0.
  return assumed_probability if occurrences.nil? || occurrences == 0

  occurrences.to_f / backend.categories_count[category]
end

#probability_of_a_token_in_category(token, category) ⇒ Object



66
67
68
# File 'lib/nb/classifier.rb', line 66

# The token's probability in the given category, normalised by the sum of
# its probabilities across every backend category.
#
# @param token [Object] the token to look up
# @param category [Object] the category of interest
# @return [Float]
def probability_of_a_token_in_category(token, category)
  within_category = probability_of_a_token_given_a_category(token, category)

  across_categories = 0.0
  backend.categories.each do |candidate|
    across_categories += probability_of_a_token_given_a_category(token, candidate)
  end

  within_category / across_categories
end

#probability_of_tokens_given_a_category(tokens, category) ⇒ Object



76
77
78
79
80
# File 'lib/nb/classifier.rb', line 76

# Product of the per-token probabilities for the given category, starting
# from 1.0 (which is also the result for an empty token list).
#
# @param tokens [Array] the tokens to combine
# @param category [Object] the category to evaluate them against
# @return [Float]
def probability_of_tokens_given_a_category(tokens, category)
  likelihood = 1.0
  tokens.each do |token|
    likelihood *= probability_of_a_token_given_a_category(token, category)
  end
  likelihood
end

#save(yaml_file) ⇒ Object



109
110
111
112
113
# File 'lib/nb/classifier.rb', line 109

# Writes the classifier state (the Hash returned by #data) to a YAML file.
#
# @param yaml_file [String] path of the file to write
# @raise [RuntimeError] unless the backend is a Backend::Memory
def save(yaml_file)
  # The backend is an object, not a Symbol: comparing it with :memory was
  # never true, so every call raised. Check the backend's class instead.
  raise 'only memory backend can save' unless backend.is_a?(Backend::Memory)

  File.write(yaml_file, data.to_yaml)
end

#top_tokens_of_category(category, count = 20) ⇒ Object



62
63
64
# File 'lib/nb/classifier.rb', line 62

# Lists the tokens of a category ranked by their normalised probability in
# that category, highest first.
#
# @param category [Object] the category whose tokens are ranked
# @param count [Integer] maximum number of rows to return
# @return [Array<Array>] rows of [token, count, probability]
def top_tokens_of_category(category, count=20)
  rows = backend.tokens_count[category].map do |token, occurrences|
    [token, occurrences, probability_of_a_token_in_category(token, category)]
  end

  ranked = rows.sort_by { |_token, _occurrences, share| -share }
  ranked.first(count)
end

#total_number_of_items ⇒ Object

def total_number_of_tokens

@tokens_count.values.inject(0) { |sum, hash| sum + hash.values.inject(&:+) }

end



90
91
92
# File 'lib/nb/classifier.rb', line 90

# Total number of trained items: the sum of every per-category count held
# by the backend.
#
# @return [Numeric] the sum, or 0 when there are no counts at all
def total_number_of_items
  # Seed the fold with 0 so an empty collection sums to 0 instead of nil,
  # which callers cannot divide by.
  backend.categories_count.values.inject(0) { |total, count| total + count }
end

#train(category, *tokens) ⇒ Object



32
33
34
# File 'lib/nb/classifier.rb', line 32

# Forwards the category and tokens to the backend's train method and
# returns its result.
#
# @param category [Object] the category the tokens are passed along with
# @param tokens [Array] the tokens to pass to the backend
def train(category, *tokens)
  backend.train(category, *tokens)
end

#untrain(category, *tokens) ⇒ Object



36
37
38
# File 'lib/nb/classifier.rb', line 36

# Forwards the category and tokens to the backend's untrain method and
# returns its result.
# NOTE(review): presumably the inverse of #train — confirm against the
# backend implementation.
#
# @param category [Object] the category the tokens are passed along with
# @param tokens [Array] the tokens to pass to the backend
def untrain(category, *tokens)
  backend.untrain(category, *tokens)
end