Class: NaiveBayes::Classifier
- Inherits:
-
Object
- Object
- NaiveBayes::Classifier
- Defined in:
- lib/nb/classifier.rb
Instance Attribute Summary collapse
-
#backend ⇒ Object
Returns the value of attribute backend.
-
#default_category ⇒ Object
Returns the value of attribute default_category.
Class Method Summary collapse
-
.load_yaml(yaml_file) ⇒ Object
Loads a YAML file into a memory-backed classifier.
Instance Method Summary collapse
-
#assumed_probability ⇒ Object
If we have only trained a little, a category may not have seen a given feature yet. Giving that feature a probability of 0 may not be accurate, so we produce an assumed probability, which gets smaller the more we train.
- #classifications(*tokens) ⇒ Object
- #classify(*tokens) ⇒ Object
- #clear! ⇒ Object
- #data ⇒ Object
-
#initialize(*categories) ⇒ Classifier
constructor
A new instance of Classifier.
- #probability_of_a_category(category) ⇒ Object
- #probability_of_a_token_given_a_category(token, category) ⇒ Object
- #probability_of_a_token_in_category(token, category) ⇒ Object
- #probability_of_tokens_given_a_category(tokens, category) ⇒ Object
- #save(yaml_file) ⇒ Object
- #top_tokens_of_category(category, count = 20) ⇒ Object
-
#total_number_of_items ⇒ Object
def total_number_of_tokens @tokens_count.values.inject(0) { |sum, hash| sum + hash.values.inject(&:+) } end.
- #train(category, *tokens) ⇒ Object
- #untrain(category, *tokens) ⇒ Object
Constructor Details
#initialize(*categories) ⇒ Classifier
Returns a new instance of Classifier.
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/nb/classifier.rb', line 8
# Builds a classifier over the given categories.
#
# @param categories [Array] category names; if the last argument is a Hash it
#   is removed and used as options (:backend, plus :host and :port for :redis)
# @raise [RuntimeError] when :backend is neither :memory nor :redis
def initialize(*categories)
  options = categories.last.is_a?(Hash) ? categories.pop : {}

  # Defaults are applied without writing back into the caller's hash.
  backend_name = options[:backend] || :memory

  @backend =
    case backend_name
    when :memory
      Backend::Memory.new(categories)
    when :redis
      Backend::Redis.new(categories,
                         host: options[:host] || 'localhost',
                         port: options[:port] || 6379)
    else
      raise "unsupported backend: #{backend_name}"
    end

  # The first category doubles as the fallback used by #classify.
  @default_category = categories.first
end
Instance Attribute Details
#backend ⇒ Object
Returns the value of attribute backend.
6 7 8 |
# File 'lib/nb/classifier.rb', line 6
# Reader for the @backend instance variable.
#
# @return [Object] the current value of @backend
def backend
  @backend
end
#default_category ⇒ Object
Returns the value of attribute default_category.
5 6 7 |
# File 'lib/nb/classifier.rb', line 5
# Reader for the @default_category instance variable.
#
# @return [Object] the current value of @default_category
def default_category
  @default_category
end
Class Method Details
.load_yaml(yaml_file) ⇒ Object
Loads a YAML file into a memory-backed classifier.
117 118 119 120 121 122 123 124 |
# File 'lib/nb/classifier.rb', line 117
# Loads a YAML file into a new memory-backed classifier.
#
# The file is expected to hold a Hash with the symbol keys :categories,
# :tokens_count and :categories_count (the shape produced by #data).
#
# @param yaml_file [String] path of the YAML file to read
# @return [Classifier] the newly built classifier
def load_yaml(yaml_file)
  # safe_load with Symbol permitted: the stored hash uses symbol keys, which a
  # default safe load rejects, and an unrestricted load of a file on disk could
  # instantiate arbitrary objects.
  data = YAML.safe_load(File.read(yaml_file), permitted_classes: [Symbol], aliases: true)

  # Splat the category list: the constructor takes categories as separate
  # arguments, so passing the array itself would nest it one level too deep.
  new(*data[:categories], backend: :memory).tap do |classifier|
    # NOTE(review): assumes the memory backend exposes tokens_count= and
    # categories_count= writers -- confirm against Backend::Memory.
    classifier.backend.tokens_count = data[:tokens_count]
    classifier.backend.categories_count = data[:categories_count]
  end
end
Instance Method Details
#assumed_probability ⇒ Object
If we have only trained a little, a category may not have seen a given feature yet. Giving that feature a probability of 0 may not be accurate, so we produce an assumed probability, which gets smaller the more we train.
97 98 99 |
# File 'lib/nb/classifier.rb', line 97
# Probability assigned to a token that a category has not seen yet.
#
# Using 0 for an unseen token may be too harsh early in training, so a small
# stand-in value is used instead; it shrinks as total_number_of_items grows.
#
# @return [Float] 0.5 divided by half of total_number_of_items
def assumed_probability
  half_of_items = total_number_of_items.to_f / 2
  0.5 / half_of_items
end
#classifications(*tokens) ⇒ Object
54 55 56 57 58 59 60 |
# File 'lib/nb/classifier.rb', line 54
# Scores every backend category for the given tokens.
#
# Each score is probability_of_tokens_given_a_category multiplied by
# probability_of_a_category.
#
# @param tokens [Array] the tokens to score
# @return [Array<Array>] [category, score] pairs, highest score first
def classifications(*tokens)
  scores = backend.categories.each_with_object({}) do |category, acc|
    likelihood = probability_of_tokens_given_a_category(tokens, category)
    acc[category] = likelihood * probability_of_a_category(category)
  end

  # Negating the score sorts in descending order.
  scores.sort_by { |_category, score| -score }
end
#classify(*tokens) ⇒ Object
44 45 46 47 48 49 50 51 52 |
# File 'lib/nb/classifier.rb', line 44
# Picks the best category for the given tokens.
#
# @param tokens [Array] the tokens to classify
# @return [Array] the top [category, score] pair from #classifications, or
#   [@default_category, 0.0] when the top score is 0.0 or nothing was scored
def classify(*tokens)
  best = classifications(*tokens).first

  # `best` is nil when there are no categories to score; treat that like a
  # zero score instead of failing on nil.last.
  return [@default_category, 0.0] if best.nil? || best.last == 0.0

  best
end
#clear! ⇒ Object
40 41 42 |
# File 'lib/nb/classifier.rb', line 40
# Delegates to backend.clear!.
#
# @return [Object] whatever backend.clear! returns
def clear!
  backend.clear!
end
#data ⇒ Object
101 102 103 104 105 106 107 |
# File 'lib/nb/classifier.rb', line 101
# Collects the backend's state into a single Hash.
#
# @return [Hash] the backend's categories, tokens_count and categories_count
#   under the symbol keys :categories, :tokens_count and :categories_count
def data
  {
    categories: backend.categories,
    tokens_count: backend.tokens_count,
    categories_count: backend.categories_count
  }
end
#probability_of_a_category(category) ⇒ Object
82 83 84 |
# File 'lib/nb/classifier.rb', line 82
# Share of all trained items that belong to the given category.
#
# @param category [Object] the category to look up in backend.categories_count
# @return [Float] the category's count divided by total_number_of_items, or
#   0.0 when no items have been counted
def probability_of_a_category(category)
  total = total_number_of_items

  # With nothing trained the division would yield NaN (or fail on nil), and a
  # NaN score cannot be sorted by callers; report 0.0 instead.
  return 0.0 if total.nil? || total.zero?

  backend.categories_count[category].to_f / total
end
#probability_of_a_token_given_a_category(token, category) ⇒ Object
70 71 72 73 74 |
# File 'lib/nb/classifier.rb', line 70
# Probability of a token within one category.
#
# @param token [Object] the token to look up
# @param category [Object] the category whose counts are consulted
# @return [Float] assumed_probability when the token's count in the category
#   is 0; otherwise that count divided by the category's item count
def probability_of_a_token_given_a_category(token, category)
  # Look the count up once and reuse it for both the check and the division.
  occurrences = backend.tokens_count[category][token]
  return assumed_probability if occurrences == 0

  occurrences.to_f / backend.categories_count[category]
end
#probability_of_a_token_in_category(token, category) ⇒ Object
66 67 68 |
# File 'lib/nb/classifier.rb', line 66
# A token's probability in one category relative to all categories.
#
# @param token [Object] the token to evaluate
# @param category [Object] the category of interest
# @return [Float] probability_of_a_token_given_a_category for this category
#   divided by the sum of that same probability over every backend category
def probability_of_a_token_in_category(token, category)
  across_all_categories = backend.categories.inject(0.0) do |running_total, each_category|
    running_total + probability_of_a_token_given_a_category(token, each_category)
  end

  probability_of_a_token_given_a_category(token, category) / across_all_categories
end
#probability_of_tokens_given_a_category(tokens, category) ⇒ Object
76 77 78 79 80 |
# File 'lib/nb/classifier.rb', line 76
# Combined probability of several tokens within one category.
#
# @param tokens [Array] the tokens to combine
# @param category [Object] the category whose probabilities are used
# @return [Float] the product of probability_of_a_token_given_a_category over
#   all tokens; 1.0 when tokens is empty
def probability_of_tokens_given_a_category(tokens, category)
  tokens.inject(1.0) do |product, token|
    product * probability_of_a_token_given_a_category(token, category)
  end
end
#save(yaml_file) ⇒ Object
109 110 111 112 113 |
# File 'lib/nb/classifier.rb', line 109
# Writes the classifier's data to a YAML file.
#
# @param yaml_file [String] path of the file to write
# @return [Integer] the value returned by File.write
# @raise [RuntimeError] when the backend is not a Backend::Memory
def save(yaml_file)
  # `backend` holds a backend object, not the :memory symbol passed to the
  # constructor, so the check has to be on its class; comparing it with
  # :memory would reject every backend.
  raise 'only memory backend can save' unless backend.is_a?(Backend::Memory)

  File.write(yaml_file, data.to_yaml)
end
#top_tokens_of_category(category, count = 20) ⇒ Object
62 63 64 |
# File 'lib/nb/classifier.rb', line 62
# Lists the tokens of a category ranked by probability_of_a_token_in_category.
#
# @param category [Object] the category whose tokens are ranked
# @param count [Integer] maximum number of entries to return (default 20)
# @return [Array<Array>] [token, token_count, probability] triples, highest
#   probability first
def top_tokens_of_category(category, count = 20)
  ranked = backend.tokens_count[category].map do |token, occurrences|
    [token, occurrences, probability_of_a_token_in_category(token, category)]
  end

  # Negating the probability sorts in descending order.
  ranked.sort_by { |entry| -entry.last }.first(count)
end
#total_number_of_items ⇒ Object
# Adds up every per-token count across all categories in @tokens_count.
#
# @return [Integer] the grand total; 0 when there are no counts at all
def total_number_of_tokens
  # `sum` yields 0 for an empty category, where inject(&:+) would yield nil
  # and break the outer addition.
  @tokens_count.values.sum { |counts| counts.values.sum }
end
90 91 92 |
# File 'lib/nb/classifier.rb', line 90
# Total of all per-category item counts held by the backend.
#
# @return [Integer] the sum of backend.categories_count values; 0 when there
#   are none
def total_number_of_items
  # `sum` returns 0 for an empty collection, where inject(&:+) returns nil.
  backend.categories_count.values.sum
end
#train(category, *tokens) ⇒ Object
32 33 34 |
# File 'lib/nb/classifier.rb', line 32
# Delegates to backend.train with the same arguments.
#
# @param category [Object] the category being trained
# @param tokens [Array] the tokens passed on to the backend
# @return [Object] whatever backend.train returns
def train(category, *tokens)
  backend.train(category, *tokens)
end
#untrain(category, *tokens) ⇒ Object
36 37 38 |
# File 'lib/nb/classifier.rb', line 36
# Delegates to backend.untrain with the same arguments.
#
# @param category [Object] the category being untrained
# @param tokens [Array] the tokens passed on to the backend
# @return [Object] whatever backend.untrain returns
def untrain(category, *tokens)
  backend.untrain(category, *tokens)
end