Class: Bayesball::Classifier
- Inherits:
-
Object
- Object
- Bayesball::Classifier
- Defined in:
- lib/bayesball/classifier.rb
Constant Summary collapse
- STOP_WORDS =
IO.read(File.expand_path('../stopwords.txt',__FILE__)).split
Instance Method Summary collapse
- #classify(payload) ⇒ Object
-
#initialize(persistence = {}) ⇒ Classifier
constructor
A new instance of Classifier.
- #score(payload) ⇒ Object
- #train(category, payload) ⇒ Object
- #word_counts(payload) ⇒ Object
Constructor Details
#initialize(persistence = {}) ⇒ Classifier
5 6 7 |
# File 'lib/bayesball/classifier.rb', line 5
#
# Creates a classifier backed by the given store of training data.
#
# The store is read and written with +[]+ / +[]=+ keyed by category and is
# enumerated as (category, counts) pairs when scoring; each counts object
# is used as a word => occurrences mapping.
#
# @param persistence [Hash] training data store; defaults to a new empty Hash
#   (presumably any Hash-like object is accepted -- confirm against callers)
def initialize(persistence = {})
  @persistence = persistence
end
Instance Method Details
#classify(payload) ⇒ Object
37 38 39 40 |
# File 'lib/bayesball/classifier.rb', line 37
#
# Picks the category whose score for the payload is highest.
#
# @param payload [String] text to classify
# @return [Object, nil] the best-scoring category, or nil when #score
#   produced no scores at all (for example, nothing has been trained yet)
def classify(payload)
  # max_by returns nil for an empty score table; destructuring nil leaves
  # best_category nil instead of raising NoMethodError.
  best_category, _best_score = score(payload).max_by { |_category, value| value }
  best_category
end
#score(payload) ⇒ Object
26 27 28 29 30 31 32 33 34 35 |
# File 'lib/bayesball/classifier.rb', line 26
#
# Scores the payload against every trained category.
#
# Each distinct word of the payload adds log(count / total) to a category's
# score, where count is how often the word was recorded for that category
# (0.0001 when it was never recorded) and total is the sum of all counts
# recorded for the category. A word repeated in the payload is only counted
# once towards the score.
#
# Categories with no recorded words are skipped: their total is zero, so
# the division would produce an infinite score that beats every real one.
#
# @param payload [String] text to score
# @return [Hash] category => score (closer to zero is a better match);
#   lookups of categories that were not scored return 0
def score(payload)
  # The payload's words do not depend on the category, so extract them once.
  words = word_counts(payload).keys

  @persistence.each_with_object(Hash.new(0)) do |(category, counts), scores|
    total = counts.values.reduce(0, :+).to_f
    next if total.zero?

    words.each do |word|
      scores[category] += Math.log(counts.fetch(word, 0.0001) / total)
    end
  end
end
#train(category, payload) ⇒ Object
9 10 11 12 13 14 15 16 |
# File 'lib/bayesball/classifier.rb', line 9
#
# Adds the payload's word counts to the totals recorded for a category.
#
# @param category [Object] key under which the counts are stored
# @param payload [String] training text
# @return [Object] the value of the final assignment, i.e. the updated
#   word => count mapping for the category
def train(category, payload)
  tally = (@persistence[category] ||= {})

  word_counts(payload).each_pair do |term, occurrences|
    tally[term] = tally.fetch(term, 0) + occurrences
  end

  # Explicit write-back of the updated counts (presumably so stores that are
  # not plain Hashes persist the change -- confirm against the backends).
  @persistence[category] = tally
end
#word_counts(payload) ⇒ Object
18 19 20 21 22 23 24 |
# File 'lib/bayesball/classifier.rb', line 18
#
# Breaks the payload into words and counts how often each one occurs.
#
# The text is lower-cased, every non-word character and every one- or
# two-character word is replaced by a space, and anything listed in
# STOP_WORDS is dropped before counting.
#
# @param payload [String] text to tokenize
# @return [Hash] word => number of occurrences; unknown words read as 0
def word_counts(payload)
  cleaned = payload.downcase.gsub(/[^\w]|(\b\w{1,2}\b)/, ' ')

  frequencies = Hash.new(0)
  cleaned.split.each do |token|
    next if STOP_WORDS.include?(token)

    frequencies[token] += 1
  end
  frequencies
end