Class: Bayesball::Classifier

Inherits:
Object
  • Object
show all
Defined in:
lib/bayesball/classifier.rb

Constant Summary collapse

# Words that word_counts drops from every payload. Loaded once, when the
# class body is evaluated, from the whitespace-separated stopwords.txt file
# located in the same directory as this source file.
STOP_WORDS =
File.read(File.expand_path('stopwords.txt', File.dirname(__FILE__))).split

Instance Method Summary collapse

Constructor Details

#initialize(persistence = {}) ⇒ Classifier



5
6
7
# File 'lib/bayesball/classifier.rb', line 5

# Creates a classifier backed by the given training store.
#
# @param persistence [Hash, #[], #[]=] store that the other methods read and
#   write as category => { word => count }; defaults to a fresh in-memory Hash.
#   NOTE(review): any non-Hash store must also be Enumerable, because #score
#   calls +reduce+ on it -- confirm against the intended backends.
def initialize(persistence = {})
  @persistence = persistence
end

Instance Method Details

#classify(payload) ⇒ Object



37
38
39
40
# File 'lib/bayesball/classifier.rb', line 37

# Returns the category whose score for +payload+ is highest.
#
# @param payload [String] text to classify; handed unchanged to #score
# @return [Object, nil] the winning category key, or nil when #score produces
#   no entries (nothing trained yet, or the payload has no usable words)
def classify(payload)
  # max_by finds the top entry without sorting the whole table, and yields nil
  # on an empty table instead of blowing up on nil[0].
  best = score(payload).max_by { |_category, value| value }
  best && best.first
end

#score(payload) ⇒ Object



26
27
28
29
30
31
32
33
34
35
# File 'lib/bayesball/classifier.rb', line 26

# Computes a log-likelihood style score of +payload+ for every trained
# category.
#
# For each category, every distinct word of the payload adds
# log(count_of_word_in_category / total_count_of_category) to that category's
# score. A word the category has never seen uses 0.0001 as its count. How
# often a word occurs inside the payload does not affect the score.
#
# Categories whose count table is empty are skipped: their total is zero, and
# dividing by it would yield +Infinity and make the untrained category win.
#
# @param payload [String] text to score; tokenized with #word_counts
# @return [Hash] category => Float score (missing keys read as 0). A category
#   is absent when it has no training data or the payload has no usable words.
def score(payload)
  # Tokenization does not depend on the category, so do it only once.
  words = word_counts(payload).keys

  @persistence.each_with_object(Hash.new(0)) do |(category, counts), scores|
    total = counts.values.reduce(0, :+).to_f
    next if total.zero?

    words.each do |word|
      seen = counts.fetch(word, 0.0001)
      scores[category] += Math.log(seen / total)
    end
  end
end

#train(category, payload) ⇒ Object



9
10
11
12
13
14
15
16
# File 'lib/bayesball/classifier.rb', line 9

# Adds the word frequencies of +payload+ to the counts stored for +category+.
#
# @param category [Object] key under which the counts are kept in the store
# @param payload [String] training text; tokenized with #word_counts
# @return [Hash] the updated word => count table for +category+
def train(category, payload)
  tallies = (@persistence[category] ||= {})

  word_counts(payload).each_pair do |term, occurrences|
    previous = tallies.fetch(term, 0)
    tallies[term] = previous + occurrences
  end

  # Explicit write-back of the updated table; presumably needed for stores that
  # hand out copies rather than live references -- confirm before removing.
  @persistence[category] = tallies
end

#word_counts(payload) ⇒ Object



18
19
20
21
22
23
24
# File 'lib/bayesball/classifier.rb', line 18

# Tokenizes +payload+ and counts how often each remaining word occurs.
#
# The text is lower-cased; every non-word character and every standalone word
# of one or two characters is replaced by a space; the result is split on
# whitespace and words listed in STOP_WORDS are dropped.
#
# @param payload [String] raw text
# @return [Hash] word => occurrence count (missing keys read as 0)
def word_counts(payload)
  cleaned = payload.downcase.gsub(/[^\w]|(\b\w{1,2}\b)/, ' ')
  frequencies = Hash.new(0)

  cleaned.split.each do |token|
    next if STOP_WORDS.include?(token)

    frequencies[token] += 1
  end

  frequencies
end