Class: Ankusa::KLDivergenceClassifier

Inherits:
Object
  • Object
show all
Includes:
Classifier
Defined in:
lib/ankusa/kl_divergence.rb

Instance Attribute Summary

Attributes included from Classifier

#classnames

Instance Method Summary collapse

Methods included from Classifier

#initialize, #train, #untrain

Instance Method Details

#classify(text, classes = nil) ⇒ Object



6
7
8
9
10
# File 'lib/ankusa/kl_divergence.rb', line 6

# Picks the class whose accumulated distance from the word distribution
# of +text+ is smallest.
#
# @param text [String] the text to classify
# @param classes [Array, nil] class names to consider; passed straight
#   through to #distances, which applies its own default when nil
# @return [Object, nil] the key of the smallest entry returned by
#   #distances, or nil when #distances yields no entries (previously this
#   case raised NoMethodError on nil)
def classify(text, classes=nil)
  # min_by finds the smallest distance in one pass; no full sort is needed
  # just to read the first element.
  closest = distances(text, classes).min_by { |_name, distance| distance }

  # closest is a [class_name, distance] pair, or nil for an empty result.
  closest && closest.first
end

#distances(text, classnames = nil) ⇒ Object

classnames is an array of the class names to look at; when it is nil, the classifier's own @classnames are used.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/ankusa/kl_divergence.rb', line 14

# Accumulates, for every class, a distance between the word distribution
# of +text+ and that class's word probabilities.
#
# For each (word, count) entry of TextHash.new(text) and each class k, the
# term  p * log(p / q_k) * count  is added to k's total, where
# p = count / (number of entries in the TextHash) and q_k is the value
# get_word_probs returns for k.
#
# NOTE(review): p is divided by TextHash#length; if that is the number of
# distinct words rather than the total word count, p is not a normalized
# probability -- confirm against TextHash.
#
# @param text [String] text handed to TextHash.new
# @param classnames [Array, nil] classes to score; falls back to
#   @classnames when nil
# @return [Hash] class name => accumulated distance; the hash has a default
#   of 0 and holds no keys when the text produces no entries
def distances(text, classnames=nil)
  classnames ||= @classnames
  totals = Hash.new(0)

  word_counts = TextHash.new(text)
  # Loop-invariant denominator, computed once instead of per word.
  entry_total = word_counts.length.to_f

  word_counts.each do |word, count|
    text_prob = count.to_f / entry_total
    class_probs = get_word_probs(word, classnames)

    classnames.each do |name|
      totals[name] += (text_prob * Math.log(text_prob / class_probs[name]) * count)
    end
  end

  totals
end