Class: JnbClassifier::Classifier

Inherits:
Object
  • Object
show all
Defined in:
lib/jnb_classifier.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Classifier

Returns a new instance of Classifier.



9
10
11
12
13
14
15
# File 'lib/jnb_classifier.rb', line 9

# Builds an empty, untrained classifier: every table starts out empty and
# every counter starts at zero.
def initialize
  @total_count = 0              # number of documents learned so far
  @label_count = Hash.new(0)    # label => number of documents learned under it
  @frequency_table = {}         # label => per-word counts for that label
  @word_table = {}              # every word seen so far (keys act as the vocabulary)
  @result = {}                  # label => score, filled in by #classify
end

Instance Attribute Details

#result ⇒ Object (readonly)

Returns the value of attribute result.



7
8
9
# File 'lib/jnb_classifier.rb', line 7

# Read-only access to the label => score table that #classify fills in.
# Returns the stored object itself, not a copy.
def result; @result; end

Instance Method Details

#classify(attributes) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/jnb_classifier.rb', line 32

# Scores every learned label against the given document features using a
# multivariate Bernoulli naive Bayes model and returns the best match.
#
# For each label the score is
#   log P(label) + sum over vocabulary of log P(word present/absent | label)
# where a vocabulary word found in +attributes+ contributes log(p) and a
# vocabulary word missing from +attributes+ contributes log(1 - p).
# Words in +attributes+ that were never learned are ignored.
#
# All scores are also stored in @result (label => score).
#
# @param attributes [#each, nil] the document's features, iterated the same
#   way #learn iterates them (first block argument is the word); nil is
#   treated as a document with no words
# @return [Array, nil] the [label, score] pair with the highest score, or
#   nil when nothing has been learned yet
def classify(attributes)
  # Set of words that occur in the document being classified.
  present = {}
  (attributes || {}).each { |word, _frequency| present[word] = true }

  score = {}
  @frequency_table.each do |label, word_counts|
    docs_in_label = @label_count[label]

    # Laplace smoothing for a binary (present/absent) feature: add one to
    # each of the two outcomes, hence "+ 2" in the denominator.
    denominator = docs_in_label + 2

    log_likelihood = @word_table.each_key.inject(0.0) do |acc, word|
      # Clamp so that p stays below 1 even if a word was counted more than
      # once for a single document; otherwise log(1 - p) would be undefined.
      docs_with_word = [word_counts[word], docs_in_label].min
      p_word = (docs_with_word + 1).fdiv(denominator)
      acc + Math.log(present.key?(word) ? p_word : 1 - p_word)
    end

    log_prior = Math.log(docs_in_label.fdiv(@total_count))
    score[label] = log_likelihood + log_prior
  end

  # Expose the full score table through the result reader.
  score.each { |label, value| @result[label] = value }
  score.max_by { |_label, value| value }
end

#learn(document) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/jnb_classifier.rb', line 17

# Adds one labelled document to the training data.
#
# Reads +document.label+ and iterates +document.attributes+; only the word
# (first block argument) is used, the accompanying value is ignored.
# Returns the updated total number of learned documents.
def learn(document)
  label = document.label

  # First document for this label: start its word counts at zero.
  @frequency_table[label] = Hash.new(0) unless @frequency_table.key?(label)
  counts_for_label = @frequency_table[label]

  document.attributes.each do |word, _frequency|
    counts_for_label[word] += 1   # multivariate Bernoulli: one count per listed word
    @word_table[word] = 1         # register the word in the vocabulary
  end

  @label_count[label] += 1
  @total_count += 1
end