Class: Yanbi::Bayes

Inherits:
Object
  • Object
show all
Defined in:
lib/bayes/bayes.rb

Direct Known Subclasses

Fisher

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(klass, *categories) ⇒ Bayes

Returns a new instance of Bayes.

Raises:

  • (ArgumentError)


18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/bayes/bayes.rb', line 18

# Sets up an empty classifier for the given categories.
#
# @param klass [#to_s] bag class (or its name); only the last "::"-separated
#   component of its name is kept, in @bag_class
# @param categories [Array<#to_sym>] two or more category names
# @raise [ArgumentError] if fewer than two categories are given
def initialize(klass, *categories)
  unless categories.size > 1
    raise ArgumentError, "at least two categories are required (got #{categories.size})"
  end

  # The categories are stored exactly as passed in; the per-category tables
  # below are keyed by the symbol form of each name.
  @categories = categories
  @category_counts = {}
  @document_counts = {}
  @category_sizes = {}

  @categories.each do |category|
    cat = category.to_sym
    @category_counts[cat] = {}
    @document_counts[cat] = 0
  end

  # Keep only the unqualified class name, e.g. "Yanbi::WordBag" -> "WordBag".
  @bag_class = klass.to_s.split('::').last
end

Class Method Details

.default(*categories) ⇒ Object



34
35
36
# File 'lib/bayes/bayes.rb', line 34

# Builds a classifier that uses WordBag as its bag class.
#
# @param categories [Array] category names, forwarded unchanged to .new
# @return [Object] whatever .new returns for the receiving class
def self.default(*categories)
  new(WordBag, *categories)
end

.load(fname) ⇒ Object

Raises:

  • (LoadError)


38
39
40
41
42
# File 'lib/bayes/bayes.rb', line 38

# Reads a serialized classifier back from "<fname>.obj".
#
# NOTE(review): this deserializes arbitrary Ruby objects from the file, so it
# must only be used on files from a trusted source.
#
# @param fname [String] base path; ".obj" is appended before reading
# @return [Object] the deserialized object, an instance of the receiving class
# @raise [LoadError] if the file's content is not an instance of the receiver
def self.load(fname)
  data = File.read(fname + ".obj")
  # Psych 4 (Ruby 3.1+) made YAML.load refuse arbitrary objects, so a dumped
  # classifier needs unsafe_load there; older Psych versions only have load.
  c = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(data) : YAML::load(data)
  raise LoadError unless c.is_a? self
  c
end

Instance Method Details

#classify(document) ⇒ Object



62
63
64
65
66
# File 'lib/bayes/bayes.rb', line 62

# Picks the category with the highest score for the document.
#
# @param document [#empty?] a document object accepted by #scores
# @return [Object, nil] the best-scoring key from #scores, or nil when the
#   document is empty
def classify(document)
  return if document.empty?

  best = scores(document).max_by { |_category, weight| weight }
  best.first
end

#classify_raw(text) ⇒ Object



78
79
80
# File 'lib/bayes/bayes.rb', line 78

# Wraps the raw text with #newdoc and classifies the result.
#
# @param text [Object] raw input handed to #newdoc
# @return [Object, nil] whatever #classify returns for the wrapped text
def classify_raw(text)
  document = newdoc(text)
  classify(document)
end

#newdoc(doc) ⇒ Object



95
96
97
# File 'lib/bayes/bayes.rb', line 95

# Instantiates the configured bag class around the given input.
#
# @param doc [Object] passed as the only argument to the bag class's .new
# @return [Object] a new instance of the constant named by @bag_class,
#   looked up through Yanbi
def newdoc(doc)
  bag = Yanbi.const_get(@bag_class)
  bag.new(doc)
end

#save(name) ⇒ Object



44
45
46
47
48
# File 'lib/bayes/bayes.rb', line 44

# Serializes this object as YAML into "<name>.obj".
#
# @param name [String] base path; ".obj" is appended to form the file name
def save(name)
  path = name + ".obj"
  File.open(path, 'w') { |io| YAML.dump(self, io) }
end

#scores(document) ⇒ Object



68
69
70
71
72
# File 'lib/bayes/bayes.rb', line 68

# Scores the document against every configured category.
#
# @param document [Object] document object handed to #score
# @return [Hash] each entry of @categories mapped to its #score result,
#   in @categories order
def scores(document)
  @categories.each_with_object({}) do |category, result|
    result[category] = score(category, document)
  end
end

#scores_raw(text) ⇒ Object



82
83
84
# File 'lib/bayes/bayes.rb', line 82

# Wraps the raw text with #newdoc and scores the result.
#
# @param text [Object] raw input handed to #newdoc
# @return [Hash] whatever #scores returns for the wrapped text
def scores_raw(text)
  document = newdoc(text)
  scores(document)
end

#set_significance(cutoff, category = nil) ⇒ Object



86
87
88
89
90
91
92
93
# File 'lib/bayes/bayes.rb', line 86

# Drops words whose count is below the cutoff, then refreshes the stored
# size of each affected category.
#
# @param cutoff [Numeric] minimum count a word needs in order to be kept
# @param category [#to_sym, nil] a single category to prune; when nil every
#   entry of @categories is pruned
# @return [Array] the list of categories that were processed
# @raise [ArgumentError] if the given category has no count table
def set_significance(cutoff, category=nil)
  targets = (category.nil? ? @categories : [category])
  targets.each do |name|
    cat = name.to_sym
    counts = @category_counts[cat]
    # Fail with a clear message instead of a NoMethodError on nil.
    raise ArgumentError, "unknown category: #{name.inspect}" if counts.nil?

    counts.reject! { |_word, count| count < cutoff }
    @category_sizes[cat] = category_size(cat)
  end
end

#train(category, document) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
# File 'lib/bayes/bayes.rb', line 50

# Records one training document for the given category.
#
# Each distinct word of the document is counted once, however often it occurs
# within that document.
#
# @param category [#to_sym] a category that has a document counter
# @param document [#words] object whose #words returns the document's words
# @raise [ArgumentError] if the category has no document counter
def train(category, document)
  cat = category.to_sym
  # Validate before touching any state so a bad category cannot leave the
  # counters half-updated (previously this surfaced as a NoMethodError on nil).
  unless @document_counts.key?(cat)
    raise ArgumentError, "unknown category: #{category.inspect}"
  end

  @document_counts[cat] += 1

  counts = @category_counts[cat]
  document.words.uniq.each do |word|
    counts[word] = (counts[word] || 0) + 1
  end

  @category_sizes[cat] = category_size(cat)
end

#train_raw(category, text) ⇒ Object



74
75
76
# File 'lib/bayes/bayes.rb', line 74

# Wraps the raw text with #newdoc and trains the category on the result.
#
# @param category [Object] category handed to #train
# @param text [Object] raw input handed to #newdoc
# @return [Object] whatever #train returns
def train_raw(category, text)
  document = newdoc(text)
  train(category, document)
end