Class: Yanbi::Bayes
- Inherits: Object
- Inheritance chain: Object → Yanbi::Bayes
- Defined in:
- lib/bayes/bayes.rb
Direct Known Subclasses
Class Method Summary collapse
Instance Method Summary collapse
- #classify(document) ⇒ Object
- #classify_raw(text) ⇒ Object
- #initialize(klass, *categories) ⇒ Bayes (constructor): a new instance of Bayes.
- #newdoc(doc) ⇒ Object
- #save(name) ⇒ Object
- #scores(document) ⇒ Object
- #scores_raw(text) ⇒ Object
- #set_significance(cutoff, category = nil) ⇒ Object
- #train(category, document) ⇒ Object
- #train_raw(category, text) ⇒ Object
Constructor Details
#initialize(klass, *categories) ⇒ Bayes
Returns a new instance of Bayes.
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/bayes/bayes.rb', line 18
#
# Builds an untrained classifier over the given categories.
#
# Per-category word counts and document counts are keyed by the symbol form
# of each category; @categories keeps the names exactly as they were passed.
#
# @param klass [#to_s] bag class (or its name); only the last `::`-separated
#   segment of its string form is stored, in @bag_class
# @param categories [Array<#to_sym>] two or more category names
# @raise [ArgumentError] if fewer than two categories are given
def initialize(klass, *categories)
  unless categories.size > 1
    raise ArgumentError,
          "at least two categories are required (got #{categories.size})"
  end

  @categories = categories
  @category_counts = {}
  @document_counts = {}
  @category_sizes = {}

  @categories.each do |category|
    cat = category.to_sym
    @category_counts[cat] = {}
    @document_counts[cat] = 0
  end

  # Store only the unqualified class name, e.g. "Yanbi::WordBag" -> "WordBag".
  @bag_class = klass.to_s.split('::').last
end
Class Method Details
.default(*categories) ⇒ Object
34 35 36 |
# File 'lib/bayes/bayes.rb', line 34
#
# Convenience constructor: builds a classifier that uses WordBag as its
# bag class.
#
# @param categories [Array] category names, forwarded unchanged to .new
# @return [Bayes] the new classifier
def self.default(*categories)
  new(WordBag, *categories)
end
.load(fname) ⇒ Object
38 39 40 41 42 |
# File 'lib/bayes/bayes.rb', line 38
#
# Reads a YAML-serialized classifier from "<fname>.obj".
#
# SECURITY: this deserializes arbitrary Ruby objects from the file. Only
# load files that come from a trusted source.
#
# @param fname [String] path of the file, without the ".obj" extension
# @return [Bayes] the deserialized object
# @raise [LoadError] if the file does not contain an instance of the
#   receiving class
def self.load(fname)
  data = File.read(fname + '.obj')

  # Psych 4 (Ruby 3.1+) turned YAML.load into a safe loader that rejects
  # custom classes, so a dumped classifier can no longer be read with it.
  # Prefer unsafe_load where it exists; older Psych only has load.
  c = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(data) : YAML.load(data)

  raise LoadError unless c.is_a? self
  c
end
Instance Method Details
#classify(document) ⇒ Object
62 63 64 65 66 |
# File 'lib/bayes/bayes.rb', line 62
#
# Picks the category with the highest score for the document.
#
# @param document [#empty?] document to classify; passed on to #scores
# @return [Object, nil] the winning category, or nil for an empty document
def classify(document)
  return if document.empty?

  top_entry = scores(document).max_by { |_category, weight| weight }
  top_entry.first
end
#classify_raw(text) ⇒ Object
78 79 80 |
# File 'lib/bayes/bayes.rb', line 78
#
# Wraps raw text in a document via #newdoc and classifies it.
#
# @param text [Object] raw input handed to #newdoc
# @return [Object, nil] whatever #classify returns for the wrapped document
def classify_raw(text)
  document = newdoc(text)
  classify(document)
end
#newdoc(doc) ⇒ Object
95 96 97 |
# File 'lib/bayes/bayes.rb', line 95
#
# Instantiates the configured bag class around the given input.
#
# @param doc [Object] argument passed to the bag class constructor
# @return [Object] a new instance of the class named by @bag_class,
#   looked up inside the Yanbi namespace
def newdoc(doc)
  bag_class = Yanbi.const_get(@bag_class)
  bag_class.new(doc)
end
#save(name) ⇒ Object
44 45 46 47 48 |
# File 'lib/bayes/bayes.rb', line 44
#
# Serializes this classifier as YAML into "<name>.obj", overwriting any
# existing file.
#
# @param name [String] destination path without the ".obj" extension
def save(name)
  path = name + '.obj'
  File.open(path, 'w') { |file| YAML.dump(self, file) }
end
#scores(document) ⇒ Object
68 69 70 71 72 |
# File 'lib/bayes/bayes.rb', line 68
#
# Scores the document against every configured category.
#
# @param document [Object] document passed to the per-category scorer
# @return [Hash] category (as stored in @categories) => score
def scores(document)
  @categories.each_with_object({}) do |category, result|
    result[category] = score(category, document)
  end
end
#scores_raw(text) ⇒ Object
82 83 84 |
# File 'lib/bayes/bayes.rb', line 82
#
# Wraps raw text in a document via #newdoc and scores it.
#
# @param text [Object] raw input handed to #newdoc
# @return [Hash] whatever #scores returns for the wrapped document
def scores_raw(text)
  document = newdoc(text)
  scores(document)
end
#set_significance(cutoff, category = nil) ⇒ Object
86 87 88 89 90 91 92 93 |
# File 'lib/bayes/bayes.rb', line 86
#
# Drops every word whose count is below the cutoff and refreshes the cached
# category size afterwards.
#
# @param cutoff [Numeric] minimum count a word needs to be kept
# @param category [#to_sym, nil] a single category to prune; when nil, all
#   categories in @categories are pruned
# @return [Array] the list of categories that were processed
def set_significance(cutoff, category = nil)
  targets = if category.nil?
              @categories
            else
              [category]
            end

  targets.each do |name|
    key = name.to_sym
    @category_counts[key].delete_if { |_word, count| count < cutoff }
    @category_sizes[key] = category_size(key)
  end
end
#train(category, document) ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/bayes/bayes.rb', line 50
#
# Records one training document for a category.
#
# Increments the category's document count, then bumps the count of each
# distinct word in the document by one (a word repeated within the same
# document is counted once), and finally refreshes the cached category size.
#
# @param category [#to_sym] category the document belongs to
# @param document [#words] document whose #words are counted
# @return [Object] the refreshed category size
def train(category, document)
  key = category.to_sym
  @document_counts[key] += 1

  word_counts = @category_counts[key]
  document.words.uniq.each do |term|
    word_counts[term] = (word_counts[term] || 0) + 1
  end

  @category_sizes[key] = category_size(key)
end
#train_raw(category, text) ⇒ Object
74 75 76 |
# File 'lib/bayes/bayes.rb', line 74
#
# Wraps raw text in a document via #newdoc and trains the category with it.
#
# @param category [#to_sym] category the text belongs to
# @param text [Object] raw input handed to #newdoc
# @return [Object] whatever #train returns
def train_raw(category, text)
  document = newdoc(text)
  train(category, document)
end