Class: Bae::Classifier
- Inherits: Object
  - Object
    - Bae::Classifier
- Defined in: lib/bae/classifier.rb
Instance Attribute Summary collapse
-
#frequency_table ⇒ Object
Returns the value of attribute frequency_table.
-
#label_index ⇒ Object
Returns the value of attribute label_index.
-
#label_index_sequence ⇒ Object
Returns the value of attribute label_index_sequence.
-
#label_instance_count ⇒ Object
Returns the value of attribute label_instance_count.
-
#total_terms ⇒ Object
Returns the value of attribute total_terms.
Instance Method Summary collapse
- #classify(data) ⇒ Object
- #classify_from_hash(frequency_hash) ⇒ Object
- #classify_from_string(document) ⇒ Object
- #finish_training! ⇒ Object
- #initialize ⇒ Classifier (constructor)
  A new instance of Classifier.
- #load_from_json(json) ⇒ Object
- #load_state(path) ⇒ Object
- #save_state(path) ⇒ Object
- #to_json ⇒ Object
- #train(label, training_data) ⇒ Object
- #train_from_hash(label, frequency_hash) ⇒ Object
- #train_from_string(label, document) ⇒ Object
Constructor Details
#initialize ⇒ Classifier
Returns a new instance of Classifier.
7 8 9 10 11 12 13 |
# File 'lib/bae/classifier.rb', line 7
# Sets up an untrained classifier: an empty frequency table, per-label
# tables that default missing labels to 0, and zeroed counters.
#
# @return [Classifier] a new instance of Classifier
def initialize
  # Default block shared by both per-label tables: a missing label is
  # stored as 0 on first access.
  zero_default = proc { |table, label| table[label] = 0 }

  @frequency_table = ::Hash.new
  @label_instance_count = ::Hash.new(&zero_default)
  @label_index = ::Hash.new(&zero_default)
  @label_index_sequence = -1 # start at -1 so 0 is first value
  @total_terms = 0.0
end
Instance Attribute Details
#frequency_table ⇒ Object
Returns the value of attribute frequency_table.
4 5 6 |
# File 'lib/bae/classifier.rb', line 4
# Reader for the frequency_table attribute.
#
# @return [Object] the current value of @frequency_table
def frequency_table
  @frequency_table
end
#label_index ⇒ Object
Returns the value of attribute label_index.
4 5 6 |
# File 'lib/bae/classifier.rb', line 4
# Reader for the label_index attribute.
#
# @return [Object] the current value of @label_index
def label_index
  @label_index
end
#label_index_sequence ⇒ Object
Returns the value of attribute label_index_sequence.
4 5 6 |
# File 'lib/bae/classifier.rb', line 4
# Reader for the label_index_sequence attribute.
#
# @return [Object] the current value of @label_index_sequence
def label_index_sequence
  @label_index_sequence
end
#label_instance_count ⇒ Object
Returns the value of attribute label_instance_count.
4 5 6 |
# File 'lib/bae/classifier.rb', line 4
# Reader for the label_instance_count attribute.
#
# @return [Object] the current value of @label_instance_count
def label_instance_count
  @label_instance_count
end
#total_terms ⇒ Object
Returns the value of attribute total_terms.
4 5 6 |
# File 'lib/bae/classifier.rb', line 4
# Reader for the total_terms attribute.
#
# @return [Object] the current value of @total_terms
def total_terms
  @total_terms
end
Instance Method Details
#classify(data) ⇒ Object
52 53 54 55 56 57 58 59 60 |
# File 'lib/bae/classifier.rb', line 52
# Classifies +data+, dispatching on its type.
#
# @param data [String, Hash] a String is passed to #classify_from_string,
#   a Hash is passed to #classify_from_hash
# @return [Object] whatever the selected classify_from_* method returns
# @raise [RuntimeError] if data is neither a String nor a Hash
def classify(data)
  return classify_from_string(data) if data.is_a?(::String)
  return classify_from_hash(data) if data.is_a?(::Hash)

  # The message names the data being classified; "Training data" is the
  # wording that belongs to #train and was misleading here.
  fail 'Data to classify must either be a string or hash'
end
#classify_from_hash(frequency_hash) ⇒ Object
62 63 64 65 66 |
# File 'lib/bae/classifier.rb', line 62
# Classifies a word => frequency hash by expanding it into a document
# string (each word followed by a space, repeated +frequency+ times) and
# handing that string to #classify_from_string.
#
# @param frequency_hash [Hash] words mapped to their repetition counts
# @return [Object] the result of #classify_from_string
def classify_from_hash(frequency_hash)
  repeated_words = frequency_hash.map do |word, frequency|
    padded = word + ' '
    padded * frequency
  end

  classify_from_string(repeated_words.join)
end
#classify_from_string(document) ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/bae/classifier.rb', line 68
# Scores a document against every label in #label_index.
#
# The document is split on whitespace and de-duplicated. For each label,
# every word that has a row in #frequency_table multiplies that label's
# running likelihood by p / (1 - p), where p is the add-one smoothed ratio
# (count + 1) / (label_instance_count + @frequency_table_size). Words with
# no row are skipped. The running likelihood is then multiplied by the
# label's prior and the resulting hash is passed to +normalize+.
#
# Reads @likelihoods, @priors and @frequency_table_size, which are set by
# #finish_training!.
#
# @param document [String] whitespace-separated text to classify
# @return [Object] the value returned by +normalize+ for the posterior hash
def classify_from_string(document)
  distinct_words = document.split.uniq
  running = @likelihoods.dup # work on a copy; the stored table is not modified
  scores = {}
  vocabulary_size = @frequency_table_size

  label_index.each do |label, column|
    distinct_words.each do |word|
      counts = frequency_table[word]
      next if counts.nil? # no row for this word in the frequency table

      smoothed = (counts[column] + 1.0).to_f / (label_instance_count[label] + vocabulary_size).to_f
      running[label] *= smoothed / (1.0 - smoothed)
    end

    scores[label] = @priors[label] * running[label]
  end

  normalize(scores)
end
#finish_training! ⇒ Object
15 16 17 18 19 20 |
# File 'lib/bae/classifier.rb', line 15
# Finalizes training: caches the number of distinct entries in the
# frequency table as @frequency_table_size, then runs
# calculate_likelihoods! followed by calculate_priors! (both defined
# outside this listing).
#
# @return [Object] the value returned by calculate_priors!
def finish_training!
  @frequency_table_size = @frequency_table.size
  calculate_likelihoods!
  calculate_priors!
end
#load_from_json(json) ⇒ Object
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/bae/classifier.rb', line 97
# Restores classifier state from a JSON document (as produced by #to_json)
# and then re-runs #finish_training!.
#
# @param json [String] JSON object containing the keys frequency_table,
#   label_instance_count, label_index, label_index_sequence and total_terms
# @return [Object] the value returned by #finish_training!
# @raise [RuntimeError] "Missing <key>" for the first required key that is
#   absent (or null/false) in the parsed state
def load_from_json(json)
  state = ::JSON.parse(json)

  required_keys = %w[frequency_table label_instance_count label_index label_index_sequence total_terms]
  required_keys.each { |key| fail "Missing #{key}" unless state[key] }

  @frequency_table = state['frequency_table']
  # JSON.parse yields plain hashes. Re-wrap the per-label tables so they
  # keep the same "missing label => 0" default that #initialize gives them;
  # otherwise `@label_instance_count[label] += 1` raises for a label first
  # seen after loading.
  @label_instance_count = ::Hash.new { |hash, label| hash[label] = 0 }.merge!(state['label_instance_count'])
  @label_index = ::Hash.new { |hash, label| hash[label] = 0 }.merge!(state['label_index'])
  @label_index_sequence = state['label_index_sequence']
  @total_terms = state['total_terms']

  finish_training!
end
#load_state(path) ⇒ Object
115 116 117 118 |
# File 'lib/bae/classifier.rb', line 115
# Reads a saved state file and loads it through #load_from_json.
#
# @param path [String] location of the JSON state file; it is expanded
#   with File.expand_path, so relative and "~" paths are accepted
# @return [Object] the value returned by #load_from_json
def load_state(path)
  # `::File.(path)` would parse as `File.call(path)` and raise
  # NoMethodError; the path has to go through File.expand_path.
  state_json = ::File.read(::File.expand_path(path))
  load_from_json(state_json)
end
#save_state(path) ⇒ Object
91 92 93 94 95 |
# File 'lib/bae/classifier.rb', line 91
# Writes the classifier state, serialized by #to_json, to a file.
#
# @param path [String] destination file; it is expanded with
#   File.expand_path and opened in 'w' mode, so existing content is replaced
# @return [Integer] the value returned by the file handle's write call
def save_state(path)
  # `::File.(path)` would parse as `File.call(path)` and raise
  # NoMethodError; the path has to go through File.expand_path.
  ::File.open(::File.expand_path(path), 'w') do |handle|
    handle.write(to_json)
  end
end
#to_json ⇒ Object
120 121 122 123 124 125 126 127 128 |
# File 'lib/bae/classifier.rb', line 120
# Serializes the classifier state as a JSON object with the keys
# frequency_table, label_instance_count, label_index,
# label_index_sequence and total_terms.
#
# @param args [Array] optional generator arguments, forwarded unchanged to
#   Hash#to_json. Accepting them lets the JSON generator call this method
#   with its state argument; calling with no arguments behaves as before.
# @return [String] the JSON text
def to_json(*args)
  {
    'frequency_table' => frequency_table,
    'label_instance_count' => label_instance_count,
    'label_index' => label_index,
    'label_index_sequence' => label_index_sequence,
    'total_terms' => total_terms
  }.to_json(*args)
end
#train(label, training_data) ⇒ Object
22 23 24 25 26 27 28 29 30 |
# File 'lib/bae/classifier.rb', line 22
# Trains the classifier with one example for +label+, dispatching on the
# type of the training data.
#
# @param label [Object] label the example belongs to
# @param training_data [String, Hash] a String is passed to
#   #train_from_string, a Hash is passed to #train_from_hash
# @return [Object] whatever the selected train_from_* method returns
# @raise [RuntimeError] if training_data is neither a String nor a Hash
def train(label, training_data)
  return train_from_string(label, training_data) if training_data.is_a?(::String)
  return train_from_hash(label, training_data) if training_data.is_a?(::Hash)

  fail 'Training data must either be a string or hash'
end
#train_from_hash(label, frequency_hash) ⇒ Object
43 44 45 46 47 48 49 50 |
# File 'lib/bae/classifier.rb', line 43
# Trains +label+ from a word => frequency hash.
#
# For every entry, calls update_label_index(label) and
# update_frequency_table(label, word, frequency) (helpers defined outside
# this listing). Afterwards increments the instance count for the label
# and @total_terms by one each, regardless of how many entries the hash has.
#
# @param label [Object] label the example belongs to
# @param frequency_hash [Hash] words mapped to their frequencies
# @return [Numeric] the incremented @total_terms
def train_from_hash(label, frequency_hash)
  frequency_hash.each_pair do |term, count|
    update_label_index(label)
    update_frequency_table(label, term, count)
  end

  @label_instance_count[label] += 1
  @total_terms += 1
end
#train_from_string(label, document) ⇒ Object
32 33 34 35 36 37 38 39 40 41 |
# File 'lib/bae/classifier.rb', line 32
# Trains +label+ from a document string.
#
# The document is split on whitespace. For every resulting word
# (duplicates included), calls update_label_index(label) and
# update_frequency_table(label, word, 1) (helpers defined outside this
# listing). Afterwards increments the instance count for the label and
# @total_terms by one each.
#
# @param label [Object] label the example belongs to
# @param document [String] whitespace-separated training text
# @return [Numeric] the incremented @total_terms
def train_from_string(label, document)
  document.split.each do |token|
    update_label_index(label)
    update_frequency_table(label, token, 1)
  end

  @label_instance_count[label] += 1
  @total_terms += 1
end