Class: Judgee::Classifier
- Inherits:
-
Object
- Object
- Judgee::Classifier
- Defined in:
- lib/judgee/classifier.rb
Constant Summary collapse
- CATEGORIES_KEY =
Constants
"judgee:categories"
- CATEGORY_KEY =
"judgee:category"
- ALPHA =
1.0
Instance Attribute Summary collapse
-
#redis ⇒ Object
readonly
Returns the value of attribute redis.
Instance Method Summary collapse
- #classify(data) ⇒ Object
- #classify_fast(data) ⇒ Object
- #flush_category(category) ⇒ Object
- #flushdb(flush_db = false) ⇒ Object
-
#initialize(options = {}) ⇒ Classifier
constructor
A new instance of Classifier.
- #train(category, data) ⇒ Object
- #train_fast(category, data) ⇒ Object
- #untrain(category, data) ⇒ Object
- #untrain_fast(category, data) ⇒ Object
Constructor Details
#initialize(options = {}) ⇒ Classifier
Returns a new instance of Classifier.
18 19 20 |
# File 'lib/judgee/classifier.rb', line 18

# Creates a classifier and opens the Redis client it will use.
#
# @param options [Hash] settings handed unchanged to +Redis.new+
# @return [Classifier] a new instance of Classifier
def initialize(options = {})
  # NOTE(review): the parameter name and the argument to Redis.new were
  # restored from the documented signature `initialize(options = {})`;
  # confirm against lib/judgee/classifier.rb.
  @redis = Redis.new(options)
end
Instance Attribute Details
#redis ⇒ Object (readonly)
Returns the value of attribute redis.
16 17 18 |
# File 'lib/judgee/classifier.rb', line 16

# Read-only accessor for the +@redis+ instance variable.
#
# @return [Object] the value of attribute redis
def redis
  @redis
end
Instance Method Details
#classify(data) ⇒ Object
66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/judgee/classifier.rb', line 66

# Picks the stored category that best matches +data+.
#
# Each member of the CATEGORIES_KEY set is scored by summing, over the
# entries returned by +count_occurance(data)+, the absolute value of
#
#   word_count * log((count_in_category + ALPHA) /
#                    (count_in_all_categories + ALPHA * data.length))
#
# and the category with the smallest sum is returned.
#
# @param data [#length] sample to classify; it is passed to
#   +count_occurance+ and its +length+ scales the smoothing term
# @return [Symbol, nil] the best-scoring category, or +nil+ when there is
#   nothing to score (no categories stored, or +data+ yields no words)
def classify(data)
  categories = redis.smembers(CATEGORIES_KEY)
  occurrences = count_occurance(data)
  # With no categories or no words nothing is ever scored; the previous
  # `min_by(...).first` chain then raised NoMethodError on nil.
  return nil if categories.empty? || occurrences.empty?

  # One HGET per (category, word) pair. The per-word totals below reuse
  # these values instead of re-reading every category's hash again for each
  # category being scored.
  counts = categories.each_with_object({}) do |category, table|
    key = redis_category_key(category)
    table[category] = occurrences.each_key.each_with_object({}) do |word, row|
      row[word] = redis.hget(key, word).to_i
    end
  end

  # The denominator depends only on the word, never on the category.
  smoothing = ALPHA * data.length
  denominators = occurrences.each_key.each_with_object({}) do |word, totals|
    total = counts.each_value.inject(0) { |sum, row| sum + row[word] }
    totals[word] = (total + smoothing).to_f
  end

  # Categories are inserted in SMEMBERS order so ties resolve exactly as
  # before (first minimum wins).
  scores = categories.each_with_object({}) do |category, result|
    result[category] = occurrences.inject(0) do |score, (word, word_count)|
      numerator = (counts[category][word] + ALPHA).to_f
      score + (word_count * Math.log(numerator / denominators[word])).abs
    end
  end

  scores.min_by(&:last).first.to_sym
end
#classify_fast(data) ⇒ Object
81 82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/judgee/classifier.rb', line 81

# Picks the stored category that best matches +data+, reading each
# category's counts with a single HMGET.
#
# The score is the same as in +classify+: for every entry of
# +count_occurance(data)+ the absolute value of
#
#   word_count * log((count_in_category + ALPHA) /
#                    (count_in_all_categories + ALPHA * data.length))
#
# is summed per category, and the category with the smallest sum wins.
#
# @param data [#length] sample to classify; it is passed to
#   +count_occurance+ and its +length+ scales the smoothing term
# @return [Symbol, nil] the best-scoring category, or +nil+ when there is
#   nothing to score (no categories stored, or +data+ yields no words)
def classify_fast(data)
  categories = redis.smembers(CATEGORIES_KEY)
  occurrences = count_occurance(data)
  # Nothing to score: avoids calling HMGET with no fields and calling
  # `.first` on the nil that `min_by` returns for an empty result.
  return nil if categories.empty? || occurrences.empty?

  words = occurrences.keys

  # Exactly one HMGET per category; missing fields come back as nil and
  # count as 0.0.
  counts = categories.each_with_object({}) do |category, table|
    stored = redis.hmget(redis_category_key(category), words)
    table[category] = Hash[words.zip(stored.map(&:to_f))]
  end

  # Per-word totals across all categories are shared by every score, so
  # they are built once rather than once per category.
  smoothing = ALPHA * data.length
  denominators = words.each_with_object({}) do |word, totals|
    totals[word] = counts.each_value.inject(0.0) { |sum, row| sum + row[word] } + smoothing
  end

  # Categories are inserted in SMEMBERS order so ties resolve exactly as
  # before (first minimum wins).
  scores = categories.each_with_object({}) do |category, result|
    result[category] = occurrences.inject(0) do |score, (word, word_count)|
      likelihood = (counts[category][word] + ALPHA) / denominators[word]
      score + (word_count * Math.log(likelihood)).abs
    end
  end

  scores.min_by(&:last).first.to_sym
end
#flush_category(category) ⇒ Object
103 104 105 106 |
# File 'lib/judgee/classifier.rb', line 103

# Forgets a single category: deletes the key built by +redis_category_key+
# and then removes the category's name from the CATEGORIES_KEY set.
#
# @param category [Object] category identifier, passed to
#   +redis_category_key+ and +category_name+
# @return [Object] whatever the final SREM call returns
def flush_category(category)
  redis.del(redis_category_key(category))
  member = category_name(category)
  redis.srem(CATEGORIES_KEY, member)
end
#flushdb(flush_db = false) ⇒ Object
97 98 99 |
# File 'lib/judgee/classifier.rb', line 97

# Calls FLUSHDB on the Redis connection, but only when explicitly confirmed.
#
# @param flush_db [Boolean] pass a truthy value to actually flush; the
#   default +false+ makes the call a no-op
# @return [Object, nil] the result of +redis.flushdb+, or +nil+ when
#   +flush_db+ is falsy
def flushdb(flush_db = false)
  return unless flush_db

  redis.flushdb
end
#train(category, data) ⇒ Object
23 24 25 26 27 28 29 |
# File 'lib/judgee/classifier.rb', line 23

# Adds the words of +data+ to a category.
#
# The category name is added to the CATEGORIES_KEY set, then every entry
# returned by +count_occurance(data)+ is applied to the category's hash
# with one HINCRBY per word.
#
# @param category [Object] category identifier, passed to +category_name+
#   and +redis_category_key+
# @param data [Object] training sample, passed to +count_occurance+
# @return [String] always "OK"
def train(category, data)
  redis.sadd(CATEGORIES_KEY, category_name(category))

  tallies = count_occurance(data)
  tallies.each do |term, increment|
    redis.hincrby(redis_category_key(category), term, increment)
  end

  "OK"
end
#train_fast(category, data) ⇒ Object
31 32 33 34 35 36 37 38 |
# File 'lib/judgee/classifier.rb', line 31

# Adds the words of +data+ to a category using one HMGET and one HMSET
# instead of one HINCRBY per word.
#
# The category name is always added to the CATEGORIES_KEY set first. The
# stored counts for the words of +data+ are then read, increased by the
# counts from +count_occurance(data)+ and written back.
#
# NOTE: the counts are read and then written back in separate commands, so
# two concurrent callers can overwrite each other's increments.
#
# @param category [Object] category identifier, passed to +category_name+
#   and +redis_category_key+
# @param data [Object] training sample, passed to +count_occurance+
# @return [String] always "OK"
def train_fast(category, data)
  redis.sadd(CATEGORIES_KEY, category_name(category))

  occurrences = count_occurance(data)
  # No words means no fields for HMGET/HMSET, and the former
  # `to_a.flatten!` returned nil for an empty array. There is nothing to
  # store, so finish here.
  return "OK" if occurrences.empty?

  key = redis_category_key(category)
  words = occurrences.keys
  stored = redis.hmget(key, words)

  # Flat [field, value, field, value, ...] list, the shape HMSET expects.
  updated = words.zip(stored).flat_map do |word, stored_count|
    [word, occurrences[word].to_i + stored_count.to_i]
  end

  redis.hmset(key, updated)
  "OK"
end
#untrain(category, data) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/judgee/classifier.rb', line 42

# Removes the words of +data+ from a category, one word at a time.
#
# For each entry of +count_occurance(data)+ the stored count is read with
# HGET and reduced by the entry's count. A positive remainder is written
# back with HSET; otherwise the field is removed with HDEL.
#
# @param category [Object] category identifier, passed to
#   +redis_category_key+
# @param data [Object] sample to remove, passed to +count_occurance+
# @return [String] always "OK"
def untrain(category, data)
  count_occurance(data).each do |term, removed|
    remaining = redis.hget(redis_category_key(category), term).to_i - removed

    if remaining <= 0
      # Nothing left for this word: drop the field instead of storing 0.
      redis.hdel(redis_category_key(category), term)
    else
      redis.hset(redis_category_key(category), term, remaining)
    end
  end

  "OK"
end
#untrain_fast(category, data) ⇒ Object
54 55 56 57 58 59 60 61 62 |
# File 'lib/judgee/classifier.rb', line 54

# Removes the words of +data+ from a category using one HMGET followed by
# at most one HMSET and one HDEL.
#
# The stored count of every word in +count_occurance(data)+ is reduced by
# that word's count. Words that stay positive are written back; words that
# reach zero or below are deleted from the category's hash.
#
# NOTE: the counts are read and then written back in separate commands, so
# concurrent writers to the same category can overwrite each other.
#
# @param category [Object] category identifier, passed to
#   +redis_category_key+
# @param data [Object] sample to remove, passed to +count_occurance+
# @return [String] always "OK"
def untrain_fast(category, data)
  occurrences = count_occurance(data)
  # No words means no fields for HMGET/HMSET; nothing to remove.
  return "OK" if occurrences.empty?

  key = redis_category_key(category)
  words = occurrences.keys
  stored = redis.hmget(key, words)

  remaining = words.zip(stored).map do |word, stored_count|
    [word, stored_count.to_i - occurrences[word].to_i]
  end
  exhausted, kept = remaining.partition { |_word, count| count <= 0 }

  # Only positive counts are ever written, so a zero or negative value is
  # never visible in the hash, not even between the two commands.
  redis.hmset(key, kept.flatten) unless kept.empty?
  redis.hdel(key, exhausted.map(&:first)) unless exhausted.empty?
  "OK"
end