Class: Judgee::Classifier

Inherits:
Object
  • Object
show all
Defined in:
lib/judgee/classifier.rb

Constant Summary collapse

CATEGORIES_KEY =

Constants

"judgee:categories"
CATEGORY_KEY =
"judgee:category"
ALPHA =
1.0

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Classifier

Returns a new instance of Classifier.



18
19
20
# File 'lib/judgee/classifier.rb', line 18

# Creates a classifier with its own Redis client.
#
# @param options [Hash] handed unchanged to Redis.new; defaults to an empty
#   hash
def initialize(options = {})
  @redis = Redis.new(options)
end

Instance Attribute Details

#redis ⇒ Object (readonly)

Returns the value of attribute redis.



16
17
18
# File 'lib/judgee/classifier.rb', line 16

# Reader for the Redis client held in @redis (assigned in #initialize from
# Redis.new).
#
# @return the value of @redis
def redis
  @redis
end

Instance Method Details

#classify(data) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/judgee/classifier.rb', line 66

# Scores +data+ against every category registered under CATEGORIES_KEY and
# returns the best match.
#
# For each word produced by count_occurance(data) and each category, the
# score is word_count * |log((count_in_category + ALPHA) / denominator)|,
# where the denominator is the word's count summed over all categories plus
# ALPHA * data.length. Per-word scores are summed per category and the
# category with the smallest total wins.
#
# NOTE(review): the smoothing term uses data.length (the length of the raw
# input), not a vocabulary size — kept as in the original; confirm intent.
#
# @param data the input to classify; passed to count_occurance and asked for
#   its #length (presumably a String — confirm against callers)
# @return [Symbol, nil] the winning category, or nil when no categories are
#   registered or +data+ yields no words
def classify(data)
  categories = redis.smembers(CATEGORIES_KEY)
  return nil if categories.empty?

  occurances = count_occurance(data)
  # With no words nothing gets scored, so there is no minimum to pick.
  return nil if occurances.empty?

  smoothing = ALPHA * data.length
  result    = Hash.new(0)

  occurances.each do |word, word_count|
    # Fetch this word's count once per category; the same values feed both
    # the shared denominator and each category's numerator.
    counts      = categories.map { |category| redis.hget(redis_category_key(category), word).to_i }
    denominator = (counts.inject(0, :+) + smoothing).to_f

    categories.zip(counts).each do |category, count|
      numerator = (count + ALPHA).to_f
      result[category] += (word_count * Math.log(numerator / denominator)).abs
    end
  end

  result.min_by(&:last).first.to_sym
end

#classify_fast(data) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/judgee/classifier.rb', line 81

# Batched variant of #classify: reads all word counts for a category with a
# single HMGET instead of one HGET per word.
#
# The scoring is the same as in #classify's code: for each word,
# occurrences * |log((count_in_category + ALPHA) / denominator)|, where the
# denominator is the word's count summed over every category plus
# ALPHA * data.length. The category with the smallest summed score wins.
#
# @param data the input to classify; passed to count_occurance and asked for
#   its #length (presumably a String — confirm against callers)
# @return [Symbol, nil] the winning category, or nil when no categories are
#   registered or +data+ yields no words
def classify_fast(data)
  categories = redis.smembers(CATEGORIES_KEY)
  return nil if categories.empty?

  occurances = count_occurance(data)
  # An HMGET without fields is invalid, and there would be nothing to score.
  return nil if occurances.empty?

  words     = occurances.keys
  smoothing = ALPHA * data.length

  # One HMGET per category; missing fields come back as nil and count as 0.0.
  counts_by_category = categories.map do |category|
    stored = redis.hmget(redis_category_key(category), words)
    words.zip(stored).map { |_word, value| value.to_f }
  end

  # The denominator depends only on the word, so build it once for all
  # categories rather than once per category.
  denominators = words.each_index.map do |position|
    counts_by_category.inject(0.0) { |sum, counts| sum + counts[position] } + smoothing
  end

  result = {}
  categories.zip(counts_by_category).each do |category, counts|
    scores = counts.zip(denominators, words).map do |count, denominator, word|
      (occurances[word] * Math.log((count + ALPHA) / denominator)).abs
    end
    result[category] = scores.inject(0, :+)
  end

  result.min_by(&:last).first.to_sym
end

#flush_category(category) ⇒ Object



103
104
105
106
# File 'lib/judgee/classifier.rb', line 103

# Forgets a single category.
#
# First deletes the Redis key returned by redis_category_key(category), then
# removes category_name(category) from the set stored at CATEGORIES_KEY.
#
# @param category the category to remove
# @return whatever the final SREM call returns
def flush_category(category)
  counts_key = redis_category_key(category)
  redis.del(counts_key)

  member = category_name(category)
  redis.srem(CATEGORIES_KEY, member)
end

#flushdb(flush_db = false) ⇒ Object



97
98
99
# File 'lib/judgee/classifier.rb', line 97

# Calls redis.flushdb, but only when the caller explicitly opts in.
#
# @param flush_db when truthy, redis.flushdb is invoked; defaults to false
# @return nil when +flush_db+ is falsy, otherwise the result of redis.flushdb
def flushdb(flush_db = false)
  return unless flush_db

  redis.flushdb
end

#train(category, data) ⇒ Object



23
24
25
26
27
28
29
# File 'lib/judgee/classifier.rb', line 23

# Adds the words of +data+ to a category's stored counts.
#
# Registers category_name(category) in the CATEGORIES_KEY set, then issues
# one HINCRBY per distinct word returned by count_occurance(data).
#
# @param category the category being trained
# @param data the training input, passed to count_occurance
# @return [String] always "OK"
def train(category, data)
  redis.sadd(CATEGORIES_KEY, category_name(category))

  tallies = count_occurance(data)
  tallies.each { |term, tally| redis.hincrby(redis_category_key(category), term, tally) }

  "OK"
end

#train_fast(category, data) ⇒ Object



31
32
33
34
35
36
37
38
# File 'lib/judgee/classifier.rb', line 31

# Batched variant of #train: reads the current counts with one HMGET, adds
# the new occurrences in Ruby, and writes the totals back with one HMSET.
#
# NOTE(review): the read and the write are two separate Redis commands, so an
# update made by another client between them would be overwritten — confirm
# whether concurrent training is expected.
#
# @param category the category being trained
# @param data the training input, passed to count_occurance
# @return [String] always "OK"
def train_fast(category, data)
  redis.sadd(CATEGORIES_KEY, category_name(category))

  occurances = count_occurance(data)
  # HMGET/HMSET need at least one field; with no words there is nothing to
  # store, so behave like #train and just report success.
  return "OK" if occurances.empty?

  key    = redis_category_key(category)
  words  = occurances.keys
  stored = Hash[words.zip(redis.hmget(key, words))]
  totals = occurances.merge(stored) { |_word, added, existing| added.to_i + existing.to_i }

  # Non-bang flatten: flatten! returns nil when it has nothing to flatten.
  redis.hmset(key, totals.to_a.flatten)
  "OK"
end

#untrain(category, data) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
# File 'lib/judgee/classifier.rb', line 42

# Subtracts the words of +data+ from a category's stored counts.
#
# For each distinct word from count_occurance(data) the stored count is read
# with HGET and reduced by the word's occurrences. A positive remainder is
# written back with HSET; otherwise the field is removed with HDEL.
#
# @param category the category being untrained
# @param data the input to remove, passed to count_occurance
# @return [String] always "OK"
def untrain(category, data)
  count_occurance(data).each do |term, removed|
    counts_key = redis_category_key(category)
    remaining  = redis.hget(counts_key, term).to_i - removed

    if remaining <= 0
      redis.hdel(counts_key, term)
    else
      redis.hset(counts_key, term, remaining)
    end
  end

  "OK"
end

#untrain_fast(category, data) ⇒ Object



54
55
56
57
58
59
60
61
62
# File 'lib/judgee/classifier.rb', line 54

# Batched variant of #untrain: reads the current counts with one HMGET,
# subtracts the occurrences in Ruby, then writes the positive remainders with
# one HMSET and removes the exhausted fields with one HDEL.
#
# NOTE(review): the read and the writes are separate Redis commands, so a
# concurrent update between them would be overwritten — confirm whether that
# matters for callers.
#
# @param category the category being untrained
# @param data the input to remove, passed to count_occurance
# @return [String] always "OK"
def untrain_fast(category, data)
  occurances = count_occurance(data)
  # HMGET needs at least one field; no words means nothing to subtract.
  return "OK" if occurances.empty?

  key    = redis_category_key(category)
  words  = occurances.keys
  stored = Hash[words.zip(redis.hmget(key, words))]

  remaining     = stored.merge(occurances) { |_word, existing, removed| existing.to_i - removed.to_i }
  kept, emptied = remaining.partition { |_word, count| count > 0 }

  # Only positive counts are written, matching #untrain, which never stores
  # zero or negative values.
  redis.hmset(key, kept.flatten) unless kept.empty?
  redis.hdel(key, emptied.map(&:first)) unless emptied.empty?
  "OK"
end