Class: BayesOnRedis
- Inherits:
-
Object
- Object
- BayesOnRedis
- Defined in:
- lib/bayes_on_redis.rb
Constant Summary collapse
- CATEGORIES_KEY =
"BayesOnRedis:categories"
- ONE_OR_TWO_WORDS_RE =
/\b\w{1,2}\b/mi
- NON_ALPHANUMERIC_AND_NON_DOT_RE =
/[^\w\.]/mi
Instance Attribute Summary collapse
-
#redis ⇒ Object
readonly
Returns the value of attribute redis.
-
#stopwords ⇒ Object
readonly
Returns the value of attribute stopwords.
Instance Method Summary collapse
- #classify(text) ⇒ Object
- #flushdb ⇒ Object
-
#initialize(options) ⇒ BayesOnRedis
constructor
A new instance of BayesOnRedis.
- #score(text) ⇒ Object
-
#train(category, text) ⇒ Object
(also: #learn)
Trains the classifier with text for a category.
- #untrain(category, text) ⇒ Object (also: #unlearn)
Constructor Details
#initialize(options) ⇒ BayesOnRedis
Returns a new instance of BayesOnRedis.
11 12 13 14 |
# File 'lib/bayes_on_redis.rb', line 11
#
# Opens the Redis connection and creates the stopword helper used by the
# classifier.
#
# @param options [Hash] connection settings; the :redis_host, :redis_port and
#   :redis_db entries are forwarded to Redis.new as :host, :port and :db
# @return [BayesOnRedis] a new instance of BayesOnRedis
def initialize(options)
  @redis = Redis.new(
    :host => options[:redis_host],
    :port => options[:redis_port],
    :db   => options[:redis_db]
  )
  @stopwords = Stopword.new
end
Instance Attribute Details
#redis ⇒ Object (readonly)
Returns the value of attribute redis.
9 10 11 |
# File 'lib/bayes_on_redis.rb', line 9
#
# Returns the value of attribute redis.
#
# @return [Object] whatever is currently stored in @redis
def redis
  @redis
end
#stopwords ⇒ Object (readonly)
Returns the value of attribute stopwords.
9 10 11 |
# File 'lib/bayes_on_redis.rb', line 9
#
# Returns the value of attribute stopwords.
#
# @return [Object] whatever is currently stored in @stopwords
def stopwords
  @stopwords
end
Instance Method Details
#classify(text) ⇒ Object
66 67 68 |
# File 'lib/bayes_on_redis.rb', line 66
#
# Picks the category whose score for +text+ is the highest.
#
# @param text [Object] passed unchanged to #score
# @return [Object, nil] the key of the highest-scoring entry returned by
#   #score, or nil when #score returns no entries
def classify(text)
  # max_by finds the top entry without sorting the whole score table.
  best = score(text).max_by { |_category, value| value }
  # With no entries the previous `[0][0]` lookup raised NoMethodError on nil;
  # report "no category" as nil instead.
  best && best.first
end
#flushdb ⇒ Object
16 17 18 |
# File 'lib/bayes_on_redis.rb', line 16
#
# Delegates to the Redis client's flushdb.
#
# @return [Object] whatever @redis.flushdb returns
def flushdb
  @redis.flushdb
end
#score(text) ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/bayes_on_redis.rb', line 46
#
# Computes a log-score for +text+ against every category listed in the set
# stored at CATEGORIES_KEY.
#
# For each category the values of its hash are summed into a total. Every
# word yielded by count_occurance(text) then adds
# Math.log(stored_count / total) to the category's score; a word with no
# positive stored count is treated as 0.1 so the logarithm stays finite.
#
# @param text [Object] passed to count_occurance
# @return [Hash] category => accumulated log score (0 when the text yields no
#   words); categories whose total is not positive are omitted
def score(text)
  scores = {}
  # Loop-invariant: the text's word counts do not depend on the category.
  # NOTE(review): assumes count_occurance has no side effects - confirm.
  occurrences = count_occurance(text)

  @redis.smembers(CATEGORIES_KEY).each do |category|
    category_key = redis_category_key(category)
    total = @redis.hvals(category_key).inject(0) { |sum, value| sum + value.to_i }

    if total <= 0
      # A category with no remaining counts is dropped from the set. It must
      # also be skipped here: dividing by a zero total gives
      # Math.log(Infinity), which would make the empty category outrank all
      # others.
      @redis.srem(CATEGORIES_KEY, category)
      next
    end

    scores[category] = 0
    occurrences.each do |word, _count|
      seen = @redis.hget(category_key, word).to_i
      seen = 0.1 if seen <= 0
      scores[category] += Math.log(seen / total.to_f)
    end
  end

  scores
end
#train(category, text) ⇒ Object Also known as: learn
Trains the classifier with text for a category.
21 22 23 24 25 26 27 28 |
# File 'lib/bayes_on_redis.rb', line 21
#
# Trains the classifier with +text+ for +category+.
#
# The downcased category is added to the set stored at CATEGORIES_KEY, and
# each word yielded by count_occurance(text) has its counter in the
# category's hash incremented by that word's count.
#
# @param category [#downcase] category name; stored downcased
# @param text [Object] passed to count_occurance
# @return [Object] the collection returned by count_occurance(text)
def train(category, text)
  category = category.downcase
  @redis.sadd(CATEGORIES_KEY, category)

  # The key depends only on the category, so build it once.
  category_key = redis_category_key(category)
  count_occurance(text).each do |word, count|
    @redis.hincrby(category_key, word, count)
  end
end
#untrain(category, text) ⇒ Object Also known as: unlearn
31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/bayes_on_redis.rb', line 31
#
# Reverses training of +text+ for +category+.
#
# For each word yielded by count_occurance(text), the word's counter in the
# downcased category's hash is lowered by that word's count and never drops
# below zero.
#
# @param category [#downcase] category name; looked up downcased
# @param text [Object] passed to count_occurance
# @return [Object] the collection returned by count_occurance(text)
def untrain(category, text)
  category = category.downcase
  category_key = redis_category_key(category)

  count_occurance(text).each do |word, count|
    # hget yields a String (or nil for a missing field); comparing that
    # directly with the Integer count raised, so coerce first.
    current = @redis.hget(category_key, word).to_i
    remaining = [current - count, 0].max
    @redis.hset(category_key, word, remaining)
  end
end