Class: BayesOnRedis

Inherits:
Object
  • Object
show all
Defined in:
lib/bayes_on_redis.rb

Constant Summary collapse

CATEGORIES_KEY =
"BayesOnRedis:categories"
ONE_OR_TWO_WORDS_RE =
/\b\w{1,2}\b/mi
NON_ALPHANUMERIC_AND_NON_DOT_RE =
/[^\w\.]/mi

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options) ⇒ BayesOnRedis

Returns a new instance of BayesOnRedis.



11
12
13
14
# File 'lib/bayes_on_redis.rb', line 11

# Builds the classifier's collaborators: a Redis client and a Stopword instance.
#
# options - Hash read for the keys :redis_host, :redis_port and :redis_db,
#           which are forwarded to Redis.new as :host, :port and :db.
def initialize(options)
  @redis = Redis.new(
    :host => options[:redis_host],
    :port => options[:redis_port],
    :db   => options[:redis_db]
  )
  @stopwords = Stopword.new
end

Instance Attribute Details

#redis ⇒ Object (readonly)

Returns the value of attribute redis.



9
10
11
# File 'lib/bayes_on_redis.rb', line 9

# Reader for the @redis instance variable.
def redis; @redis; end

#stopwords ⇒ Object (readonly)

Returns the value of attribute stopwords.



9
10
11
# File 'lib/bayes_on_redis.rb', line 9

# Reader for the @stopwords instance variable.
def stopwords; @stopwords; end

Instance Method Details

#classify(text) ⇒ Object



66
67
68
# File 'lib/bayes_on_redis.rb', line 66

# Picks the category whose score for +text+ is highest.
#
# text - the text handed to #score.
#
# Returns the key of the highest-valued entry of score(text), or nil when
# score(text) is empty (previously this case raised NoMethodError on nil).
def classify(text)
  # max_by finds the best entry in one pass; no need to sort every category.
  best = score(text).max_by { |_category, value| value }
  best && best.first
end

#flushdb ⇒ Object



16
17
18
# File 'lib/bayes_on_redis.rb', line 16

# Delegates to @redis.flushdb and returns whatever that call returns.
def flushdb; @redis.flushdb; end

#score(text) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/bayes_on_redis.rb', line 46

# Scores +text+ against every category stored in the CATEGORIES_KEY set.
#
# For each category the stored counts of its hash are summed into a total.
# Every distinct word of +text+ then contributes log(word_count / total),
# where a word with no positive stored count uses 0.1 instead of its count.
#
# A category whose total is zero or less is removed from the set and left
# out of the result. Scoring it would divide by zero, giving +Infinity and
# letting the empty category outrank every real one.
#
# text - the text handed to count_occurance.
#
# Returns a Hash of category => score; a category's score stays 0 when
# +text+ yields no words.
def score(text)
  # Tokenise once; the word list does not depend on the category.
  occurrences = count_occurance(text)
  scores = {}

  @redis.smembers(CATEGORIES_KEY).each do |category|
    category_key = redis_category_key(category)
    total_words = @redis.hvals(category_key).inject(0) { |sum, value| sum + value.to_i }

    if total_words <= 0
      @redis.srem(CATEGORIES_KEY, category)
      next
    end

    scores[category] = 0

    occurrences.each do |word, _count|
      word_count = @redis.hget(category_key, word).to_i
      # Unseen words get a small non-zero weight so the logarithm stays finite.
      word_count = 0.1 if word_count <= 0

      scores[category] += Math.log(word_count / total_words.to_f)
    end
  end

  scores
end

#train(category, text) ⇒ Object Also known as: learn

Trains the classifier for a category: registers the (downcased) category and adds the word counts of the given text to it.



21
22
23
24
25
26
27
28
# File 'lib/bayes_on_redis.rb', line 21

# Records the words of +text+ under +category+.
#
# The category name is downcased, added to the CATEGORIES_KEY set, and each
# word's stored count in the category hash is incremented by the number of
# times count_occurance reports for it.
#
# category - object responding to #downcase.
# text     - the text handed to count_occurance.
#
# Returns the value of count_occurance(text).each.
def train(category, text)
  label = category.downcase
  @redis.sadd(CATEGORIES_KEY, label)

  count_occurance(text).each { |token, times| @redis.hincrby(redis_category_key(label), token, times) }
end

#untrain(category, text) ⇒ Object Also known as: unlearn



31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/bayes_on_redis.rb', line 31

# Reverses the effect of training +text+ under +category+.
#
# For every word of +text+, the count stored in the (downcased) category hash
# is reduced by the number of occurrences reported by count_occurance, never
# dropping below zero.
#
# category - object responding to #downcase.
# text     - the text handed to count_occurance.
#
# Returns the value of count_occurance(text).each.
def untrain(category, text)
  category = category.downcase

  count_occurance(text).each do |word, count|
    category_key = redis_category_key(category)
    # The stored value is converted with to_i before any arithmetic: Redis
    # hands hash values back as strings, and as nil for a missing field.
    current = @redis.hget(category_key, word).to_i
    @redis.hset(category_key, word, [current - count, 0].max)
  end
end