Class: Elastic::Stats::NaiveBayes::Predictor

Inherits:
Object
  • Object
show all
Includes:
ElasticClient
Defined in:
lib/elastic/stats/naive-bayes/predictor.rb

Overview

Utility to perform Naive Bayes category predictions on text

Instance Attribute Summary collapse

Attributes included from ElasticClient

#client, #index, #type

Instance Method Summary collapse

Methods included from ElasticClient

#analyze, #client_options, #client_options=, #search

Constructor Details

#initialize(prior_set) ⇒ Predictor



14
15
16
# File 'lib/elastic/stats/naive-bayes/predictor.rb', line 14

# Builds a predictor around the given prior set.
#
# @param prior_set the object kept in @prior_set; other methods shown for
#   this class call #categories and #tokenize on it and pass it to
#   TokenStats.new
def initialize(prior_set)
  @prior_set = prior_set
end

Instance Attribute Details

#adjust=(value) ⇒ Object (writeonly)

Sets the attribute adjust



12
13
14
# File 'lib/elastic/stats/naive-bayes/predictor.rb', line 12

# Stores the given value in the @adjust instance variable.
#
# NOTE(review): none of the methods listed on this page read @adjust;
# confirm where (or whether) it is consumed.
#
# @param value the new value for @adjust
def adjust=(value)
  @adjust = value
end

#prior_set ⇒ Object (readonly)

Returns the value of attribute prior_set.



11
12
13
# File 'lib/elastic/stats/naive-bayes/predictor.rb', line 11

# Reader for the prior set that was handed to the constructor.
#
# @return the object held in @prior_set
def prior_set
  @prior_set
end

Instance Method Details

#guess(subject) ⇒ Object



18
19
20
21
22
23
24
# File 'lib/elastic/stats/naive-bayes/predictor.rb', line 18

# Scores the subject against every category of the prior set and ranks
# the results.
#
# @param subject the text to classify; passed unchanged to #score
# @return [Hash] category => score, ordered from highest score to lowest
def guess(subject)
  scored_pairs = prior_set.categories.keys.map do |category|
    [category, score(subject, category)]
  end

  # Negating the value turns the ascending sort into a descending ranking.
  ranked = scored_pairs.sort_by { |_category, value| -value }
  Hash[ranked]
end

#score(subject, category) ⇒ Object



26
27
28
29
30
31
32
33
34
# File 'lib/elastic/stats/naive-bayes/predictor.rb', line 26

# Computes the score of a subject for a single category.
#
# Every token's TokenStats#bayes value for the category is added to a
# running total, and that total is mapped to 1 / (1 + e^total).
#
# @param subject the text to score; split into tokens via #tokenize
# @param category the category handed to TokenStats#bayes
# @return [Float] 1 / (1 + e^total); 0.5 when the subject yields no tokens
def score(subject, category)
  total = 0
  tokenize(subject).each do |token|
    # Add this token's value for the category to the running total
    total += TokenStats.new(token, prior_set).bayes(category)
  end

  1 / (1 + Math.exp(total))
end

#tokenize(subject) ⇒ Object



36
37
38
39
# File 'lib/elastic/stats/naive-bayes/predictor.rb', line 36

# Splits a subject into tokens by delegating to the prior set, caching the
# result per subject so each distinct subject is tokenized only once per
# predictor instance.
#
# @param subject the text to tokenize; also used as the cache key
# @return whatever prior_set.tokenize returns for the subject
def tokenize(subject)
  # The Hash's default block fills the cache on the first lookup of a key.
  @tokenize ||= Hash.new do |cache, text|
    cache[text] = prior_set.tokenize(text)
  end

  @tokenize[subject]
end