Class: Elastic::Stats::NaiveBayes::Set

Inherits:
Object
  • Object
show all
Includes:
ElasticClient
Defined in:
lib/elastic/stats/naive-bayes/set.rb

Overview

A set of documents against which statistics will be calculated

Instance Attribute Summary collapse

Attributes included from ElasticClient

#client

Instance Method Summary collapse

Methods included from ElasticClient

#client_options, #client_options=

Constructor Details

#initialize(index, type, category_field, subject_field) ⇒ Set

Returns a new instance of Set.



13
14
15
16
17
18
# File 'lib/elastic/stats/naive-bayes/set.rb', line 13

# Builds a document set by storing the four arguments in instance variables.
#
# @param index value stored in @index; sent as :index by #search and #analyze
# @param type value stored in @type; sent as :type by #search
# @param category_field value stored in @category_field
#   (presumably the name of the document field holding the category — confirm)
# @param subject_field value stored in @subject_field; sent as :field by #tokenize
def initialize(index, type, category_field, subject_field)
  @index, @type, @category_field = index, type, category_field
  @subject_field = subject_field
end

Instance Attribute Details

#category_field ⇒ Object (readonly)

Returns the value of attribute category_field.



11
12
13
# File 'lib/elastic/stats/naive-bayes/set.rb', line 11

# Read-only accessor: returns @category_field, which the constructor assigns
# from its category_field argument.
def category_field
  @category_field
end

#index ⇒ Object (readonly)

Returns the value of attribute index.



11
12
13
# File 'lib/elastic/stats/naive-bayes/set.rb', line 11

# Read-only accessor: returns @index, which the constructor assigns from its
# index argument. #search and #analyze send this value as the :index parameter.
def index
  @index
end

#subject_field ⇒ Object (readonly)

Returns the value of attribute subject_field.



11
12
13
# File 'lib/elastic/stats/naive-bayes/set.rb', line 11

# Read-only accessor: returns @subject_field, which the constructor assigns
# from its subject_field argument. #tokenize sends this value as :field.
def subject_field
  @subject_field
end

#type ⇒ Object (readonly)

Returns the value of attribute type.



11
12
13
# File 'lib/elastic/stats/naive-bayes/set.rb', line 11

# Read-only accessor: returns @type, which the constructor assigns from its
# type argument. #search sends this value as the :type parameter.
def type
  @type
end

Instance Method Details

#analyze(options = {}) ⇒ Object



53
54
55
# File 'lib/elastic/stats/naive-bayes/set.rb', line 53

# Forwards an analyze request to client.indices.analyze, scoped to this
# set's index.
#
# @param options [Hash] extra request parameters; a caller-supplied :index
#   key replaces the default because options are merged last
# @return whatever client.indices.analyze returns
def analyze(options = {})
  request = { index: index }.merge(options)
  client.indices.analyze(request)
end

#categories ⇒ Object



25
26
27
28
# File 'lib/elastic/stats/naive-bayes/set.rb', line 25

# Returns @categories, calling init_stats first when it is still nil.
#
# init_stats is defined outside this listing; presumably it assigns
# @categories (and @count) — confirm against its definition.
def categories
  return @categories unless @categories.nil?

  init_stats
  @categories
end

#count ⇒ Object



20
21
22
23
# File 'lib/elastic/stats/naive-bayes/set.rb', line 20

# Returns @count, calling init_stats first when it is still nil.
#
# init_stats is defined outside this listing; presumably it assigns
# @count (and @categories) — confirm against its definition.
def count
  return @count unless @count.nil?

  init_stats
  @count
end

#search(options = {}) ⇒ Object

Elasticsearch client helper methods



49
50
51
# File 'lib/elastic/stats/naive-bayes/set.rb', line 49

# Forwards a search request to client.search, scoped to this set's index
# and type.
#
# @param options [Hash] extra request parameters; caller-supplied :index or
#   :type keys replace the defaults because options are merged last
# @return whatever client.search returns
def search(options = {})
  defaults = { index: index, type: type }
  client.search(defaults.merge(options))
end

#token_categories ⇒ Object



34
35
36
37
38
39
40
41
# File 'lib/elastic/stats/naive-bayes/set.rb', line 34

# Memoized lookup table of per-category document counts for a token.
#
# token_categories[token] reads the 'aggregations' => 'counts' => 'buckets'
# list from count_search[token] and turns it into a Hash mapping each
# bucket's 'key' to its 'doc_count'. The per-token result is cached in the
# outer hash, and the inner hash defaults to 0 for categories that have no
# bucket.
#
# A token with an empty bucket list yields an empty hash (still defaulting
# to 0). Previously that case reduced an empty array to nil and raised
# TypeError when the nil was merged.
#
# count_search is defined outside this listing; presumably it returns the
# aggregation response for the given token — confirm against its definition.
#
# @return [Hash] token => Hash of category key => document count
def token_categories
  @token_categories ||= Hash.new do |cache, token|
    buckets = count_search[token]['aggregations']['counts']['buckets']
    # Fill a single zero-defaulting hash directly; safe for an empty list.
    cache[token] = buckets.each_with_object(Hash.new(0)) do |bucket, counts|
      counts[bucket['key']] = bucket['doc_count']
    end
  end
end

#tokenize(subject) ⇒ Object



43
44
45
46
# File 'lib/elastic/stats/naive-bayes/set.rb', line 43

# Splits a subject into tokens by running it through #analyze with
# subject_field as :field and the subject as :text.
#
# @param subject the text to analyze
# @return [Array] the 'token' value of every entry under the response's
#   'tokens' key
def tokenize(subject)
  analyze(field: subject_field, text: subject)['tokens'].map { |entry| entry['token'] }
end

#tokens ⇒ Object



30
31
32
# File 'lib/elastic/stats/naive-bayes/set.rb', line 30

# Memoized lookup table of hit totals per token.
#
# tokens[token] reads 'hits' => 'total' from count_search[token] and caches
# the value so later lookups for the same token skip count_search.
#
# count_search is defined outside this listing; presumably it returns the
# search response for the given token — confirm against its definition.
#
# @return [Hash] token => value of the response's hits total
def tokens
  @tokens ||= Hash.new do |cache, token|
    cache[token] = count_search[token]['hits']['total']
  end
end