Class: NBayes::Data

Inherits:
Object
  • Object
show all
Defined in:
lib/nbayes.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Data

Returns a new instance of Data.



47
48
49
50
51
52
53
54
55
56
57
# File 'lib/nbayes.rb', line 47

# Creates an empty training-data store.
#
# +options+ is accepted for interface compatibility but is not read here.
#
# @data maps each category to a per-category record of the shape built by
# new_category:
#   {
#     category => {
#       :tokens       => Hash.new(0),   # token => frequency
#       :total_tokens => 0,
#       :examples     => 0
#     },
#     ...
#   }
def initialize(options = {})
  @data = {}
end

Instance Attribute Details

#data ⇒ Object

Returns the value of attribute data.



46
47
48
# File 'lib/nbayes.rb', line 46

# Returns the value of @data: the Hash created by the constructor, into
# which cat_data stores one record per category.
def data
  @data
end

Instance Method Details

#add_token_to_category(category, token) ⇒ Object

Add this token to this category



118
119
120
121
# File 'lib/nbayes.rb', line 118

# Records one occurrence of +token+ in +category+.
#
# Bumps both the token's own frequency and the category's running token
# total. The category record is created by cat_data if it does not exist yet.
# Returns the category's token total after the increment.
def add_token_to_category(category, token)
  stats = cat_data(category)
  stats[:tokens][token] += 1
  stats[:total_tokens] += 1
end

#cat_data(category) ⇒ Object



67
68
69
70
71
72
# File 'lib/nbayes.rb', line 67

# Returns the record stored for +category+.
#
# If nothing is stored under +category+, or the stored value is not a Hash,
# a fresh record from new_category is stored and returned instead. Note that
# this means merely looking a category up creates it.
def cat_data(category)
  existing = data[category]
  return existing if existing.is_a?(Hash)

  data[category] = new_category
end

#categories ⇒ Object



59
60
61
# File 'lib/nbayes.rb', line 59

# Returns a new Array holding every key currently present in the data store,
# i.e. the known categories.
def categories
  data.each_key.to_a
end

#category_stats ⇒ Object



74
75
76
77
78
79
80
81
82
83
# File 'lib/nbayes.rb', line 74

# Builds a human-readable summary with one line per category.
#
# Each line reports the category's example count, that count as a percentage
# of all examples, and the category's total token count. Lines are joined
# with "\n"; an empty store yields an empty string.
#
# The category name is passed as a %s argument rather than interpolated into
# the format string, so names containing "%" are printed verbatim instead of
# being parsed as format directives. When there are no examples at all the
# percentage is reported as 0.00 rather than NaN.
def category_stats
  total = total_examples
  lines = categories.map do |category|
    examples = example_count(category)
    share = total.zero? ? 0.0 : 100.0 * examples / total
    "For category %s, %d examples (%.02f%% of the total) and %d total_tokens" %
      [category, examples, share, token_count(category)]
  end
  lines.join("\n")
end

#count_of_token_in_category(category, token) ⇒ Object

How many times does this token appear in this category?



134
135
136
# File 'lib/nbayes.rb', line 134

# Returns the frequency stored for +token+ in +category+.
#
# The lookup goes through cat_data, so asking about an unknown category
# creates an empty record for it.
def count_of_token_in_category(category, token)
  token_counts = cat_data(category)[:tokens]
  token_counts[token]
end

#decrement_examples(category) ⇒ Object

Decrement the number of training examples for this category. Delete the category if the examples counter is 0.



96
97
98
99
# File 'lib/nbayes.rb', line 96

# Lowers the example counter of +category+ by one.
#
# Once the counter drops below 1 the whole category is removed via
# delete_category, and that call's result is returned; otherwise returns nil.
def decrement_examples(category)
  stats = cat_data(category)
  stats[:examples] -= 1
  delete_category(category) if stats[:examples] < 1
end

#delete_category(category) ⇒ Object



171
172
173
174
# File 'lib/nbayes.rb', line 171

# Removes +category+ from the data store (a no-op when it is not present)
# and returns the list of categories that remain.
def delete_category(category)
  data.delete(category)
  categories
end

#delete_token_from_category(category, token) ⇒ Object



138
139
140
141
142
143
# File 'lib/nbayes.rb', line 138

# Drops +token+ from +category+ entirely.
#
# The token's current frequency is read first so the category's running
# token total can be reduced by the same amount after the entry is deleted.
# Returns the category's token total after the adjustment.
def delete_token_from_category(category, token)
  stats = cat_data(category)
  removed = count_of_token_in_category(category, token)
  stats[:tokens].delete(token)
  stats[:total_tokens] -= removed
end

#each(&block) ⇒ Object



85
86
87
# File 'lib/nbayes.rb', line 85

# Yields every category name to +block+.
#
# Iteration runs over the Array returned by categories (a snapshot of the
# store's keys), so the block may add or remove categories while iterating.
# Returns whatever Array#each returns: the snapshot Array when a block is
# given, an Enumerator otherwise.
def each(&block)
  categories.each(&block)
end

#example_count(category) ⇒ Object



101
102
103
# File 'lib/nbayes.rb', line 101

# Returns the number of training examples recorded for +category+.
#
# The lookup goes through cat_data, so an unknown category is created with
# a count of 0.
def example_count(category)
  stats = cat_data(category)
  stats[:examples]
end

#increment_examples(category) ⇒ Object

Increment the number of training examples for this category



90
91
92
# File 'lib/nbayes.rb', line 90

# Raises the example counter of +category+ by one, creating the category
# record via cat_data if needed. Returns the counter's new value.
def increment_examples(category)
  stats = cat_data(category)
  stats[:examples] += 1
end

#new_category ⇒ Object



163
164
165
166
167
168
169
# File 'lib/nbayes.rb', line 163

# Builds the empty record used for a category that has no data yet:
# a token-frequency table plus zeroed token and example counters.
def new_category
  token_counts = Hash.new(0) # unseen tokens read as a frequency of 0
  { :tokens => token_counts, :total_tokens => 0, :examples => 0 }
end

#purge_less_than(token, x) ⇒ Object



145
146
147
148
149
150
151
# File 'lib/nbayes.rb', line 145

# Removes +token+ from every category when its combined frequency across
# all categories is below +x+.
#
# Returns true when the token was purged, nil when it was frequent enough
# to keep.
def purge_less_than(token, x)
  seen = token_count_across_categories(token)
  return if seen >= x

  each { |category| delete_token_from_category(category, token) }
  true # tell the caller the token was removed
end

#remove_token_from_category(category, token) ⇒ Object

Decrement the token counter in a category. If the counter is 0, delete the token. If the total number of tokens is 0, delete the category.



126
127
128
129
130
131
# File 'lib/nbayes.rb', line 126

# Takes one occurrence of +token+ back out of +category+.
#
# The token's frequency is lowered by one; when it falls below 1 the token
# entry is deleted. The category's token total is then lowered by one, and
# the category itself is deleted once that total falls below 1.
# Returns delete_category's result when the category was removed, else nil.
def remove_token_from_category(category, token)
  stats = cat_data(category)
  token_counts = stats[:tokens]

  token_counts[token] -= 1
  # delete_token_from_category subtracts the token's remaining frequency
  # (0, or -1 for a token that was never trained) from the total, so for an
  # untrained token the total is bumped by one here and the decrement below
  # brings it back to where it started.
  delete_token_from_category(category, token) if token_counts[token] < 1

  stats[:total_tokens] -= 1
  delete_category(category) if stats[:total_tokens] < 1
end

#reset_after_import ⇒ Object



159
160
161
# File 'lib/nbayes.rb', line 159

# Sets the default value of every category's token table back to 0, so
# tokens that are not stored read as a frequency of 0.
# NOTE(review): presumably needed because an imported/deserialised Hash has
# lost its default — confirm against the import code.
def reset_after_import
  each do |category|
    token_counts = cat_data(category)[:tokens]
    token_counts.default = 0
  end
end

#token_count(category) ⇒ Object



105
106
107
# File 'lib/nbayes.rb', line 105

# Returns the running total of tokens recorded for +category+.
#
# The lookup goes through cat_data, so an unknown category is created with
# a total of 0.
def token_count(category)
  stats = cat_data(category)
  stats[:total_tokens]
end

#token_count_across_categories(token) ⇒ Object

XXX - TODO - use count_of_token_in_category. Return the total number of times this token has been seen across all categories.



155
156
157
# File 'lib/nbayes.rb', line 155

# Returns how many times +token+ has been recorded, summed over every
# category in the store. An empty store gives 0.
#
# Reads the stored records directly (not through cat_data), so nothing is
# created or repaired as a side effect.
def token_count_across_categories(token)
  total = 0
  data.each_value { |stats| total += stats[:tokens][token] }
  total
end

#token_trained?(token, category) ⇒ Boolean

Returns:

  • (Boolean)


63
64
65
# File 'lib/nbayes.rb', line 63

# Tells whether +token+ has an entry in the token table of +category+.
#
# Returns false when nothing is stored for +category+; unlike the cat_data
# based readers, this check never creates the category.
def token_trained?(token, category)
  stats = data[category]
  return false unless stats

  stats[:tokens].key?(token)
end

#total_examples ⇒ Object

XXX - Add Enumerable and see if I get inject? Total number of training instances.



111
112
113
114
115
# File 'lib/nbayes.rb', line 111

# Returns the number of training examples summed over every category;
# 0 when the store is empty.
def total_examples
  categories.inject(0) { |sum, category| sum + example_count(category) }
end