Class: Entropic::Model

Inherits:
Object
  • Object
show all
Defined in:
lib/entropic.rb

Overview

Public: A model for entropy

Constant Summary collapse

VERSION =
'1.0.0'.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(size) ⇒ Model

Returns a new instance of Model.



89
90
91
92
# File 'lib/entropic.rb', line 89

# Public: build a model for n-grams of the given size.
#
# Stores the size and creates a fresh NGramCounter constructed with it.
#
# size: the n-gram size, passed unchanged to NGramCounter.new
def initialize(size)
  @size = size
  @counter = NGramCounter.new(size)
end

Instance Attribute Details

#counter ⇒ Object

Returns the value of attribute counter.



87
88
89
# File 'lib/entropic.rb', line 87

# Public: reader for the @counter instance variable
# (assigned NGramCounter.new(size) in initialize).
def counter
  @counter
end

#size ⇒ Object

Returns the value of attribute size.



87
88
89
# File 'lib/entropic.rb', line 87

# Public: reader for the @size instance variable
# (the size argument given to initialize).
def size
  @size
end

Class Method Details

.read(io) ⇒ Object

Public: create a Model from reading from an IO object

io: the IOReader

returns: Model with stats filled in, and size of largest ngram



183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/entropic.rb', line 183

# Public: create a Model from reading from an IO object
#
# Each line is split on tabs into <ngram size><tab><ngram><tab><count>,
# which is the layout #dump writes. The model is created from the ngram
# size of the first data line; every data line adds its count to the
# counter's total and stores the count under its ngram.
#
# Lines that carry no ngram field (blank or truncated lines) are skipped,
# so a stray empty line can neither create a size-0 model nor insert a
# nil key into the counts.
#
# io: the reader; only #each_line is called on it
#
# returns: the Model with stats filled in, or nil if no data line was read
def self.read(io)
  model = nil
  io.each_line do |line|
    ngram_size, ngram, count = line.strip.split(/\t/)
    next if ngram.nil? # blank or truncated line: nothing to record

    model ||= Model.new(ngram_size.to_i)
    count = count.to_f # a missing count field becomes 0.0
    counter = model.counter
    counter.total += count
    counter.counts[ngram] = count
  end
  model
end

Instance Method Details

#dump(io) ⇒ Object

Public: dump model to some io object

io: the IOWriter to write to



146
147
148
149
150
# File 'lib/entropic.rb', line 146

# Public: dump model to some io object
#
# Writes one line per entry of @counter.counts, formatted as
# <size><tab><ngram><tab><count> followed by a newline.
#
# io: the writer; only #write is called on it
def dump(io)
  @counter.counts.each do |gram, tally|
    line = "#{@size}\t#{gram}\t#{tally}\n"
    io.write(line)
  end
end

#entropy(string) ⇒ Object

Public: predict the entropy over a string

which will be split into ngrams

string: The String to query

returns: entropy



174
175
176
# File 'lib/entropic.rb', line 174

# Public: predict the entropy over a string
#
# Runs predict on the string and negates the :log_prob_average entry
# of the result.
#
# string: The String to query
#
# returns: the negated average log probability
def entropy(string)
  stats = predict(string)
  -stats[:log_prob_average]
end

#log_prob(key) ⇒ Object

Public: log probability of an n-gram string in a model. Returns the value for the first suffix of the string that is found, or the log_prob of a 1-gram appearing once if no suffix is found.

Examples

  model = Model.new(2)
  model.update('01234')
  model.log_prob('01')

string: The String to query



133
134
135
136
137
138
139
140
# File 'lib/entropic.rb', line 133

# Public: base-2 log probability of an n-gram string in the model
#
# Computed as log2(count) - log2(total), where count comes from
# @counter.count(key, 0.5) and total from the counter's total.
# NOTE(review): the 0.5 is presumably the fallback count used when the
# key is not found - confirm against NGramCounter#count.
#
# key: The String to query
#
# returns: the log probability, or -Infinity when the counter's total is
#          0 or the key is nil or the empty string
def log_prob(key)
  # Nothing trained, or nothing to look up: probability zero.
  return -Float::INFINITY if @counter.total == 0 || !key || key == ''

  occurrences = @counter.count(key, 0.5)
  log_count = Math.log(occurrences, 2.0)
  log_total = Math.log(counter.total, 2.0)
  log_count - log_total
end

#predict(string) ⇒ Object

Public: predict the log_prob sum and average over a string

which will be split into ngrams

string: The String to query

returns: a dictionary of

- log_prob_total
- log_prob_average
- size (number of ngrams in string)


161
162
163
164
165
166
# File 'lib/entropic.rb', line 161

# Public: predict the log_prob sum and average over a string
#
# The string is split into n-grams with Entropic.sliding(string, @size);
# log_prob of each n-gram is added up starting from 0.0, and the sum is
# divided by the number of n-grams. With zero n-grams that division is
# 0.0 / 0.0, so the average is NaN.
#
# string: The String to query
#
# returns: a Hash with
#   - log_prob_total
#   - log_prob_average
#   - size (number of ngrams in string)
def predict(string)
  grams = Entropic.sliding(string, @size)
  total = grams.inject(0.0) { |running, gram| running + log_prob(gram) }
  gram_count = grams.size
  {
    log_prob_total: total,
    log_prob_average: total / gram_count.to_f,
    size: gram_count
  }
end

#train(io) ⇒ Object

Public: Train a model on a bunch of data, line by line

io: the IOReader



201
202
203
204
205
# File 'lib/entropic.rb', line 201

# Public: Train a model on a bunch of data, line by line
#
# Every line is stripped of surrounding whitespace and handed to update.
#
# io: the reader; only #each_line is called on it
def train(io)
  io.each_line { |line| update(line.strip) }
end

#train_with_multiplier(io) ⇒ Object

Public: Train a model on a bunch of data, line by line,

with a multiplier
each data line should be <string><tab><multiplier>

io: the IOReader



213
214
215
216
217
218
219
# File 'lib/entropic.rb', line 213

# Public: Train a model on a bunch of data, line by line, with a multiplier
#
# Each data line should be <string><tab><multiplier>. The line is stripped
# and split on tabs; the first field is the text and the second is
# converted with to_i, so a line without a multiplier field is applied
# with a multiplier of 0.
#
# Blank lines are skipped: they split into no fields at all, and would
# otherwise forward nil as the string to update_with_multiplier.
#
# io: the reader; only #each_line is called on it
def train_with_multiplier(io)
  io.each_line do |line|
    text, multiplier = line.strip.split(/\t/)
    next if text.nil? # blank line: split returned no fields

    update_with_multiplier(text, multiplier.to_i)
  end
end

#update(string) ⇒ Object

Public: update a model with a string, using a multiplier of 1

Examples

  model = Model.new(2)
  model.update('01234')

string: The String to update with



117
118
119
# File 'lib/entropic.rb', line 117

# Public: update a model with a string, using a multiplier of 1
#
# Delegates to update_with_multiplier(string, 1).
#
# string: The String to update with
def update(string)
  update_with_multiplier(string, 1)
end

#update_with_multiplier(string, multiplier) ⇒ Object

Public: update a model with a string, and a multiplier

Examples

  model = Model.new(2)
  model.update_with_multiplier('01234', 1)

string: The String to update with

multiplier: The Integer describing how much weight to give the string (will often be 1)



104
105
106
# File 'lib/entropic.rb', line 104

# Public: update a model with a string, and a multiplier
#
# Forwards both arguments unchanged to @counter.update_with_multiplier.
#
# string: The String to update with
# multiplier: The Integer describing how much weight (will often be 1)
def update_with_multiplier(string, multiplier)
  @counter.update_with_multiplier(string, multiplier)
end