Class: Yada::Markov

Inherits:
Object
  • Object
show all
Defined in:
lib/yada/markov.rb

Defined Under Namespace

Classes: Join

Constant Summary collapse

START =
:__start
STOP =
:__stop

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(n = 1, tokenize = /[\w\-\/]+|[^\s]+/, join = ' ') ⇒ Markov

Returns a new instance of Markov.



10
11
12
13
14
15
# File 'lib/yada/markov.rb', line 10

# Sets up an untrained model with empty counters.
#
# @param n [Integer] how many preceding tokens make up a context; training
#   slides a window of +n + 1+ tokens over each text
# @param tokenize [Regexp] pattern handed to +String#scan+ to split training
#   text; the default matches runs of word characters, hyphens and slashes,
#   or otherwise any run of non-whitespace
# @param join [String] separator used when the context tokens are joined
#   into a single hash key
def initialize(n = 1, tokenize = /[\w\-\/]+|[^\s]+/, join = ' ')
  @n = n
  @tokenize = tokenize
  @join = join

  # Every token that has been seen in the "next token" position.
  @tokens = Set.new
  # Both tallies default to 0 so unseen keys read as a zero count.
  @ngram_count = Hash.new(0)
  @transition_count = Hash.new(0)
end

Instance Attribute Details

#tokens ⇒ Object (readonly)

Returns the value of attribute tokens.



8
9
10
# File 'lib/yada/markov.rb', line 8

# Reader for the +@tokens+ instance variable, which the constructor sets to
# an empty Set and +train!+ adds every observed next-token to.
#
# The stored object itself is returned, not a copy, so mutating the result
# mutates the model's token set.
#
# @return [Set] the tokens recorded so far
def tokens
  @tokens
end

Instance Method Details

#train!(data) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/yada/markov.rb', line 17

# Feeds training texts into the model, updating the n-gram and transition
# tallies and the set of known tokens.
#
# Each text is tokenized with +@tokenize+, padded with +@n+ START markers in
# front and one STOP marker behind, and then walked with a sliding window of
# +@n + 1+ tokens: the first +@n+ are the context, the last is the token
# that followed it.
#
# @param data [#each] collection whose elements respond to +scan+
#   (presumably Strings -- confirm against callers)
# @return [Object] whatever +data.each+ returns (for an Array, +data+ itself)
def train!(data)
  lead_in = [START] * @n
  lead_out = [STOP]

  data.each do |text|
    words = text.scan(@tokenize)
    padded = Join.new(lead_in, words, lead_out)

    padded.each_cons(@n + 1) do |*context, following|
      # The context is stored under its joined string form; lookups in
      # transition_probability build the key the same way.
      key = context.join(@join)
      @tokens << following
      @ngram_count[key] += 1
      @transition_count[[key, following]] += 1
    end
  end
end

#transition_probability(ngram, token) ⇒ Object



31
32
33
34
35
# File 'lib/yada/markov.rb', line 31

# Relative frequency with which +token+ followed +ngram+ in the training
# data: the count for the (ngram, token) pair divided by the count for the
# ngram.
#
# @param ngram [#join] context tokens; joined with +@join+ to form the
#   lookup key
# @param token [Object] candidate next token
# @return [Integer, Rational] Integer 0 when the ngram has a zero count,
#   otherwise an exact Rational
def transition_probability(ngram, token)
  key = ngram.join(@join)
  seen = @ngram_count[key]
  # Guard the division: an unseen context has no distribution to report.
  return 0 if seen.zero?

  Rational(@transition_count[[key, token]], seen)
end