Class: Yada::Markov
- Inherits:
-
Object
- Object
- Yada::Markov
- Defined in:
- lib/yada/markov.rb
Defined Under Namespace
Classes: Join
Constant Summary collapse
- START =
:__start
- STOP =
:__stop
Instance Attribute Summary collapse
-
#tokens ⇒ Object
readonly
Returns the value of attribute tokens.
Instance Method Summary collapse
-
#initialize(n = 1, tokenize = /[\w\-\/]+|[^\s]+/, join = ' ') ⇒ Markov
constructor
A new instance of Markov.
- #train!(data) ⇒ Object
- #transition_probability(ngram, token) ⇒ Object
Constructor Details
#initialize(n = 1, tokenize = /[\w\-\/]+|[^\s]+/, join = ' ') ⇒ Markov
Returns a new instance of Markov.
10 11 12 13 14 15 |
# File 'lib/yada/markov.rb', line 10
#
# Sets up an empty model: both count tables start at zero for every key and
# the token set starts empty.
#
# @param n [Integer] number of context tokens per n-gram; #train! pads each
#   text with +n+ START markers and slides a window of +n + 1+ tokens
# @param tokenize [Regexp] pattern handed to String#scan to split a training
#   text into tokens
# @param join [String] separator used to join the context tokens into the
#   string that keys the count tables
def initialize(n = 1, tokenize = /[\w\-\/]+|[^\s]+/, join = ' ')
  @n = n
  @tokenize = tokenize
  @join = join

  # Hash.new(0) lets the counters be incremented without initialising keys.
  @transition_count = Hash.new(0)
  @ngram_count = Hash.new(0)
  @tokens = Set.new
end
Instance Attribute Details
#tokens ⇒ Object (readonly)
Returns the value of attribute tokens.
8 9 10 |
# File 'lib/yada/markov.rb', line 8
#
# Read-only accessor for the +@tokens+ instance variable.
#
# @return [Object] whatever is currently stored in +@tokens+; the same
#   object is returned, not a copy
def tokens
  @tokens
end
Instance Method Details
#train!(data) ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/yada/markov.rb', line 17
#
# Updates the model's counts from a collection of training texts.
#
# Each text is split with the configured tokenizer and wrapped as
# Join.new(prefix, tokens, suffix), where the prefix is +@n+ START markers
# and the suffix is a single STOP marker. Every window of +@n + 1+
# consecutive elements is read as "context n-gram followed by token".
#
# NOTE(review): Join is defined outside this listing; this method relies on
# it responding to #each_cons — presumably it enumerates prefix, tokens and
# suffix in order. Confirm against its definition.
#
# @param data [#each] collection yielding texts that respond to #scan
# @return [Object] the result of +data.each+
def train!(data)
  prefix = [START] * @n
  suffix = [STOP]

  data.each do |text|
    Join.new(prefix, text.scan(@tokenize), suffix).each_cons(@n + 1) do |*ngram, token|
      # The context is stored under its joined string form, not the array.
      joined_ngram = ngram.join(@join)

      @tokens.add(token)
      @transition_count[[joined_ngram, token]] += 1
      @ngram_count[joined_ngram] += 1
    end
  end
end
#transition_probability(ngram, token) ⇒ Object
31 32 33 34 35 |
# File 'lib/yada/markov.rb', line 31
#
# Relative frequency with which +token+ followed +ngram+ in the recorded
# counts.
#
# @param ngram [Array] context tokens; joined with the configured separator
#   to form the lookup key
# @param token [Object] candidate next token
# @return [Integer, Rational] Integer 0 when the n-gram has no recorded
#   occurrences, otherwise transition count / n-gram count as a Rational
def transition_probability(ngram, token)
  joined_ngram = ngram.join(@join)
  occurrences = @ngram_count[joined_ngram]

  # Unseen context: bail out before Rational would divide by zero.
  return 0 if occurrences.zero?

  Rational(@transition_count[[joined_ngram, token]], occurrences)
end