Class: Markov::Generator

Inherits:
Object
  • Object
show all
Includes:
Util
Defined in:
lib/markov/generator.rb

Instance Method Summary collapse

Methods included from Util

#random_number, #tokens_to_debug, #tokens_to_sentence, #tokens_to_words

Constructor Details

#initialize(depth) ⇒ Generator

Returns a new instance of Generator.



7
8
9
10
11
12
13
14
15
# File 'lib/markov/generator.rb', line 7

# Builds a generator backed by a fresh Markov::Dictionary.
#
# @param depth [Integer] forwarded unchanged to Markov::Dictionary.new and
#   kept in @depth; the other methods of this class use it as the length of
#   the token sequences they build (presumably the chain order — confirm
#   against Markov::Dictionary).
def initialize(depth)
  @depth = depth
  @unparsed_sentences, @tokens = [], []
  @dict = Markov::Dictionary.new(depth)

  # Re-seed Ruby's global pseudo-random number generator.
  srand
end

Instance Method Details

#dump_dictionary ⇒ Object



133
134
135
# File 'lib/markov/generator.rb', line 133

# Delegates to the dictionary's own dump_dictionary and returns whatever
# that call returns.
def dump_dictionary
  dictionary = @dict
  dictionary.dump_dictionary
end

#dump_startwords ⇒ Object



127
128
129
130
131
# File 'lib/markov/generator.rb', line 127

# Prints every key of @start_words to standard output, one per line, using
# each key's to_s form.
#
# NOTE(review): the constructor does not assign @start_words, and parse_text
# hands start words to @dict instead, so this ivar may well be nil. In that
# case nothing is printed (previously this raised NoMethodError on nil).
# Confirm whether the start words should instead be dumped via @dict.
#
# @return [Array, nil] the keys that were printed, or nil when there is
#   nothing to dump
def dump_startwords
  return if @start_words.nil?

  @start_words.keys.each do |start_words|
    puts "#{start_words}"
  end
end

#generate_sentence(min_length = 15) ⇒ Object

Generates a sentence of more than min_length tokens from the parsed dictionary.



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/markov/generator.rb', line 69

# Generates a sentence by repeatedly asking the dictionary for the next
# token, starting from a randomly selected run of start words.
#
# The method only returns once a :stop token has been drawn while more than
# +min_length+ tokens are collected; there is no other exit from the loop.
#
# NOTE(review): the return check looks at the drawn token, not at what was
# appended, so a :stop that was skipped (previous token not a :word) still
# ends generation — confirm this is intended.
#
# @param min_length [Integer] the result is returned only when the token
#   count exceeds this; generation restarts from scratch once the count
#   exceeds four times this value
# @return [Object] whatever tokens_to_sentence returns for the collected tokens
# @raise [EmptyDictionaryError] if the dictionary reports itself empty
def generate_sentence(min_length=15)
  if @dict.empty?
    raise EmptyDictionaryError, "The dictionary is empty! Parse a source file/string first!"
  end

  tokens = []

  # Appends a fresh run of start words to the current token list and returns
  # the last token, which becomes the new "previous token".
  seed_start_words = lambda do
    @dict.select_start_words.each { |word| tokens << word }
    tokens.last
  end

  prev_token = seed_start_words.call

  loop do
    token = @dict.select_next_token(tokens.last(@depth - 1))

    case token.kind
    when :word
      tokens << token
      prev_token = token
    when :special, :stop
      # Punctuation is only accepted directly after a word.
      if prev_token.kind == :word
        tokens << token
        prev_token = token
      end
    when :noop
      # Close the current sentence if it ended on a word, then start a new one.
      tokens << Markov::Token.new(".", :stop) if prev_token.kind == :word
      prev_token = seed_start_words.call
    end

    return tokens_to_sentence(tokens) if token.kind == :stop && tokens.size > min_length

    # Circuit-breaker: the text ran far past the target length without a
    # usable stop, so throw it away and begin again.
    if tokens.size > min_length * 4
      tokens = []
      prev_token = seed_start_words.call
    end
  end
end

#parse_text(source) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/markov/generator.rb', line 17

# Tokenizes +source+ with a Markov::Parser and feeds a sliding window of
# @depth tokens into the dictionary.
#
# At the start of each sentence the window is filled with @depth tokens; its
# first @depth-1 tokens are registered as start words and the full window is
# added to the dictionary. Afterwards the window advances one token at a
# time, adding each new window to the dictionary, until a :stop token or the
# end of the token stream (nil) is reached; then a new sentence begins.
#
# Errors raised while consuming tokens are printed to standard output
# (message, then backtrace) and parsing stops; they are not re-raised.
# Errors from creating the parser or loading the text are not caught.
#
# @param source [Object] passed as-is to Markov::Parser#load_text
# @return [nil]
def parse_text(source)
  parser = Markov::Parser.new
  parser.load_text(source)

  at_sentence_start = true
  window = []

  begin
    while (token = parser.next_token)
      if at_sentence_start
        window << token

        # Top the window up to @depth tokens; next_token may yield nil here
        # if the stream runs out, and those nils are kept in the window.
        (@depth - window.size).times { window << parser.next_token }

        # The opening window goes to both the start words and the dictionary.
        @dict.add_to_start_words(window[0, @depth - 1])
        @dict.add_to_dictionary(window)

        token = parser.next_token
        at_sentence_start = false
      end

      # Slide the window forward by one token and record it.
      window.shift
      window << token
      @dict.add_to_dictionary(window)

      # Keep sliding unless the sentence (or the whole input) just ended.
      next unless token.nil? || token.kind == :stop

      window = []
      at_sentence_start = true
    end
  rescue => e
    puts e
    puts e.backtrace
  end
end