Class: Markov::Generator

Inherits:
Object
  • Object
show all
Defined in:
lib/markov/generator.rb

Defined Under Namespace

Classes: EmptyDictionaryError, FileNotFoundError

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(depth = 3) ⇒ Generator

Returns a new instance of Generator.



14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/markov/generator.rb', line 14

# Sets up an empty generator: the tokenizer patterns plus empty dictionary,
# start-word table, unparsed-sentence queue and token list.
#
# The global random number generator is intentionally left untouched. Ruby
# seeds it at process start, and reseeding here would throw away a seed the
# caller installed with Kernel#srand to obtain reproducible output.
#
# @param depth [Integer] stored in @depth; generate_sentence hands the last
#   (depth - 1) tokens to the next-token lookup
def initialize(depth=3)
  @depth = depth

  # Matches the whitespace that follows a '.', '?' or '!'.
  @split_sentence = /(?<=[.?!])\s+/
  # Matches a single whitespace character, or captures one of ',', '.', '?', '!'.
  @split_words = /([,.?!])|[\s]/
  # Character set presumably stripped or replaced while parsing (usage is
  # outside this method -- confirm against the parser).
  @replace_chars = /[„':;_"()]/

  @dictionary = {}
  @start_words = {}
  @unparsed_sentences = []
  @tokens = []
end

Instance Attribute Details

#depth ⇒ Object (readonly)

Returns the value of attribute depth.



12
13
14
# File 'lib/markov/generator.rb', line 12

# Reader for the generator's depth setting.
#
# @return the current value of @depth
def depth
  @depth
end

Instance Method Details

#dump_dictionary ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
# File 'lib/markov/generator.rb', line 107

# Prints one comma-separated line per dictionary entry to standard output:
# the first two elements of the key, followed by the +word+ of every
# follower stored under that key.
def dump_dictionary
  @dictionary.keys.each do |key|
    fields = ["#{key[0]},#{key[1]}"]
    @dictionary[key].each { |follower| fields << "#{follower.word}" }

    puts fields.join(",")
  end
end

#dump_dictionary_stats ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/markov/generator.rb', line 119

# Prints summary statistics about the dictionary to standard output.
#
# The first line is the number of keys. After that comes one "BUCKET" line per
# distinct follower-list size, in ascending order, showing the total number
# of followers held by lists of that size and that total as a truncated
# percentage of all followers.
#
# When no key has any follower the overall total is zero; the percentage is
# then reported as 0 instead of converting NaN to an integer, which raises
# FloatDomainError.
def dump_dictionary_stats
  puts "Keys: #{@dictionary.keys.size}"

  dist = Hash.new(0)
  n = 0
  @dictionary.each_value do |following|
    size = following.size
    dist[size] += size
    n += size
  end

  dist.keys.sort.each do |s|
    percent = n.zero? ? 0 : ((dist[s].to_f / n.to_f) * 100).to_i
    puts "BUCKET: #{s}\t=#{dist[s]} (#{percent}%)"
  end
end

#dump_start_words ⇒ Object



101
102
103
104
105
# File 'lib/markov/generator.rb', line 101

# Prints every start-word key to standard output, one per line, as the
# key's first two elements separated by a comma.
def dump_start_words
  @start_words.keys.each do |pair|
    first = pair[0]
    second = pair[1]
    puts "#{first},#{second}"
  end
end

#generate_sentence(min_length = 20) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/markov/generator.rb', line 55

# Generates text from the parsed dictionary.
#
# Starts from a set of start words and repeatedly asks select_next_token for
# the next token, given the last (@depth - 1) tokens. Generation ends when a
# :stop token has been produced and at least +min_length+ tokens exist, or
# when the token count exceeds twice +min_length+.
#
# @param min_length [Integer] minimum number of tokens before a :stop token
#   may end the output
# @return the result of tokens_to_sentence for the collected tokens
# @raise [EmptyDictionaryError] if nothing has been parsed yet
def generate_sentence(min_length=20)
  raise EmptyDictionaryError.new("The dictionary is empty! Parse a source file/string first!") if @dictionary.empty?

  words = []
  select_start_words.each { |start| words << start }
  previous = words.last
  finished = false

  until finished
    current = select_next_token(words.last(@depth - 1))

    case current.kind
    when :stop, :special
      # Directly after a :special token, ask select_next_word for a
      # replacement instead of appending this token.
      current = select_next_word(words.last(@depth - 1)) if previous.kind == :special
      words << current
    when :noop
      # No continuation available: overwrite the last token with a full stop.
      current = Token.new(".", :stop)
      words[-1] = current
    else
      words << current
    end

    previous = current

    if current.kind == :stop
      if words.size >= min_length
        finished = true
      else
        # Too short so far: begin another sentence with fresh start words.
        select_start_words.each { |start| words << start }
        previous = words.last
      end
    end

    # circuit-breaker: stop once the output is more than twice the minimum
    finished = true if words.size > min_length * 2
  end

  tokens_to_sentence(words)
end

#parse_source_file(source) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/markov/generator.rb', line 39

# Reads a text file, splits it into sentences with @split_sentence, queues
# each sentence through add_unparsed_sentence and then runs parse_text.
#
# The file content is tagged as UTF-8 regardless of the default external
# encoding. File.read is used so the file handle is closed as soon as the
# content has been read, and File.exist? replaces File.exists?, which was
# removed in Ruby 3.2.
#
# @param source [String] path of the file to parse
# @return the result of parse_text
# @raise [FileNotFoundError] if +source+ does not exist
def parse_source_file(source)
  raise FileNotFoundError, "#{source} does not exist!" unless File.exist?(source)

  sentences = File.read(source).force_encoding(Encoding::UTF_8).split(@split_sentence)
  sentences.each { |sentence| add_unparsed_sentence sentence }

  parse_text
end

#parse_string(sentence) ⇒ Object



34
35
36
37
# File 'lib/markov/generator.rb', line 34

# Queues +sentence+ via add_unparsed_sentence and then runs parse_text.
#
# @param sentence the text to add
# @return the result of parse_text
def parse_string(sentence)
  add_unparsed_sentence(sentence)
  parse_text
end