Class: Markov::Generator
- Inherits: Object
- Defined in: lib/markov/generator.rb
Defined Under Namespace
Classes: EmptyDictionaryError, FileNotFoundError
Instance Attribute Summary collapse
Instance Method Summary
collapse
Constructor Details
#initialize(depth = 3) ⇒ Generator
Returns a new instance of Generator.
14
15
16
17
18
19
20
21
22
23
24
25
26
|
# File 'lib/markov/generator.rb', line 14
# Creates a generator with an empty dictionary and empty start-word table.
#
# Besides storing +depth+, this prepares the regular expressions and the
# empty work buffers held in instance variables, and finally reseeds Ruby's
# global random number generator via Kernel#srand.
#
# @param depth [Integer] stored in @depth; #generate_sentence hands the last
#   depth - 1 collected tokens to the token selector as context
def initialize(depth = 3)
  @depth = depth

  # Matches the whitespace run that follows a '.', '?' or '!'.
  @split_sentence = /(?<=[.?!])\s+/
  # Matches a single whitespace character, or captures one of , . ? !
  @split_words = /([,.?!])|[\s]/
  # Character class of quote/punctuation characters; presumably stripped
  # from the input during parsing -- the use site is not in this method.
  @replace_chars = /[„':;_"()]/

  # Model tables and work buffers all start out empty.
  @dictionary, @start_words = {}, {}
  @unparsed_sentences, @tokens = [], []

  srand
end
|
Instance Attribute Details
#depth ⇒ Object
Returns the value of attribute depth.
12
13
14
|
# File 'lib/markov/generator.rb', line 12
# Reader for the depth value stored by the constructor.
#
# @return [Object] the current value of @depth (3 unless another value was
#   passed to the constructor)
def depth
  @depth
end
|
Instance Method Details
#dump_dictionary ⇒ Object
107
108
109
110
111
112
113
114
115
116
117
|
# File 'lib/markov/generator.rb', line 107
# Prints the dictionary to standard output, one comma-separated line per key.
#
# Each line holds the first two elements of the key followed by the +word+
# of every entry stored under that key. Only key elements 0 and 1 are
# printed, whatever the key's actual length.
#
# @return [Array] the dictionary keys that were iterated
def dump_dictionary
  @dictionary.keys.each do |prefix|
    followers = @dictionary[prefix].map { |entry| entry.word }
    fields = [prefix[0], prefix[1]] + followers
    # to_s mirrors string interpolation, so nil renders as an empty field.
    puts fields.map(&:to_s).join(",")
  end
end
|
#dump_dictionary_stats ⇒ Object
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
# File 'lib/markov/generator.rb', line 119
# Prints summary statistics about the dictionary to standard output.
#
# The first line reports the number of keys. After that, one "BUCKET" line
# is printed per distinct follower-list size, in ascending size order. The
# value shown for a bucket is the sum of the follower counts of all keys
# whose list has that size, and the percentage is that sum's share of the
# follower count over the whole dictionary, rounded down.
#
# The percentage is computed with integer arithmetic: going through Float
# and truncating under-reports exact ratios (29 of 100 came out as 28%),
# and a total of zero followers produced NaN, which cannot be converted to
# an Integer. A zero total is reported as 0%.
#
# @return [Array<Integer>] the sorted bucket sizes that were printed
def dump_dictionary_stats
  puts "Keys: #{@dictionary.size}"

  dist = Hash.new(0)
  @dictionary.each_value do |following|
    dist[following.size] += following.size
  end
  total = dist.values.inject(0, :+)

  dist.keys.sort.each do |size|
    # Guard the division: every follower list may be empty.
    percent = total.zero? ? 0 : (dist[size] * 100) / total
    puts "BUCKET: #{size}\t=#{dist[size]} (#{percent}%)"
  end
end
|
#dump_start_words ⇒ Object
101
102
103
104
105
|
# File 'lib/markov/generator.rb', line 101
# Prints every key of the start-word table to standard output, one per
# line, as "first,second" (elements 0 and 1 of the key).
#
# @return [Array] the start-word keys that were iterated
def dump_start_words
  @start_words.keys.each { |pair| puts "#{pair[0]},#{pair[1]}" }
end
|
#generate_sentence(min_length = 20) ⇒ Object
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
# File 'lib/markov/generator.rb', line 55
# Generates text by repeatedly drawing tokens until a stopping rule fires.
#
# The token list is seeded with select_start_words. Each round asks
# select_next_token for a token, passing the last depth - 1 collected
# tokens as context, and handles it by kind:
#
# * :stop / :special -- appended; if the previous token was :special, a
#   replacement is first drawn with select_next_word.
# * :noop -- the last collected token is overwritten with a "." :stop token.
# * anything else -- appended as is.
#
# When the handled token is a :stop and fewer than +min_length+ tokens have
# been collected, a fresh set of start words is appended and generation
# continues; otherwise the :stop ends generation. Generation also ends once
# more than twice +min_length+ tokens have been collected.
#
# @param min_length [Integer] token count below which a :stop token does
#   not end generation
# @return [Object] the result of tokens_to_sentence for the collected tokens
# @raise [EmptyDictionaryError] if the dictionary is empty
def generate_sentence(min_length = 20)
  raise EmptyDictionaryError, "The dictionary is empty! Parse a source file/string first!" if @dictionary.empty?

  tokens = []
  select_start_words.each { |word| tokens.push(word) }
  prev_token = tokens.last

  finished = false
  until finished
    token = select_next_token(tokens.last(@depth - 1))

    case token.kind
    when :stop, :special
      # Never let punctuation directly follow a :special token.
      token = select_next_word(tokens.last(@depth - 1)) if prev_token.kind == :special
      tokens.push(token)
    when :noop
      # Overwrite the last token with an explicit full stop.
      token = Token.new(".", :stop)
      tokens[-1] = token
    else
      tokens.push(token)
    end
    prev_token = token

    if token.kind == :stop
      if tokens.size < min_length
        # Too short so far: start another sentence and keep going.
        select_start_words.each { |word| tokens.push(word) }
        prev_token = tokens.last
      else
        finished = true
      end
    end

    # Hard cap so generation always terminates.
    finished = true if tokens.size > min_length * 2
  end

  tokens_to_sentence(tokens)
end
|
#parse_source_file(source) ⇒ Object
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
# File 'lib/markov/generator.rb', line 39
# Reads a text file, queues each of its sentences for parsing and then
# runs the parser.
#
# The file content is tagged as UTF-8 and split with the sentence-splitting
# pattern held in @split_sentence; every resulting piece is passed to
# add_unparsed_sentence before parse_text is called.
#
# Uses File.exist? (File.exists? is gone in current Ruby versions) and
# File.read, which closes the file once it has been read.
#
# @param source [String] path of the file to read
# @return [Object] the result of parse_text
# @raise [FileNotFoundError] if +source+ does not exist
def parse_source_file(source)
  raise FileNotFoundError, "#{source} does not exist!" unless File.exist?(source)

  text = File.read(source).force_encoding(Encoding::UTF_8)
  text.split(@split_sentence).each do |sentence|
    add_unparsed_sentence(sentence)
  end
  parse_text
end
|
#parse_string(sentence) ⇒ Object
34
35
36
37
|
# File 'lib/markov/generator.rb', line 34
# Queues a string for parsing and then runs the parser.
#
# The string is handed to add_unparsed_sentence as a single unit; it is not
# split in this method.
#
# @param sentence [String] the text to add
# @return [Object] the result of parse_text
def parse_string(sentence)
  add_unparsed_sentence(sentence)
  parse_text
end
|