Class: RubyMarkovify::Text
- Inherits:
-
Object
- Object
- RubyMarkovify::Text
show all
- Includes:
- Splitters
- Defined in:
- lib/ruby_markovify/text.rb
Constant Summary
collapse
- WORD_SPLIT_PATTERN =
/\s+/
- REJECT_PATTERN =
/(^')|('$)|\s'|'\s|["(\(\)\[\])]/
- DEFAULT_MAX_OVERLAP_RATIO =
0.7
- DEFAULT_MAX_OVERLAP_TOTAL =
15
- DEFAULT_TRIES =
10
Constants included
from Splitters
Splitters::ABBR_CAPPED, Splitters::ABBR_LOWERCASE, Splitters::ASCII_LOWERCASE, Splitters::ASCII_UPPERCASE, Splitters::END_PATTERN, Splitters::EXCEPTIONS, Splitters::INITIALS, Splitters::MONTHS, Splitters::PUNCTUATION, Splitters::STATES, Splitters::STREETS, Splitters::TITLES, Splitters::UNITED_STATES
Instance Method Summary
collapse
Methods included from Splitters
#is_abbreviation, #is_sentence_ender, #split_into_sentences
Constructor Details
#initialize(input_text, state_size = nil, chain = nil) ⇒ Text
Returns a new instance of Text.
7
8
9
10
11
12
|
# File 'lib/ruby_markovify/text.rb', line 7
# Builds a text model from raw input.
#
# The input is converted to a corpus (a list of word lists) with
# generate_corpus. That corpus is re-joined into a single string, kept in
# @rejoined_text for the overlap check in test_sentence_output, and is also
# used to build the chain when none is supplied.
#
# @param input_text the raw text handed to generate_corpus
# @param state_size [Integer, nil] state size passed to Chain.new; 2 when nil
# @param chain [Chain, nil] a prebuilt chain to use instead of building one
def initialize(input_text, state_size = nil, chain = nil)
  corpus = generate_corpus(input_text)
  joined_sentences = corpus.map { |words| word_join(words) }
  @rejoined_text = sentence_join(joined_sentences)
  @chain = chain || Chain.new(corpus, state_size || 2)
end
|
Instance Method Details
#generate_corpus(text) ⇒ Object
38
39
40
41
42
|
# File 'lib/ruby_markovify/text.rb', line 38
# Turns raw text into a corpus: one array of words per accepted sentence.
#
# The text is split with sentence_split; every sentence for which
# test_sentence_input is truthy is dropped, and the rest are split into
# words with word_split.
#
# @param text the raw text to process
# @return [Array] the word_split result of each sentence that was kept
def generate_corpus(text)
  sentence_split(text).each_with_object([]) do |sentence, corpus|
    corpus << word_split(sentence) unless test_sentence_input(sentence)
  end
end
|
#make_sentence(init_state = nil, options = {}) ⇒ Object
63
64
65
66
67
68
69
70
71
72
73
|
# File 'lib/ruby_markovify/text.rb', line 63
# Tries to generate a sentence that passes test_sentence_output.
#
# Walks the chain up to `tries` times; the first walk whose words pass the
# output test is joined with word_join and returned.
#
# @param init_state passed unchanged to @chain.walk (nil by default)
# @param options [Hash] recognised keys:
#   :tries              number of attempts (default DEFAULT_TRIES)
#   :max_overlap_ratio  forwarded to test_sentence_output
#                       (default DEFAULT_MAX_OVERLAP_RATIO)
#   :max_overlap_total  forwarded to test_sentence_output
#                       (default DEFAULT_MAX_OVERLAP_TOTAL)
# @return the joined sentence, or nil when every attempt was rejected
def make_sentence(init_state = nil, options = {})
  attempts = options[:tries] || DEFAULT_TRIES
  ratio_cap = options[:max_overlap_ratio] || DEFAULT_MAX_OVERLAP_RATIO
  total_cap = options[:max_overlap_total] || DEFAULT_MAX_OVERLAP_TOTAL

  attempts.times do
    candidate = @chain.walk(init_state)
    next unless test_sentence_output(candidate, ratio_cap, total_cap)

    return word_join(candidate)
  end

  nil
end
|
#make_sentence_with_start(beginning, options = {}) ⇒ Object
82
83
84
|
# File 'lib/ruby_markovify/text.rb', line 82
# Generates a sentence whose initial state comes from the given text.
#
# `beginning` is split into words with word_split and the resulting array
# is handed to make_sentence as its init_state.
#
# @param beginning [String] text to split into the initial state
# @param options [Hash] forwarded unchanged to make_sentence
# @return whatever make_sentence returns
def make_sentence_with_start(beginning, options = {})
  init_state = word_split(beginning)
  make_sentence(init_state, options)
end
|
#make_short_sentence(char_limit, options = {}) ⇒ Object
75
76
77
78
79
80
|
# File 'lib/ruby_markovify/text.rb', line 75
# Generates a sentence strictly shorter than `char_limit` characters.
#
# Calls make_sentence repeatedly and returns the first result that is
# non-nil and whose length is below the limit. The number of calls is
# bounded by options[:tries] (default DEFAULT_TRIES) so that an
# unsatisfiable limit, or a chain that never yields an acceptable sentence,
# results in nil instead of an endless loop.
#
# @param char_limit [Integer] exclusive upper bound on the sentence length
# @param options [Hash] forwarded unchanged to make_sentence; :tries also
#   bounds the number of make_sentence calls made here
# @return the first short-enough sentence, or nil when none was produced
def make_short_sentence(char_limit, options = {})
  attempts = options[:tries] || DEFAULT_TRIES

  attempts.times do
    sentence = make_sentence(nil, options)
    return sentence if sentence && sentence.length < char_limit
  end

  nil
end
|
#sentence_join(sentences) ⇒ Object
19
20
21
|
# File 'lib/ruby_markovify/text.rb', line 19
# Joins sentences into one string, separated by a single space.
#
# @param sentences [Array] the sentences to join
# @return [String] the joined text
def sentence_join(sentences)
  separator = ' '
  sentences.join(separator)
end
|
#sentence_split(text) ⇒ Object
15
16
17
|
# File 'lib/ruby_markovify/text.rb', line 15
# Splits text into sentences by delegating to split_into_sentences
# (listed on this page as coming from the Splitters mixin).
#
# @param text the text to split
# @return whatever split_into_sentences returns for the text
def sentence_split(text)
  split_into_sentences text
end
|
#test_sentence_input(sentence) ⇒ Object
34
35
36
|
# File 'lib/ruby_markovify/text.rb', line 34
# Reports whether a sentence matches REJECT_PATTERN.
#
# The sentence is converted with String#to_ascii before matching.
# generate_corpus drops every sentence for which this returns true.
#
# @param sentence [String] the sentence to inspect
# @return [Boolean] true when the converted sentence matches REJECT_PATTERN
def test_sentence_input(sentence)
  ascii = sentence.to_ascii
  match_position = ascii =~ REJECT_PATTERN
  !match_position.nil?
end
|
#test_sentence_output(words, max_overlap_ratio, max_overlap_total) ⇒ Object
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
# File 'lib/ruby_markovify/text.rb', line 44
# Decides whether generated words are sufficiently different from the
# source text.
#
# The largest permitted overlap, in words, is the smaller of
# max_overlap_total and max_overlap_ratio * words.length (rounded). Every
# window of one word more than that is joined with word_join and looked up
# in @rejoined_text; the sentence is rejected as soon as one is found.
#
# Windows are taken as (start, length) slices over an exclusive index
# range, so each window holds exactly overlap_max + 1 words and no start
# index runs past the end of `words`. An empty `words` yields one empty
# window, which joins to "" and is therefore rejected.
#
# @param words [Array<String>] the generated words
# @param max_overlap_ratio [Numeric] permitted overlap as a fraction of the
#   sentence length
# @param max_overlap_total [Integer] permitted overlap as an absolute word
#   count
# @return [Boolean] true when no over-long window occurs in @rejoined_text
def test_sentence_output(words, max_overlap_ratio, max_overlap_total)
  overlap_ratio = (max_overlap_ratio * words.length).round
  overlap_max = [max_overlap_total, overlap_ratio].min
  overlap_over = overlap_max + 1
  gram_count = [words.length - overlap_max, 1].max

  (0...gram_count).none? do |start|
    @rejoined_text.include?(word_join(words[start, overlap_over]))
  end
end
|
#word_join(words) ⇒ Object
28
29
30
|
# File 'lib/ruby_markovify/text.rb', line 28
# Joins words into one string, separated by a single space.
#
# @param words [Array] the words to join
# @return [String] the joined words
def word_join(words)
  separator = ' '
  words.join(separator)
end
|
#word_split(sentence) ⇒ Object
24
25
26
|
# File 'lib/ruby_markovify/text.rb', line 24
# Splits a sentence into words on WORD_SPLIT_PATTERN.
#
# @param sentence [String] the sentence to split
# @return [Array<String>] the pieces between pattern matches
def word_split(sentence)
  sentence.split WORD_SPLIT_PATTERN
end
|