Class: RubyMarkovify::Text

Inherits:
Object
  • Object
show all
Includes:
Splitters
Defined in:
lib/ruby_markovify/text.rb

Direct Known Subclasses

ArrayText, NewlineText

Constant Summary collapse

WORD_SPLIT_PATTERN =
/\s+/
REJECT_PATTERN =
/(^')|('$)|\s'|'\s|["(\(\)\[\])]/
DEFAULT_MAX_OVERLAP_RATIO =
0.7
DEFAULT_MAX_OVERLAP_TOTAL =
15
DEFAULT_TRIES =
10

Constants included from Splitters

Splitters::ABBR_CAPPED, Splitters::ABBR_LOWERCASE, Splitters::ASCII_LOWERCASE, Splitters::ASCII_UPPERCASE, Splitters::END_PATTERN, Splitters::EXCEPTIONS, Splitters::INITIALS, Splitters::MONTHS, Splitters::PUNCTUATION, Splitters::STATES, Splitters::STREETS, Splitters::TITLES, Splitters::UNITED_STATES

Instance Method Summary collapse

Methods included from Splitters

#is_abbreviation, #is_sentence_ender, #split_into_sentences

Constructor Details

#initialize(input_text, state_size = nil, chain = nil) ⇒ Text

Returns a new instance of Text.



7
8
9
10
11
12
# File 'lib/ruby_markovify/text.rb', line 7

# Builds a text model from raw input.
#
# The input is turned into a corpus via #generate_corpus, and a re-joined
# copy of that corpus is kept in @rejoined_text (read later by
# #test_sentence_output).
#
# @param input_text [Object] raw input handed to #generate_corpus
# @param state_size [Integer, nil] passed to Chain.new; falls back to 2
#   when nil (or false)
# @param chain [Object, nil] an existing chain to use as-is; when nil a new
#   Chain is built from the corpus
def initialize(input_text, state_size = nil, chain = nil)
  corpus = generate_corpus(input_text)
  joined_sentences = corpus.map { |words| word_join(words) }
  @rejoined_text = sentence_join(joined_sentences)
  # Chain.new is only invoked when no chain was supplied by the caller.
  @chain = chain || Chain.new(corpus, state_size || 2)
end

Instance Method Details

#generate_corpus(text) ⇒ Object



38
39
40
41
42
# File 'lib/ruby_markovify/text.rb', line 38

# Converts raw text into a list of word lists, one per accepted sentence.
#
# Sentences come from #sentence_split; any sentence for which
# #test_sentence_input returns true is dropped; the survivors are tokenised
# with #word_split.
#
# @param text [Object] raw input handed to #sentence_split
# @return [Array] one #word_split result per retained sentence
def generate_corpus(text)
  # delete_if filters in place, exactly like reject!, but always returns the
  # receiver so the pipeline can be chained.
  sentence_split(text)
    .delete_if { |sentence| test_sentence_input(sentence) }
    .map { |sentence| word_split(sentence) }
end

#make_sentence(init_state = nil, options = {}) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
# File 'lib/ruby_markovify/text.rb', line 63

# Attempts to generate a sentence that passes #test_sentence_output.
#
# Each attempt asks @chain to walk from +init_state+; the first walk that
# passes the overlap test is joined with #word_join and returned.
#
# @param init_state [Object, nil] forwarded unchanged to @chain.walk
# @param options [Hash] optional overrides:
#   :tries             - number of attempts (default DEFAULT_TRIES)
#   :max_overlap_ratio - default DEFAULT_MAX_OVERLAP_RATIO
#   :max_overlap_total - default DEFAULT_MAX_OVERLAP_TOTAL
# @return [String, nil] the joined sentence, or nil when every attempt was
#   rejected
def make_sentence(init_state = nil, options = {})
  attempts = options[:tries] || DEFAULT_TRIES
  ratio_cap = options[:max_overlap_ratio] || DEFAULT_MAX_OVERLAP_RATIO
  total_cap = options[:max_overlap_total] || DEFAULT_MAX_OVERLAP_TOTAL

  attempts.times do
    candidate = @chain.walk(init_state)
    next unless test_sentence_output(candidate, ratio_cap, total_cap)

    return word_join(candidate)
  end

  nil
end

#make_sentence_with_start(beginning, options = {}) ⇒ Object



82
83
84
# File 'lib/ruby_markovify/text.rb', line 82

# Generates a sentence using the words of +beginning+ as the initial state.
#
# @param beginning [String] text tokenised with #word_split
# @param options [Hash] forwarded unchanged to #make_sentence
# @return [Object] whatever #make_sentence returns
def make_sentence_with_start(beginning, options = {})
  init_state = word_split(beginning)
  make_sentence(init_state, options)
end

#make_short_sentence(char_limit, options = {}) ⇒ Object



75
76
77
78
79
80
# File 'lib/ruby_markovify/text.rb', line 75

# Generates a sentence strictly shorter than +char_limit+ characters.
#
# Calls #make_sentence (with a nil initial state) repeatedly until it yields
# a sentence whose length is below +char_limit+.
#
# By default the number of rounds is unbounded, so the call never returns if
# no acceptable sentence can be produced. Pass :max_attempts to cap the
# number of rounds; the method then returns nil once the cap is reached.
#
# @param char_limit [Integer] exclusive upper bound on the sentence length
# @param options [Hash] forwarded unchanged to #make_sentence; additionally
#   :max_attempts - maximum number of #make_sentence calls (nil/absent means
#   unlimited; at least one call is always made)
# @return [String, nil] a short-enough sentence, or nil when :max_attempts
#   rounds all failed
def make_short_sentence(char_limit, options = {})
  max_attempts = options[:max_attempts]
  attempts = 0

  loop do
    sentence = make_sentence(nil, options)
    return sentence if sentence && sentence.length < char_limit

    attempts += 1
    # Give up only when the caller opted in to a bound.
    return nil if max_attempts && attempts >= max_attempts
  end
end

#sentence_join(sentences) ⇒ Object



19
20
21
# File 'lib/ruby_markovify/text.rb', line 19

# Joins sentences into one string, separated by a single space.
#
# @param sentences [Array<String>] sentences to concatenate
# @return [String] the space-separated text
def sentence_join(sentences)
  sentences.join(' ')
end

#sentence_split(text) ⇒ Object



15
16
17
# File 'lib/ruby_markovify/text.rb', line 15

# Splits raw text into sentences by delegating to #split_into_sentences.
#
# @param text [Object] input passed straight through
# @return [Object] whatever #split_into_sentences returns
def sentence_split(text)
  split_into_sentences text
end

#test_sentence_input(sentence) ⇒ Object



34
35
36
# File 'lib/ruby_markovify/text.rb', line 34

# Reports whether a sentence matches REJECT_PATTERN.
#
# The pattern is applied to +sentence.to_ascii+.
# NOTE(review): #to_ascii is not defined in this file; presumably it
# transliterates the string to ASCII - confirm against its provider.
#
# @param sentence [String] candidate sentence
# @return [Boolean] true when the pattern matches, false otherwise
def test_sentence_input(sentence)
  ascii = sentence.to_ascii
  (ascii =~ REJECT_PATTERN) ? true : false
end

#test_sentence_output(words, max_overlap_ratio, max_overlap_total) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/ruby_markovify/text.rb', line 44

# Checks that a generated word list does not copy too long a run of words
# verbatim from @rejoined_text.
#
# The permitted overlap is the smaller of +max_overlap_total+ and
# +max_overlap_ratio+ times the word count (rounded). The sentence is
# rejected when any window of (permitted overlap + 1) consecutive words,
# joined with #word_join, occurs as a substring of @rejoined_text.
#
# An empty +words+ list is rejected: its joined form is the empty string,
# which is a substring of any text.
#
# @param words [Array<String>] generated words
# @param max_overlap_ratio [Numeric] permitted overlap as a fraction of the
#   word count
# @param max_overlap_total [Integer] permitted overlap as an absolute number
#   of words
# @return [Boolean] true when the sentence is acceptable, false otherwise
def test_sentence_output(words, max_overlap_ratio, max_overlap_total)
  overlap_ratio = (max_overlap_ratio * words.length).round
  overlap_max = [max_overlap_total, overlap_ratio].min
  # Window size: one word more than the permitted overlap.
  overlap_over = overlap_max + 1
  # Number of full windows of that size; always inspect at least one.
  gram_count = [words.length - overlap_max, 1].max

  # Exclusive range and (start, length) slicing give exactly gram_count
  # windows of overlap_over words each; inclusive forms would examine one
  # extra, shorter tail window and make every window one word too long.
  (0...gram_count).none? do |start|
    @rejoined_text.include?(word_join(words[start, overlap_over]))
  end
end

#word_join(words) ⇒ Object



28
29
30
# File 'lib/ruby_markovify/text.rb', line 28

# Joins words into one string, separated by a single space.
#
# @param words [Array<String>] words to concatenate
# @return [String] the space-separated sentence
def word_join(words)
  words.join(' ')
end

#word_split(sentence) ⇒ Object



24
25
26
# File 'lib/ruby_markovify/text.rb', line 24

# Splits a sentence into words on WORD_SPLIT_PATTERN.
#
# @param sentence [String] sentence to tokenise
# @return [Array<String>] the pieces produced by String#split
def word_split(sentence)
  sentence.split WORD_SPLIT_PATTERN
end