Class: Markov::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/markov/parser.rb

Defined Under Namespace

Classes: EmptyDictionaryError, FileNotFoundError

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Parser

Returns a new instance of Parser.



4
5
6
7
8
9
10
11
# File 'lib/markov/parser.rb', line 4

# Prepares a parser with its splitting/cleaning patterns and empty buffers.
#
# @return [Parser] a new instance of Parser
def initialize
  # Characters matched by this pattern are presumably stripped from the input
  # before tokenizing — confirm against the code that uses @replace_chars.
  @replace_chars = /[„':;_"()]/
  # Matches either a single punctuation mark (captured) or one whitespace
  # character.
  @split_words = /([,.?!])|[\s]/
  # Matches the whitespace that follows a sentence terminator (., ? or !);
  # the look-behind keeps the terminator attached to its sentence.
  @split_sentence = /(?<=[.?!])\s+/

  # Sentences waiting to be tokenized, and the tokens of the current sentence.
  @unparsed_sentences = []
  @tokens = []
end

Instance Method Details

#load_text(source) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/markov/parser.rb', line 19

# Reads the file at +source+, splits its contents into sentences using
# @split_sentence and hands each sentence to #add_unparsed_sentence.
#
# @param source [String] path of the text file to load
# @return [Array<String>] the sentences that were read from the file
# @raise [FileNotFoundError] if +source+ does not exist
def load_text(source)
  # File.exists? was removed in Ruby 3.2; File.exist? is available everywhere.
  raise FileNotFoundError, "#{source} does not exist!" unless File.exist?(source)

  # File.read opens and closes the file itself, so no handle is left open.
  # The content is tagged as UTF-8 without transcoding.
  text = File.read(source).force_encoding(Encoding::UTF_8)

  text.split(@split_sentence).each do |sentence|
    add_unparsed_sentence sentence
  end
end

#next_token ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/markov/parser.rb', line 33

# Returns the next token, refilling the token buffer from the next queued
# sentence whenever the buffer is empty.
#
# Each word of the sentence is classified as follows:
# * contains ","           -> Token(",", :special)
# * contains "?", "!", "." -> Token(<that mark>, :stop), checked in that order
# * empty string           -> skipped
# * anything else          -> Token(word, :word)
#
# @return [Markov::Token, nil] the next token; nil when no sentences are left
#   or when the sentence just loaded produced no tokens
def next_token
  if @tokens.empty?
    words = @unparsed_sentences.shift

    # Nothing left to parse: leave an empty buffer behind and report nil.
    unless words
      @tokens = []
      return nil
    end

    words.each do |word|
      # Order matters: a comma wins over "?", which wins over "!" and ".".
      mark = [",", "?", "!", "."].find { |candidate| word.include?(candidate) }

      if mark
        kind = mark == "," ? :special : :stop
        @tokens << Markov::Token.new(mark, kind)
      elsif word == ""
        next # skip blanks
      else
        @tokens << Markov::Token.new(word, :word)
      end
    end
  end

  @tokens.shift
end