Class: Dentaku::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/dentaku/tokenizer.rb

Constant Summary collapse

# Matcher for the (:grouping, :open) token; scan bumps the nesting depth when it matches.
LPAREN = TokenMatcher.new(:grouping, :open)
# Matcher for the (:grouping, :close) token; scan lowers the nesting depth when it matches.
RPAREN = TokenMatcher.new(:grouping, :close)

Instance Method Summary collapse

Instance Method Details

#last_token ⇒ Object



27
28
29
# File 'lib/dentaku/tokenizer.rb', line 27

# Returns the final element of @tokens (nil when the collection is empty).
# @tokens is the array that tokenize resets and scan appends to.
def last_token
  @tokens[-1]
end

#scan(string, scanner) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/dentaku/tokenizer.rb', line 31

# Runs one scanner against +string+ and records the tokens it produces.
#
# The scanner receives the string and the last collected token. For every
# token it returns, in order:
#   * a zero-length token raises a RuntimeError,
#   * a token matching LPAREN / RPAREN raises / lowers @nesting,
#   * a negative @nesting raises a RuntimeError,
#   * the token is appended to @tokens unless it is whitespace.
#
# Returns [true, remainder] when the scanner produced tokens, where remainder
# is +string+ with the summed token lengths cut off the front; returns
# [false, string] when the scanner produced nothing truthy.
def scan(string, scanner)
  tokens = scanner.scan(string, last_token)
  return [false, string] unless tokens

  tokens.each do |token|
    if token.length == 0
      raise "unexpected zero-width match (:#{ token.category }) at '#{ string }'"
    end

    # The constant is the receiver of ==, so its equality decides the match.
    if LPAREN == token
      @nesting += 1
    end
    if RPAREN == token
      @nesting -= 1
    end
    raise "too many closing parentheses" if @nesting < 0

    next if token.is?(:whitespace)

    @tokens << token
  end

  consumed = tokens.map(&:length).inject(:+)
  [true, string[consumed..-1]]
end

#strip_comments(input) ⇒ Object



50
51
52
# File 'lib/dentaku/tokenizer.rb', line 50

# Returns a copy of +input+ with every complete /* ... */ block comment
# removed. Text that contains no complete comment comes back unchanged.
def strip_comments(input)
  block_comment = /\/\*[^*]*\*+(?:[^*\/][^*]*\*+)*\//
  input.gsub(block_comment, '')
end

#tokenize(string) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/dentaku/tokenizer.rb', line 10

# Converts +string+ into the list of tokens collected in @tokens.
#
# Resets @nesting and @tokens, strips comments from a copy of string.to_s,
# then repeatedly offers the remaining text to each of
# TokenScanner.scanners (via scan) until one of them consumes something.
#
# Raises a RuntimeError when no scanner accepts the remaining text, or when
# @nesting is still positive once all text has been consumed.
#
# Returns the @tokens array.
def tokenize(string)
  @nesting = 0
  @tokens  = []
  remaining = strip_comments(string.to_s.dup)

  until remaining.empty?
    # scan hands back the unconsumed text; any? stops at the first scanner
    # that reports a match.
    matched = TokenScanner.scanners.any? do |scanner|
      scanned, remaining = scan(remaining, scanner)
      scanned
    end

    raise "parse error at: '#{ remaining }'" unless matched
  end

  if @nesting > 0
    raise "too many opening parentheses"
  end

  @tokens
end