Class: Dentaku::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/dentaku/tokenizer.rb

Constant Summary collapse

SCANNERS =
[
  TokenScanner.new(:whitespace, '\s+'),
  TokenScanner.new(:numeric,    '(\d+(\.\d+)?|\.\d+)', lambda { |raw| raw =~ /\./ ? raw.to_f : raw.to_i }),
  TokenScanner.new(:string,     '"[^"]*"',    lambda { |raw| raw.gsub(/^"|"$/, '') }),
  TokenScanner.new(:string,     "'[^']*'",    lambda { |raw| raw.gsub(/^'|'$/, '') }),
  TokenScanner.new(:operator,   '\+|-|\*|\/', lambda do |raw|
    case raw
    when '+' then :add
    when '-' then :subtract
    when '*' then :multiply
    when '/' then :divide
    end
  end),
  TokenScanner.new(:grouping,   '\(|\)|,', lambda do |raw|
    case raw
    when '(' then :open
    when ')' then :close
    when ',' then :comma
    end
  end),
  TokenScanner.new(:comparator, '<=|>=|!=|<>|<|>|=', lambda do |raw|
    case raw
    when '<=' then :le
    when '>=' then :ge
    when '!=' then :ne
    when '<>' then :ne
    when '<'  then :lt
    when '>'  then :gt
    when '='  then :eq
    end
  end),
  TokenScanner.new(:combinator, '(and|or)\b',   lambda { |raw| raw.strip.downcase.to_sym }),
  TokenScanner.new(:function,   '(if|round)\b', lambda { |raw| raw.strip.to_sym }),
  TokenScanner.new(:identifier, '[a-z_]+',      lambda { |raw| raw.downcase.to_sym })
]
LPAREN =
TokenMatcher.new(:grouping, :open)
RPAREN =
TokenMatcher.new(:grouping, :close)

Instance Method Summary collapse

Instance Method Details

#tokenize(string) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/dentaku/tokenizer.rb', line 46

def tokenize(string)
  nesting = 0
  tokens  = []
  input   = string.dup

  until input.empty?
    raise "parse error at: '#{ input }'" unless SCANNERS.any? do |scanner|
      if token = scanner.scan(input)
        raise "unexpected zero-width match (:#{ token.category }) at '#{ input }'" if token.length == 0

        nesting += 1 if LPAREN == token
        nesting -= 1 if RPAREN == token
        raise "too many closing parentheses" if nesting < 0

        tokens << token unless token.is?(:whitespace)
        input.slice!(0, token.length)

        true
      else
        false
      end
    end
  end

  raise "too many opening parentheses" if nesting > 0
  tokens
end