Class: Lexer

Inherits:
Object
  • Object
show all
Defined in:
lib/turmali/lexer.rb

Constant Summary collapse

KEYWORDS =
["def", "class", "if", "true", "false", "nil"]

Instance Method Summary collapse

Instance Method Details

#tokenize(code) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/turmali/lexer.rb', line 5

# Converts source text into a flat array of +[type, value]+ token pairs.
#
# Token kinds produced, in the order the rules are tried:
# * keyword     -> [:DEF, "def"], [:IF, "if"], ... (any entry of KEYWORDS)
# * identifier  -> [:IDENTIFIER, "name"]  (starts with a lowercase a-z)
# * constant    -> [:CONSTANT, "Name"]    (starts with an uppercase A-Z)
# * number      -> [:NUMBER, 1.0]         (always converted with #to_f)
# * string      -> [:STRING, "text"]      (double quoted, no escape handling)
# * block start -> [:INDENT, width]       (":" + newline + deeper indentation)
# * line break  -> [:NEWLINE, "\n"], preceded by one [:DEDENT, width] per
#                  block closed by that line
# * operator    -> ["==", "=="] for ||, &&, ==, !=, <= and >=
# * anything else is emitted one character at a time as [char, char];
#   single spaces between tokens are skipped.
#
# Blocks still open at the end of the input are closed with trailing
# :DEDENT tokens.
#
# @param code [String] the program text; the caller's string is not modified
# @return [Array<Array>] the tokens in source order
# @raise [RuntimeError] when a ":" block is not indented deeper than the
#   enclosing one, or when indentation increases without a preceding ":"
def tokenize(code)
  # Work on a chomped copy: the in-place variant would alter the caller's
  # string and raise FrozenError for frozen input.
  code = code.chomp
  tokens = []

  current_indent = 0 # width of the innermost open block
  indent_stack = []  # widths of all open blocks, outermost first

  i = 0
  while i < code.size
    chunk = code[i..-1]

    if (identifier = chunk[/\A([a-z]\w*)/, 1])
      type = KEYWORDS.include?(identifier) ? identifier.upcase.to_sym : :IDENTIFIER
      tokens << [type, identifier]
      i += identifier.size

    elsif (constant = chunk[/\A([A-Z]\w*)/, 1])
      tokens << [:CONSTANT, constant]
      i += constant.size

    elsif (number = chunk[/\A(\d+(\.\d+)?)/, 1])
      tokens << [:NUMBER, number.to_f]
      i += number.size

    elsif (string = chunk[/\A"([^"]*)"/, 1])
      tokens << [:STRING, string]
      i += string.size + 2 # also skip both quote characters

    elsif (indent = chunk[/\A\:\n( +)/m, 1])
      # A colon at end of line opens a block, which must be indented deeper.
      if indent.size <= current_indent
        raise "Bad indent level, got #{indent.size} indents, " \
              "expected > #{current_indent}"
      end
      current_indent = indent.size
      indent_stack.push(current_indent)
      tokens << [:INDENT, indent.size]
      i += indent.size + 2 # also skip the ":" and the newline

    elsif (indent = chunk[/\A\n( *)/m, 1])
      # Deeper indentation is only legal right after a ":".
      raise "Missing ':'" if indent.size > current_indent

      # Close every block that is indented deeper than the new line.
      # NOTE: a dedent to a width that was never opened is not rejected here;
      # the loop simply stops at the first enclosing width <= indent.size.
      while indent.size < current_indent
        indent_stack.pop
        current_indent = indent_stack.last || 0
        tokens << [:DEDENT, indent.size]
      end
      tokens << [:NEWLINE, "\n"]
      i += indent.size + 1 # also skip the newline

    elsif (operator = chunk[/\A(\|\||&&|==|!=|<=|>=)/, 1])
      tokens << [operator, operator]
      i += operator.size

    elsif chunk.start_with?(" ")
      i += 1

    else
      # Any other single character is its own token type.
      value = chunk[0, 1]
      tokens << [value, value]
      i += 1
    end
  end

  # Close the blocks that are still open at end of input.
  until indent_stack.empty?
    indent_stack.pop
    tokens << [:DEDENT, indent_stack.first || 0]
  end

  tokens
end