33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
# File 'lib/lang/lexer.rb', line 33
# Splits source text into a flat list of two-element token pairs.
#
# The input is scanned left to right. At each position the remaining text is
# tried against the patterns in TOKEN_RX in a fixed priority order (operator,
# constant, global, class variable, instance variable, symbol, regex,
# identifier/keyword, float, integer, string, comment, terminator,
# whitespace); the first pattern that matches wins. Comments and whitespace
# are consumed without emitting a token.
#
# Unless no tokens were produced, the result always ends with
# [:TERMINATOR, "\n"] followed by [:EOF, :eof].
#
# @param code [String] the source text to tokenize
# @return [Array<Array>] pairs of [type, value]; operators are emitted as
#   [operator, operator], every other token uses a Symbol type
# @raise [LexicalError] when no pattern matches at the current position;
#   constructed with the offending character and the current line number
def tokenize(code)
  tokens = []
  return tokens if code.empty?

  line = 1
  i = 0
  while i < code.length
    # Each pattern is applied to the not-yet-consumed tail of the input.
    chunk = code[i..-1]

    if (operator = chunk[TOKEN_RX[:operator]])
      tokens << [operator, operator]
      i += operator.length
    elsif (constant = chunk[TOKEN_RX[:constants]])
      tokens << [:CONSTANT, constant]
      i += constant.length
    elsif (global = chunk[TOKEN_RX[:globals]])
      # The token value is the first capture group; the whole match is only
      # used to know how far to advance.
      tokens << [:GLOBAL, Regexp.last_match(1)]
      i += global.length
    elsif (class_var = chunk[TOKEN_RX[:class_var]])
      tokens << [:CLASS_IDENTIFIER, Regexp.last_match(1)]
      i += class_var.length
    elsif (instance_var = chunk[TOKEN_RX[:instance_var]])
      tokens << [:INSTANCE_IDENTIFIER, Regexp.last_match(1)]
      i += instance_var.length
    elsif (symbol = chunk[TOKEN_RX[:symbol]])
      tokens << [:SYMBOL, Regexp.last_match(1).to_sym]
      i += symbol.length
    elsif (regex = chunk[TOKEN_RX[:regex]])
      # Grab both captures before gsub runs, since gsub may reset the
      # last-match data.
      pattern = Regexp.last_match(1)
      flags = Regexp.last_match(2)
      tokens << [:REGEX, pattern.gsub('\"', '"')]
      tokens << [:REGEX_FLAGS, flags] if flags && !flags.empty?
      i += regex.length
    elsif (identifier = chunk[TOKEN_RX[:identifiers]])
      if KEYWORDS.include?(identifier)
        # NOTE(review): this pattern only strips the literal two-character
        # sequence "?!"; if the intent was to drop a trailing "?" or "!",
        # it should be /[?!]/ -- confirm against KEYWORDS and the parser.
        tokens << [identifier.upcase.gsub(/\?\!/, "").to_sym, identifier]
      else
        tokens << [:IDENTIFIER, identifier]
      end
      i += identifier.length
    elsif (float = chunk[TOKEN_RX[:float]])
      tokens << [:FLOAT, float.to_f]
      i += float.length
    elsif (integer = chunk[TOKEN_RX[:integer]])
      tokens << [:NUMBER, integer.to_i]
      i += integer.length
    elsif (string = chunk[TOKEN_RX[:string]])
      # Unescape \" inside the captured string body.
      tokens << [:STRING, Regexp.last_match(1).gsub('\"', '"')]
      i += string.length
    elsif (comment = chunk[TOKEN_RX[:comment]])
      # Comments produce no token; just skip past them.
      i += comment.length
    elsif (terminator = chunk[TOKEN_RX[:terminator]])
      tokens << [:TERMINATOR, terminator]
      # Only newline terminators advance the line counter used in errors.
      line += 1 if terminator == "\n"
      # NOTE(review): advances by exactly one character regardless of the
      # match length -- presumably terminators are single characters.
      i += 1
    elsif (space = chunk[TOKEN_RX[:whitespace]])
      i += space.length
    else
      raise LexicalError.new(code[i], line)
    end
  end

  unless tokens.empty?
    # Guarantee the stream ends with a newline terminator and an EOF marker.
    tokens << [:TERMINATOR, "\n"] if tokens.last != [:TERMINATOR, "\n"]
    tokens << [:EOF, :eof]
  end
  tokens
end
|