34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
# File 'lib/lang/lexer.rb', line 34
# Splits source text into a flat array of +[type, value]+ token pairs.
#
# The input is scanned left to right. At each position the remaining text is
# tested against the TOKEN_RX patterns in a fixed order and the first pattern
# that matches wins. Comments and whitespace are consumed without emitting a
# token. When at least one token was produced, the result is guaranteed to end
# with a newline TERMINATOR followed by an EOF token.
#
# NOTE(review): this presumably relies on every TOKEN_RX pattern being
# anchored at the start of the chunk, since the cursor advances by the
# length of the matched text -- confirm against the TOKEN_RX definition.
#
# @param code [String] the source text to tokenize
# @return [Array<Array>] the token pairs; empty when +code+ is empty or
#   yields no tokens (e.g. only comments/whitespace)
# @raise [LexicalError] when no pattern matches at the current position; it
#   is constructed with the offending character and the current line number
def tokenize(code)
  tokens = []
  return tokens if code.empty?

  line = 1 # advanced only by "\n" terminator tokens; used for error reporting
  i = 0
  while i < code.length
    chunk = code[i..-1]
    if (operator = chunk[TOKEN_RX[:operator]])
      # Operators use their own text as the token type.
      tokens << [operator, operator]
      i += operator.length
    elsif (constant = chunk[TOKEN_RX[:constants]])
      tokens << [:CONSTANT, constant]
      i += constant.length
    elsif (global = chunk[TOKEN_RX[:globals]])
      # The token value is the first capture group, not the whole match.
      tokens << [:GLOBAL, Regexp.last_match(1)]
      i += global.length
    elsif (class_var = chunk[TOKEN_RX[:class_var]])
      tokens << [:CLASS_IDENTIFIER, Regexp.last_match(1)]
      i += class_var.length
    elsif (instance_var = chunk[TOKEN_RX[:instance_var]])
      tokens << [:INSTANCE_IDENTIFIER, Regexp.last_match(1)]
      i += instance_var.length
    elsif (symbol = chunk[TOKEN_RX[:symbol]])
      tokens << [:SYMBOL, Regexp.last_match(1).to_sym]
      i += symbol.length
    elsif (regex = chunk[TOKEN_RX[:regex]])
      # Read both captures before gsub, which may overwrite the last match.
      pattern = Regexp.last_match(1)
      flags = Regexp.last_match(2)
      tokens << [:REGEX, pattern.gsub('\"', '"')]
      tokens << [:REGEX_FLAGS, flags] if flags && !flags.empty?
      i += regex.length
    elsif (identifier = chunk[TOKEN_RX[:identifiers]])
      if KEYWORDS.include?(identifier)
        # NOTE(review): /\?\!/ only strips the literal two-character
        # sequence "?!"; if the intent was to drop a trailing "?" or "!",
        # this should be /[?!]/ -- left unchanged to keep token types stable.
        tokens << [identifier.upcase.gsub(/\?\!/, '').to_sym, identifier]
      else
        tokens << [:IDENTIFIER, identifier]
      end
      i += identifier.length
    elsif (float = chunk[TOKEN_RX[:float]])
      # Floats are tried before integers so the integer branch never sees
      # text the float pattern accepts.
      tokens << [:FLOAT, float.to_f]
      i += float.length
    elsif (integer = chunk[TOKEN_RX[:integer]])
      tokens << [:NUMBER, integer.to_i]
      i += integer.length
    elsif (string = chunk[TOKEN_RX[:string]])
      # The value is the first capture with escaped double quotes unescaped.
      tokens << [:STRING, Regexp.last_match(1).gsub('\"', '"')]
      i += string.length
    elsif (comment = chunk[TOKEN_RX[:comment]])
      # Comments are skipped entirely: no token is emitted.
      i += comment.length
    elsif (terminator = chunk[TOKEN_RX[:terminator]])
      tokens << [:TERMINATOR, terminator]
      line += 1 if terminator == "\n"
      # NOTE(review): advances by one character regardless of the match
      # length -- presumably terminators are single characters; confirm.
      i += 1
    elsif (space = chunk[TOKEN_RX[:whitespace]])
      i += space.length
    else
      raise LexicalError.new(code[i], line)
    end
  end

  unless tokens.empty?
    # Ensure the stream ends with a newline terminator, then mark the end.
    tokens << [:TERMINATOR, "\n"] unless tokens.last == [:TERMINATOR, "\n"]
    tokens << [:EOF, :eof]
  end
  tokens
end
|