Class: TinyGQL::Lexer
Defined Under Namespace
Modules: Literals
Constant Summary
collapse
- IDENTIFIER =
/[_A-Za-z][_0-9A-Za-z]*/
- IGNORE =
%r{
(?:
[, \cc\r\n\t]+ |
\#.*$
)
}x
- INT =
/[-]?(?:[0]|[1-9][0-9]*)/
- FLOAT_DECIMAL =
/[.][0-9]+/
- FLOAT_EXP =
/[eE][+-]?[0-9]+/
- FLOAT =
/#{INT}(#{FLOAT_DECIMAL}#{FLOAT_EXP}|#{FLOAT_DECIMAL}|#{FLOAT_EXP})/
- QUOTE =
'"'
- UNICODE_DIGIT =
/[0-9A-Za-z]/
- FOUR_DIGIT_UNICODE =
/#{UNICODE_DIGIT}{4}/
- N_DIGIT_UNICODE =
%r{#{LCURLY}#{UNICODE_DIGIT}{4,}#{RCURLY}}x
- UNICODE_ESCAPE =
%r{\\u(?:#{FOUR_DIGIT_UNICODE}|#{N_DIGIT_UNICODE})}
- STRING_ESCAPE =
%r{[\\][\\/bfnrt]}
- BLOCK_QUOTE =
'"""'
- ESCAPED_QUOTE =
/\\"/
- STRING_CHAR =
/#{ESCAPED_QUOTE}|[^"\\]|#{UNICODE_ESCAPE}|#{STRING_ESCAPE}/
- LIT_NAME_LUT =
Literals.constants.each_with_object({}) { |n, o|
key = Literals.const_get(n)
key = key.is_a?(Regexp) ? key.source.gsub(/(\\b|\\)/, '') : key
o[key] = n
}
- LIT =
Regexp.union(Literals.constants.map { |n| Literals.const_get(n) })
- QUOTED_STRING =
%r{#{QUOTE} ((?:#{STRING_CHAR})*) #{QUOTE}}x
- BLOCK_STRING =
%r{
#{BLOCK_QUOTE}
((?: [^"\\] | # Any characters that aren't a quote or slash
(?<!") ["]{1,2} (?!") | # Any quotes that don't have quotes next to them
\\"{0,3}(?!") | # A slash followed by <= 3 quotes that aren't followed by a quote
\\ | # A slash
"{1,2}(?!") # 1 or 2 " followed by something that isn't a quote
)*
(?:"")?)
#{BLOCK_QUOTE}
}xm
- UNKNOWN_CHAR =
# catch-all for anything else. must be at the bottom for precedence.
/./
- ESCAPES =
/\\["\\\/bfnrt]/
- ESCAPES_REPLACE =
{
'\\"' => '"',
"\\\\" => "\\",
"\\/" => '/',
"\\b" => "\b",
"\\f" => "\f",
"\\n" => "\n",
"\\r" => "\r",
"\\t" => "\t",
}
- UTF_8 =
/\\u(?:([\dAa-f]{4})|\{([\da-f]{4,})\})(?:\\u([\dAa-f]{4}))?/i
- VALID_STRING =
/\A(?:[^\\]|#{ESCAPES}|#{UTF_8})*\z/o
Constants included
from Literals
Literals::AMP, Literals::BANG, Literals::COLON, Literals::DIRECTIVE, Literals::DIR_SIGN, Literals::ELLIPSIS, Literals::ENUM, Literals::EQUALS, Literals::EXTEND, Literals::FALSE, Literals::FRAGMENT, Literals::IMPLEMENTS, Literals::INPUT, Literals::INTERFACE, Literals::LBRACKET, Literals::LCURLY, Literals::LPAREN, Literals::MUTATION, Literals::NULL, Literals::ON, Literals::PIPE, Literals::QUERY, Literals::RBRACKET, Literals::RCURLY, Literals::REPEATABLE, Literals::RPAREN, Literals::SCALAR, Literals::SCHEMA, Literals::SUBSCRIPTION, Literals::TRUE, Literals::TYPE, Literals::UNION, Literals::VAR_SIGN
Instance Attribute Summary collapse
Instance Method Summary
collapse
Constructor Details
#initialize(string) ⇒ Lexer
Returns a new instance of Lexer.
92
93
94
95
96
97
98
|
# File 'lib/tinygql/lexer.rb', line 92
# Builds a lexer over +string+.
#
# The input is rejected up front (bare +raise+, i.e. a RuntimeError) when it
# is not valid in its own encoding. Otherwise a StringScanner is created over
# it and the current-token slots start out empty.
#
# @param string [String] the text to tokenize
def initialize(string)
  raise unless string.valid_encoding?

  @scan = StringScanner.new(string)
  @token_name = @token_value = nil
end
|
Instance Attribute Details
#token_name ⇒ Object
Returns the value of attribute token_name.
131
132
133
|
# File 'lib/tinygql/lexer.rb', line 131
# Reader for @token_name: nil right after construction, otherwise whatever
# name was last passed to #emit.
def token_name
@token_name
end
|
#token_value ⇒ Object
Returns the value of attribute token_value.
131
132
133
|
# File 'lib/tinygql/lexer.rb', line 131
# Reader for @token_value: nil right after construction, otherwise whatever
# value was last passed to #emit.
def token_value
@token_value
end
|
Instance Method Details
#advance ⇒ Object
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
# File 'lib/tinygql/lexer.rb', line 108
# Moves the lexer forward by one token.
#
# Ignored input (matched by IGNORE) is consumed first. If that reaches the end
# of the input, the current token is cleared and false is returned. Otherwise
# the patterns are tried in a fixed order -- FLOAT before INT, LIT before
# IDENTIFIER, BLOCK_STRING before QUOTED_STRING, UNKNOWN_CHAR last -- and the
# result of the matching emit call is returned.
#
# @return [Boolean] false at end of input, otherwise the emit result
def advance
  # Consume every consecutive run of ignorable input before looking for a token.
  while @scan.skip(IGNORE)
  end

  if @scan.eos?
    emit(nil, nil)
    return false
  end

  if (text = @scan.scan(FLOAT))
    emit(:FLOAT, text)
  elsif (text = @scan.scan(INT))
    emit(:INT, text)
  elsif (text = @scan.scan(LIT))
    emit(LIT_NAME_LUT[text], text)
  elsif (text = @scan.scan(IDENTIFIER))
    emit(:IDENTIFIER, text)
  elsif @scan.skip(BLOCK_STRING)
    # Capture group 1 holds the text between the delimiters.
    emit_block(@scan[1])
  elsif @scan.skip(QUOTED_STRING)
    emit_string(@scan[1])
  elsif (text = @scan.scan(UNKNOWN_CHAR))
    emit(:UNKNOWN_CHAR, text)
  else
    raise "Unknown string?"
  end
end
|
#done? ⇒ Boolean
104
105
106
|
# File 'lib/tinygql/lexer.rb', line 104
# Whether the underlying scanner has reached the end of its input.
#
# @return [Boolean] the result of StringScanner#eos?
def done?
@scan.eos?
end
|
#emit(token_name, token_value) ⇒ Object
133
134
135
136
137
|
# File 'lib/tinygql/lexer.rb', line 133
# Stores the given name/value pair as the current token.
#
# @param token_name [Object] stored in @token_name
# @param token_value [Object] stored in @token_value
# @return [true] always
def emit(token_name, token_value)
  @token_name, @token_value = token_name, token_value
  true
end
|
#emit_block(value) ⇒ Object
183
184
185
186
|
# File 'lib/tinygql/lexer.rb', line 183
# Emits a block string: the raw contents are passed through #trim_whitespace
# and the result is handed to #emit_string.
#
# @param value [String] text captured between the block-string delimiters
# @return whatever #emit_string returns
def emit_block(value)
  emit_string(trim_whitespace(value))
end
|
#emit_string(value) ⇒ Object
188
189
190
191
192
193
194
195
196
197
198
199
200
|
# File 'lib/tinygql/lexer.rb', line 188
# Emits a string token for +value+, unescaping it in place.
#
# The token is :BAD_UNICODE_ESCAPE when the raw text is not validly encoded,
# does not match VALID_STRING, or is no longer validly encoded once its
# escapes have been replaced. Otherwise it is :STRING.
#
# @param value [String] raw string contents; modified in place
# @return the result of #emit
def emit_string(value)
  # The encoding check comes first so the regexp is never run on broken bytes.
  return emit(:BAD_UNICODE_ESCAPE, value) unless value.valid_encoding? && value.match?(VALID_STRING)

  replace_escaped_characters_in_place(value)
  token = value.valid_encoding? ? :STRING : :BAD_UNICODE_ESCAPE
  emit(token, value)
end
|
#line ⇒ Object
100
101
102
|
# File 'lib/tinygql/lexer.rb', line 100
# Returns the 1-based line number of the scanner's current position.
#
# StringScanner#pos is a byte offset, so the consumed prefix is taken with
# String#byteslice. String#[] counts characters instead of bytes and would
# read past the cursor (and over-count newlines) whenever multibyte
# characters precede it.
#
# @return [Integer] one plus the number of newlines consumed so far
def line
  @scan.string.byteslice(0, @scan.pos).count("\n") + 1
end
|
#next_token ⇒ Object
139
140
141
|
# File 'lib/tinygql/lexer.rb', line 139
# Advances the lexer and returns the new current token.
#
# @return [Array, false, nil] +[token_name, token_value]+ when #advance
#   returns a truthy value; otherwise the falsy value #advance returned
def next_token
  advanced = advance
  return advanced unless advanced

  [@token_name, @token_value]
end
|
#replace_escaped_characters_in_place(raw_string) ⇒ Object
Replace any escaped unicode or whitespace with the actual characters. To avoid allocating more strings, this modifies the string passed into it.
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
|
# File 'lib/tinygql/lexer.rb', line 145
# Replaces backslash escapes and \u escapes in +raw_string+ with the
# characters they denote, mutating the string in place.
#
# Both kinds of escape are handled in a single left-to-right pass. Running
# them as two passes would let the output of the first feed the second: the
# text \\u0041 (an escaped backslash followed by "u0041") would become
# \u0041 and then wrongly "A".
#
# A \u escape in the high-surrogate range immediately followed by one in the
# low-surrogate range is combined into a single code point. Any other
# adjacent pair is converted independently.
#
# @param raw_string [String] string contents to unescape; modified in place
# @return [nil]
def replace_escaped_characters_in_place(raw_string)
  # ESCAPES has no capture groups, so $1..$3 always refer to UTF_8's groups.
  raw_string.gsub!(/#{ESCAPES}|#{UTF_8}/o) do |matched_str|
    first_hex = $1 || $2
    second_hex = $3

    # No hex capture means the simple-escape alternative matched.
    next ESCAPES_REPLACE[matched_str] unless first_hex

    codepoint_1 = first_hex.to_i(16)
    next [codepoint_1].pack('U'.freeze) unless second_hex

    codepoint_2 = second_hex.to_i(16)
    if (codepoint_1 >= 0xD800 && codepoint_1 <= 0xDBFF) &&
       (codepoint_2 >= 0xDC00 && codepoint_2 <= 0xDFFF)
      # UTF-16 surrogate pair -> single supplementary-plane code point.
      combined = ((codepoint_1 - 0xD800) * 0x400) + (codepoint_2 - 0xDC00) + 0x10000
      [combined].pack('U'.freeze)
    else
      [codepoint_1].pack('U'.freeze) + [codepoint_2].pack('U'.freeze)
    end
  end
  nil
end
|
#trim_whitespace(str) ⇒ Object
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
|
# File 'lib/tinygql/lexer.rb', line 202
# Strips the common indentation and the blank leading/trailing lines from the
# raw contents of a block string.
#
# The indentation of each line is measured directly as its run of leading
# spaces. Special-casing particular widths with look-alike regexps is easy to
# get wrong (two identical patterns leave one branch unreachable and
# mis-report the width), and it yields nothing the general measurement does
# not.
#
# The first line never takes part in the indentation calculation and is never
# de-indented. Lines consisting only of spaces do not influence the common
# indentation.
#
# @param str [String] text between the block-string delimiters
# @return [String] the trimmed text; +str+ itself when there is nothing to do
def trim_whitespace(str)
  return "".dup if str == ""

  has_newline = str.include?("\n")
  return str if !has_newline && !str.start_with?(" ")

  lines = has_newline ? str.split("\n") : [str]

  # Smallest indentation among the non-first lines that contain something
  # other than spaces.
  common_indent = nil
  lines.each_with_index do |line, idx|
    next if idx == 0

    line_indent = line[/\A */].size
    if line_indent < line.size && (common_indent.nil? || line_indent < common_indent)
      common_indent = line_indent
    end
  end

  if common_indent && common_indent > 0
    lines.each_with_index do |line, idx|
      line.slice!(0, common_indent) unless idx == 0
    end
  end

  # Drop empty lines at both ends.
  lines.shift while lines.size > 0 && lines[0].empty?
  lines.pop while lines.size > 0 && lines[-1].empty?

  lines.size > 1 ? lines.join("\n") : (lines.first || "".dup)
end
|