Class: TinyGQL::Lexer
Defined Under Namespace
Modules: Literals
Constant Summary
collapse
- IDENTIFIER =
/[_A-Za-z][_0-9A-Za-z]*\b/
- IGNORE =
%r{
(?:
[, \c\r\n\t]+ |
\#.*$
)*
}x
- INT =
/[-]?(?:[0]|[1-9][0-9]*)/
- FLOAT_DECIMAL =
/[.][0-9]+/
- FLOAT_EXP =
/[eE][+-]?[0-9]+/
- NUMERIC =
/#{INT}(#{FLOAT_DECIMAL}#{FLOAT_EXP}|#{FLOAT_DECIMAL}|#{FLOAT_EXP})?/
- KEYWORDS =
{
"on" => :ON,
"fragment" => :FRAGMENT,
"true" => :TRUE,
"false" => :FALSE,
"null" => :NULL,
"query" => :QUERY,
"mutation" => :MUTATION,
"subscription" => :SUBSCRIPTION,
"schema" => :SCHEMA,
"scalar" => :SCALAR,
"type" => :TYPE,
"extend" => :EXTEND,
"implements" => :IMPLEMENTS,
"interface" => :INTERFACE,
"union" => :UNION,
"enum" => :ENUM,
"input" => :INPUT,
"directive" => :DIRECTIVE,
"repeatable" => :REPEATABLE
}.freeze
- QUOTE =
'"'
- UNICODE_DIGIT =
/[0-9A-Za-z]/
- FOUR_DIGIT_UNICODE =
/#{UNICODE_DIGIT}{4}/
- N_DIGIT_UNICODE =
%r{#{LCURLY}#{UNICODE_DIGIT}{4,}#{RCURLY}}x
- UNICODE_ESCAPE =
%r{\\u(?:#{FOUR_DIGIT_UNICODE}|#{N_DIGIT_UNICODE})}
- STRING_ESCAPE =
%r{[\\][\\/bfnrt]}
- BLOCK_QUOTE =
'"""'
- ESCAPED_QUOTE =
/\\"/
- STRING_CHAR =
/#{ESCAPED_QUOTE}|[^"\\]|#{UNICODE_ESCAPE}|#{STRING_ESCAPE}/
- LIT_NAME_LUT =
Literals.constants.each_with_object({}) { |n, o|
o[Literals.const_get(n)] = n
}
- LIT =
Regexp.union(Literals.constants.map { |n| Literals.const_get(n) })
- QUOTED_STRING =
%r{#{QUOTE} ((?:#{STRING_CHAR})*) #{QUOTE}}x
- BLOCK_STRING =
%r{
#{BLOCK_QUOTE}
((?: [^"\\] | # Any characters that aren't a quote or slash
(?<!") ["]{1,2} (?!") | # Any quotes that don't have quotes next to them
\\"{0,3}(?!") | # A slash followed by <= 3 quotes that aren't followed by a quote
\\ | # A slash
"{1,2}(?!") # 1 or 2 " followed by something that isn't a quote
)*
(?:"")?)
#{BLOCK_QUOTE}
}xm
- ESCAPES =
/\\["\\\/bfnrt]/
- ESCAPES_REPLACE =
{
'\\"' => '"',
"\\\\" => "\\",
"\\/" => '/',
"\\b" => "\b",
"\\f" => "\f",
"\\n" => "\n",
"\\r" => "\r",
"\\t" => "\t",
}
- UTF_8 =
/\\u(?:([\dAa-f]{4})|\{([\da-f]{4,})\})(?:\\u([\dAa-f]{4}))?/i
- VALID_STRING =
/\A(?:[^\\]|#{ESCAPES}|#{UTF_8})*\z/o
Constants included
from Literals
Literals::AMP, Literals::BANG, Literals::COLON, Literals::DIR_SIGN, Literals::ELLIPSIS, Literals::EQUALS, Literals::LBRACKET, Literals::LCURLY, Literals::LPAREN, Literals::PIPE, Literals::RBRACKET, Literals::RCURLY, Literals::RPAREN, Literals::VAR_SIGN
Instance Attribute Summary collapse
Instance Method Summary
collapse
Constructor Details
#initialize(string) ⇒ Lexer
Returns a new instance of Lexer.
90
91
92
93
94
95
96
|
# File 'lib/tinygql/lexer.rb', line 90
# Builds a lexer positioned at the start of +string+.
#
# @param string [String] source text to tokenize
# @raise [RuntimeError] if +string+ is not valid in its declared encoding
def initialize(string)
  # Always raise a fresh, explicit error. A bare `raise` re-raises `$!` when
  # one is set, so constructing a lexer inside a `rescue` clause would surface
  # the unrelated exception being handled instead of reporting the bad input.
  raise "string is not validly encoded" unless string.valid_encoding?

  @scan = StringScanner.new(string)
  @token_name = nil
  @token_value = nil
end
|
Instance Attribute Details
#token_name ⇒ Object
Returns the value of attribute token_name.
121
122
123
|
# File 'lib/tinygql/lexer.rb', line 121
# Reader for the name of the token most recently recorded by #emit.
#
# @return [Symbol, nil] the stored name; nil before any token has been
#   emitted and after end of input
def token_name; @token_name; end
|
#token_value ⇒ Object
Returns the value of attribute token_value.
121
122
123
|
# File 'lib/tinygql/lexer.rb', line 121
# Reader for the value of the token most recently recorded by #emit.
#
# @return [String, nil] the stored value; nil before any token has been
#   emitted and after end of input
def token_value; @token_value; end
|
Instance Method Details
#advance ⇒ Object
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
# File 'lib/tinygql/lexer.rb', line 106
# Skips ignorable input, then reads exactly one token and records it via #emit.
#
# Matchers are tried in a fixed order: punctuation literals, identifiers and
# keywords, block strings, quoted strings, numbers. If none matches and input
# remains, a single character is consumed and reported as :UNKNOWN_CHAR.
#
# @return [Boolean] true when a token was recorded; false at end of input
#   (in which case the recorded name and value are both nil)
def advance
  @scan.skip(IGNORE)

  if (literal = @scan.scan(LIT))
    emit(LIT_NAME_LUT[literal], literal)
  elsif (word = @scan.scan(IDENTIFIER))
    # Reserved words get their own token name; anything else is an identifier.
    emit(KEYWORDS.fetch(word, :IDENTIFIER), word)
  elsif @scan.skip(BLOCK_STRING)
    # Block strings are tried before quoted strings so `"""` is not read as
    # an empty quoted string followed by a stray quote.
    emit_block(@scan[1])
  elsif @scan.skip(QUOTED_STRING)
    emit_string(@scan[1])
  elsif (number = @scan.scan(NUMERIC))
    # NUMERIC's only capture group is the fractional/exponent part.
    emit(@scan[1] ? :FLOAT : :INT, number)
  elsif @scan.eos?
    emit(nil, nil) && false
  else
    emit(:UNKNOWN_CHAR, @scan.getch)
  end
end
|
#done? ⇒ Boolean
102
103
104
|
# File 'lib/tinygql/lexer.rb', line 102
# Reports whether the underlying scanner has consumed all of its input.
#
# @return [Boolean] true when the scanner is at end of string
def done?; @scan.eos?; end
|
#emit(token_name, token_value) ⇒ Object
123
124
125
126
127
|
# File 'lib/tinygql/lexer.rb', line 123
# Records the current token so it can be read back through #token_name and
# #token_value.
#
# @param token_name [Symbol, nil] name to store
# @param token_value [String, nil] value to store
# @return [true] always, so callers can return the result as a success flag
def emit(token_name, token_value)
  @token_name, @token_value = token_name, token_value
  true
end
|
#emit_block(value) ⇒ Object
173
174
175
176
|
# File 'lib/tinygql/lexer.rb', line 173
# Emits a block-string token: the raw contents are first passed through
# #trim_whitespace and the result is handed to #emit_string.
#
# @param value [String] text captured between the block-quote delimiters
# @return [Object] whatever #emit_string returns
def emit_block(value)
  emit_string(trim_whitespace(value))
end
|
#emit_string(value) ⇒ Object
178
179
180
181
182
183
184
185
186
187
188
189
190
|
# File 'lib/tinygql/lexer.rb', line 178
# Validates and unescapes string contents, then records the resulting token.
#
# The token is :BAD_UNICODE_ESCAPE when +value+ is not validly encoded, does
# not match VALID_STRING, or stops being validly encoded once its escapes have
# been replaced. Otherwise it is :STRING. +value+ is unescaped in place.
#
# @param value [String] raw string contents (mutated)
# @return [Object] whatever #emit returns
def emit_string(value)
  # Encoding is checked first so the regexp is never run on a broken string.
  well_formed = value.valid_encoding? && value.match?(VALID_STRING)
  return emit(:BAD_UNICODE_ESCAPE, value) unless well_formed

  replace_escaped_characters_in_place(value)

  # Decoding can still produce invalid text, e.g. from an unpaired surrogate.
  kind = value.valid_encoding? ? :STRING : :BAD_UNICODE_ESCAPE
  emit(kind, value)
end
|
#line ⇒ Object
98
99
100
|
# File 'lib/tinygql/lexer.rb', line 98
# 1-based line number of the scanner's current position.
#
# @return [Integer] one more than the number of newlines already consumed
def line
  # StringScanner#pos is a byte offset, so the consumed prefix must be taken
  # with byteslice. String#[] counts characters and would read past the
  # cursor (over-counting newlines) whenever multibyte text precedes it.
  @scan.string.byteslice(0, @scan.pos).count("\n") + 1
end
|
#next_token ⇒ Object
129
130
131
|
# File 'lib/tinygql/lexer.rb', line 129
# Advances the lexer and returns the token it produced.
#
# @return [Array, false, nil] a two-element +[token_name, token_value]+
#   array when #advance succeeds; otherwise the falsy value #advance returned
def next_token
  advanced = advance
  return advanced unless advanced

  [@token_name, @token_value]
end
|
#replace_escaped_characters_in_place(raw_string) ⇒ Object
Replace any escaped unicode or whitespace with the actual characters. To avoid allocating more strings, this modifies the string passed into it.
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
|
# File 'lib/tinygql/lexer.rb', line 135
# Replaces escape sequences in +raw_string+ with the characters they denote:
# the simple escapes listed in ESCAPES_REPLACE and the unicode escapes matched
# by UTF_8 (four-digit, braced, and surrogate pairs). The string is modified
# in place to avoid allocating a copy.
#
# @param raw_string [String] string contents to unescape (mutated)
# @return [nil]
def replace_escaped_characters_in_place(raw_string)
  # Both escape families are handled in one left-to-right pass. With two
  # separate passes, the output of the first (an escaped backslash becoming a
  # single backslash) could join up with following text such as "u0041" and
  # be decoded a second time.
  # ESCAPES has no capture groups, so $1..$3 are UTF_8's groups.
  raw_string.gsub!(/#{ESCAPES}|#{UTF_8}/o) do |matched_str|
    hex_1 = $1 || $2
    hex_2 = $3

    # No unicode capture means the match was a simple escape.
    next ESCAPES_REPLACE[matched_str] unless hex_1

    codepoint_1 = hex_1.to_i(16)
    next [codepoint_1].pack('U'.freeze) unless hex_2

    codepoint_2 = hex_2.to_i(16)
    if (codepoint_1 >= 0xD800 && codepoint_1 <= 0xDBFF) && (codepoint_2 >= 0xDC00 && codepoint_2 <= 0xDFFF)
      # A surrogate pair: combine both halves into one code point.
      combined = ((codepoint_1 - 0xD800) * 0x400) + (codepoint_2 - 0xDC00) + 0x10000
      [combined].pack('U'.freeze)
    else
      # Two independent escapes that happened to be adjacent.
      [codepoint_1].pack('U'.freeze) + [codepoint_2].pack('U'.freeze)
    end
  end
  nil
end
|
#trim_whitespace(str) ⇒ Object
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
|
# File 'lib/tinygql/lexer.rb', line 192
# Strips common indentation and blank leading/trailing lines from the raw
# contents of a block string.
#
# The first line never contributes to, and is never stripped of, the common
# indentation. Lines consisting only of spaces (or empty lines) are ignored
# when computing it. Only space characters are treated as indentation.
#
# @param str [String] raw block-string contents
# @return [String] the trimmed text; +str+ itself is returned when there is
#   nothing to trim
def trim_whitespace(str)
  return "".dup if str == ""

  has_newline = str.include?("\n")
  return str if !has_newline && !str.start_with?(" ")

  lines = has_newline ? str.split("\n") : [str]
  rest = lines.drop(1)

  # Measure the real run of leading spaces on every line. Special-casing
  # particular widths is error-prone: a wrong width here makes slice! below
  # eat into the line's content.
  indents = rest.map do |line|
    indent = line[/\A */].size
    indent if indent < line.size
  end
  common_indent = indents.compact.min

  if common_indent && common_indent > 0
    # `rest` shares its string objects with `lines`, so this edits them too.
    rest.each { |line| line.slice!(0, common_indent) }
  end

  lines.shift while !lines.empty? && lines.first.empty?
  lines.pop while !lines.empty? && lines.last.empty?

  lines.size > 1 ? lines.join("\n") : (lines.first || "".dup)
end
|