Class: Caps::Tokenizer
- Inherits:
-
Object
show all
- Defined in:
- lib/caps/tokenizer.rb,
lib/caps/tokenizer/infra.rb,
lib/caps/tokenizer/helpers.rb,
lib/caps/tokenizer/location.rb
Defined Under Namespace
Modules: Helpers
Classes: Location
Constant Summary
collapse
- LINE_FEED =
"\u000a"
- REPLACEMENT_CHARACTER =
"\ufffd"
- SOLIDUS =
"/"
- REVERSE_SOLIDUS =
"\\"
- ASTERISK =
"*"
- SINGLE_QUOTE =
"'"
- DOUBLE_QUOTE =
'"'
- NUMBER_SIGN =
"#"
- HYPHEN_MINUS =
"\u002d"
- LEFT_PARENS =
"("
- RIGHT_PARENS =
")"
- PLUS_SIGN =
"+"
- COMMA =
","
- FULL_STOP =
"."
- COLON =
":"
- SEMI =
";"
- LESS_THAN =
"<"
- COMMERCIAL_AT =
"@"
- LEFT_SQUARE =
"["
- RIGHT_SQUARE =
"]"
- LEFT_CURLY =
"{"
- RIGHT_CURLY =
"}"
- PERCENTAGE =
"%"
- GREATER_THAN =
">"
- EXCLAMATION =
"!"
- MAXIMUM_ALLOWED_CODEPOINT =
0x110000
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
Constructor Details
#initialize(contents) ⇒ Tokenizer
Returns a new instance of Tokenizer.
12
13
14
15
|
# File 'lib/caps/tokenizer/infra.rb', line 12
# Builds a tokenizer over +contents+. The raw input is first run through
# the class-level preprocess step, then per-instance state is initialized
# via +setup+.
def initialize(contents)
  @contents = self.class.preprocess(contents)
  setup
end
|
Instance Attribute Details
#contents ⇒ Object
Returns the value of attribute contents.
6
7
8
|
# File 'lib/caps/tokenizer/infra.rb', line 6
# The (preprocessed) source text being tokenized.
def contents
  @contents
end
|
#tokens ⇒ Object
Returns the value of attribute tokens.
6
7
8
|
# File 'lib/caps/tokenizer/infra.rb', line 6
# The array of token hashes produced so far.
def tokens
  @tokens
end
|
Class Method Details
.parse(src) ⇒ Object
8
9
10
|
# File 'lib/caps/tokenizer/infra.rb', line 8
# Convenience entry point: builds a tokenizer over +src+ and returns the
# full token list.
def self.parse(src) = new(src).parse!
|
.stringify(tokens) ⇒ Object
455
456
457
458
459
460
461
462
|
# File 'lib/caps/tokenizer.rb', line 455
# Renders a token list as a compact, human-readable string, e.g.
# `ident("a") comma()`. Tokens with a :value show it via #inspect inside
# the parentheses; value-less tokens render with empty parentheses.
def self.stringify(tokens)
  tokens.map { |token|
    inner = token.key?(:value) ? token[:value].inspect : ""
    "#{token[:type]}(#{inner})"
  }.join(" ")
end
|
Instance Method Details
#consume_bad_url ⇒ Object
252
253
254
255
256
257
258
259
260
261
262
263
264
265
|
# File 'lib/caps/tokenizer.rb', line 252
# Discards the remnants of a malformed url(...) so tokenizing can resume.
# Valid escapes are skipped as whole units, so an escaped ")" does not
# terminate the scan.
# NOTE(review): the closing ")" is left unconsumed here (only peeked);
# CSS Syntax §4.3.14 consumes it — confirm callers rely on the current
# behavior before changing it.
def consume_bad_url
  until eof? || peek == RIGHT_PARENS
    if valid_escape?
      consume_escaped_codepoint
    else
      advance
    end
  end
end
|
#consume_cdc_token ⇒ Object
156
157
158
159
160
161
162
163
164
165
|
# File 'lib/caps/tokenizer.rb', line 156
# Consumes the three characters of a CDC marker ("-->") and emits a :cdc
# token spanning them.
def consume_cdc_token
  location = mark_pos
  3.times { advance }
  push_node(:cdc, position: location.finish)
end
|
#consume_cdo_token ⇒ Object
146
147
148
149
150
151
152
153
154
|
# File 'lib/caps/tokenizer.rb', line 146
# Consumes the four characters of a CDO marker ("<!--") and emits a :cdo
# token spanning them.
def consume_cdo_token
  location = mark_pos
  4.times { advance }
  push_node(:cdo, position: location.finish)
end
|
#consume_comments ⇒ Object
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
|
# File 'lib/caps/tokenizer.rb', line 271
# Consumes a "/* ... */" comment and emits a :comment token carrying its
# inner text. Does nothing unless the cursor sits on "/*"; an unclosed
# comment is consumed to EOF and no token is emitted.
# NOTE(review): the method name was lost in extraction; "consume_comments"
# is inferred from the CSS Syntax spec and the alphabetical method
# ordering of the surrounding documentation — confirm against the repo.
def consume_comments
  return if peek != SOLIDUS || peek1 != ASTERISK
  loc = mark_pos
  2.times { advance } # consume "/*"
  text = scoped do
    until eof?
      break if peek == ASTERISK && peek1 == SOLIDUS
      advance
    end
  end
  return if eof? # unterminated comment: no token
  2.times { advance } # consume "*/"
  @tokens << {
    type: :comment,
    value: text.join,
    position: loc.finish
  }
end
|
#consume_delim_token ⇒ Object
391
392
393
|
# File 'lib/caps/tokenizer.rb', line 391
# Emits a :delim token for the single character at the cursor
# (delegates to pack_one).
def consume_delim_token
  pack_one(:delim)
end
|
#consume_escaped_codepoint ⇒ Object
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
|
# File 'lib/caps/tokenizer.rb', line 331
# Consumes an escaped code point (the introducing backslash must already
# be consumed) and returns it as a one-character string. Up to six hex
# digits are read, followed by at most one whitespace character; a zero,
# surrogate, or out-of-range code point yields U+FFFD instead.
def consume_escaped_codepoint
  if peek.hex?
    hex = scoped do
      advance
      len = 0
      until eof?
        break unless peek.hex?
        advance
        len += 1
        break if len == 5 # six hex digits max (one consumed above)
      end
    end
    advance if peek.whitespace?
    hex = hex.join.to_i(16)
    # Validate BEFORE packing: [n].pack("U") raises RangeError for
    # surrogates and for values above U+10FFFF, so the original post-pack
    # check could never run. 0x110000 itself is already out of range,
    # hence ">=" rather than ">".
    return REPLACEMENT_CHARACTER if hex.zero? ||
                                    (0xD800..0xDFFF).cover?(hex) ||
                                    hex >= MAXIMUM_ALLOWED_CODEPOINT
    [hex].pack("U")
  elsif eof?
    REPLACEMENT_CHARACTER
  else
    advance
  end
end
|
#consume_hash_token ⇒ Object
377
378
379
380
381
382
383
384
385
386
387
388
389
|
# File 'lib/caps/tokenizer.rb', line 377
# Consumes a "#" hash token. The flag is :id when the characters after
# the "#" form the start of an ident sequence, nil otherwise. The raw
# source slice is kept in :literal alongside the decoded :value.
# (Restores the statement boundary lost in extraction: "advance" and the
# flag assignment were merged onto one line.)
def consume_hash_token
  loc = mark_pos
  advance # consume the "#"
  flag = ident_sequence_start? ? :id : nil
  value = consume_ident_sequence
  @tokens << {
    type: :hash,
    literal: @contents[loc.start[:idx]..@idx],
    flag:,
    value:,
    position: loc.finish
  }
end
|
#consume_ident_sequence ⇒ Object
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
|
# File 'lib/caps/tokenizer.rb', line 359
# Reads the longest run of ident characters and valid escapes at the
# cursor and returns the decoded text as a String.
def consume_ident_sequence
  chars = []
  loop do
    break if eof?
    if peek.ident_char?
      chars << advance
    elsif valid_escape?
      advance # skip the backslash
      chars << consume_escaped_codepoint
    else
      break
    end
  end
  chars.join
end
|
#consume_ident_token ⇒ Object
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
|
# File 'lib/caps/tokenizer.rb', line 167
# Consumes an ident-like token: a plain :ident, a :function ("name("), or
# hands off to consume_url_token for an unquoted url(...). For "url(" the
# first non-whitespace character of the argument decides: a quote makes
# it a normal :function token (CSS Syntax §4.3.4).
def consume_ident_token
  loc = mark_pos
  string = consume_ident_sequence
  if string.casecmp?("url") && peek == LEFT_PARENS
    advance # consume the "("
    # Leave at most one whitespace character before inspecting the
    # argument, so a quote can appear at peek or just after it at peek1.
    advance while peek.whitespace? && peek1.whitespace?
    quotes = [DOUBLE_QUOTE, SINGLE_QUOTE]
    # Fixed per §4.3.4: "whitespace followed by a quote" is
    # peek.whitespace? && quote-at-peek1; the original had the two
    # checks swapped, making the second disjunct redundant.
    if quotes.include?(peek) || (peek.whitespace? && quotes.include?(peek1))
      @tokens << {
        type: :function,
        value: string,
        position: loc.finish
      }
    else
      consume_url_token(loc)
    end
  elsif peek == LEFT_PARENS
    # Fixed: this previously tested peek1, but the single advance below
    # consumes the character at peek — the "(" must be the next input
    # code point for a :function token.
    advance
    @tokens << {
      type: :function,
      value: string,
      position: loc.finish
    }
  else
    @tokens << {
      type: :ident,
      value: string,
      position: loc.finish
    }
  end
end
|
#consume_number ⇒ Object
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
|
# File 'lib/caps/tokenizer.rb', line 424
# Consumes a numeric literal at the cursor and returns a {type:, value:}
# hash: :integer with an Integer value, or :number with a Float value
# when a fractional part or exponent is present. (Restores statement
# boundaries lost in extraction at the ".", "e"/"E" and exponent-sign
# consumption steps.)
def consume_number
  type = :integer
  repr = []
  repr << advance if [PLUS_SIGN, HYPHEN_MINUS].include? peek
  repr << advance while peek.digit?
  if peek == FULL_STOP && peek1.digit?
    repr << advance # the "."
    repr << advance while peek.digit?
    type = :number
  end
  p = peek
  p1 = peek1
  p2 = peek2
  if %w[E e].include?(p) &&
     (p1.digit? || ([PLUS_SIGN, HYPHEN_MINUS].include?(p1) && p2.digit?))
    type = :number
    repr << advance # the "e"/"E"
    repr << advance if [PLUS_SIGN, HYPHEN_MINUS].include?(p1) # exponent sign
    repr << advance while peek.digit?
  end
  repr = repr.join
  {
    type:,
    value: type == :integer ? repr.to_i : repr.to_f
  }
end
|
#consume_numeric ⇒ Object
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
|
# File 'lib/caps/tokenizer.rb', line 395
# Consumes a number plus its optional suffix: a :dimension token when an
# ident sequence follows (its text stored in :unit), a :percentage when
# "%" follows, and a plain :numeric otherwise. (Restores the statement
# boundary lost in extraction between "advance" and the token push in the
# percentage branch.)
def consume_numeric
  loc = mark_pos
  number = consume_number
  if ident_sequence_start?
    @tokens << {
      type: :dimension,
      value: number[:value],
      flag: number[:type],
      unit: consume_ident_sequence,
      position: loc.finish
    }
  elsif peek == PERCENTAGE
    advance # consume the "%"
    @tokens << {
      type: :percentage,
      value: number[:value],
      position: loc.finish
    }
  else
    @tokens << {
      type: :numeric,
      value: number[:value],
      flag: number[:type],
      position: loc.finish
    }
  end
end
|
#consume_string ⇒ Object
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
|
# File 'lib/caps/tokenizer.rb', line 295
# Consumes a quoted string token. The opening delimiter (single or
# double quote) is consumed first and the matching delimiter ends the
# token; an unescaped line feed downgrades the token to :bad_string.
# After the scan one more character (the closing quote, or the offending
# newline for :bad_string) is consumed unless at EOF.
def consume_string
  loc = mark_pos
  ending_point = advance
  type = :string
  # scoped presumably collects the characters consumed inside the block
  # (it is joined below) — TODO confirm against Tokenizer#scoped.
  value = scoped do
    until eof?
      break if peek == ending_point
      if peek == LINE_FEED
        type = :bad_string
        break
      end
      if peek == REVERSE_SOLIDUS
        # NOTE(review): a trailing backslash at EOF returns from the
        # whole method without emitting any token — the CSS spec emits
        # the string token at EOF instead; confirm this is intended.
        advance and return if peek1.nil?
        2.times { advance }
        next
      end
      advance
    end
  end
  advance unless eof?
  @tokens << {
    type:,
    delimiter: ending_point,
    value: value.join,
    position: loc.finish
  }
end
|
#consume_token ⇒ Object
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
|
# File 'lib/caps/tokenizer.rb', line 43
# Dispatches on the next input character and consumes exactly one token,
# appending it to @tokens. No-op at EOF. (Restores statement boundaries
# lost in extraction inside the "<" lookahead, the "@" lookahead, and the
# at-keyword push.)
def consume_token
  return if eof?
  chr = peek
  case
  when chr.whitespace?
    consume_whitespace
  when [SINGLE_QUOTE, DOUBLE_QUOTE].include?(chr)
    consume_string
  when chr == NUMBER_SIGN
    # "#" starts a hash token only when followed by an ident character or
    # a valid escape; otherwise it is a plain delimiter.
    return consume_hash_token if peek1.ident_char? || valid_escape?(offset: 1)
    consume_delim_token
  when chr == LEFT_PARENS
    pack_one(:left_parens)
  when chr == RIGHT_PARENS
    pack_one(:right_parens)
  when chr == COLON
    pack_one(:colon)
  when chr == SEMI
    pack_one(:semicolon)
  when chr == COMMA
    pack_one(:comma)
  when chr == LEFT_SQUARE
    pack_one(:left_square)
  when chr == RIGHT_SQUARE
    pack_one(:right_square)
  when chr == LEFT_CURLY
    pack_one(:left_curly)
  when chr == RIGHT_CURLY
    pack_one(:right_curly)
  when chr == FULL_STOP
    # "." opens a number only when a digit follows (e.g. ".5").
    if peek1.digit?
      consume_numeric
    else
      consume_delim_token
    end
  when chr == HYPHEN_MINUS
    if peek1.digit?
      consume_numeric
    elsif peek1 == HYPHEN_MINUS && peek2 == GREATER_THAN
      consume_cdc_token # "-->"
    elsif ident_sequence_start?
      consume_ident_token
    else
      consume_delim_token
    end
  when chr == LESS_THAN
    # "<!--" opens a CDO token; look ahead without consuming (isolated
    # presumably restores the cursor afterwards).
    is_cdo = isolated do
      advance
      [peek, peek1, peek2] == [EXCLAMATION, HYPHEN_MINUS, HYPHEN_MINUS]
    end
    if is_cdo
      consume_cdo_token
    else
      consume_delim_token
    end
  when chr == COMMERCIAL_AT
    is_at_keyword = isolated do
      advance
      ident_sequence_start?
    end
    loc = mark_pos
    if is_at_keyword
      advance # consume the "@"
      @tokens << {
        type: :at_keyword,
        value: consume_ident_sequence,
        position: loc.finish
      }
    else
      consume_delim_token
    end
  when chr == REVERSE_SOLIDUS
    if valid_escape?
      consume_ident_token
    else
      loc = mark_pos
      @tokens << {
        type: :delim,
        value: advance,
        position: loc.finish
      }
    end
  when chr.digit?
    consume_numeric
  when chr.ident_start?
    consume_ident_token
  else
    consume_delim_token
  end
end
|
#consume_url_token(loc = nil) ⇒ Object
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
|
# File 'lib/caps/tokenizer.rb', line 203
# Consumes an unquoted url(...) body and emits a :url token (the opening
# "url(" must already be consumed; +loc+ lets the caller pin the token's
# start there). Quotes, "(", non-printable characters, or an invalid
# escape abort into consume_bad_url and emit a :bad_url token instead.
def consume_url_token(loc = nil)
  loc ||= mark_pos
  advance while peek.whitespace?
  value = []
  loop do
    chr = peek
    case
    # ")" is tested before eof?; at EOF peek is presumably nil so the
    # second branch catches it — keep this ordering.
    when chr == RIGHT_PARENS
      advance
      break
    when eof?
      break
    when [DOUBLE_QUOTE, SINGLE_QUOTE, LEFT_PARENS].include?(chr), chr.non_printable?
      consume_bad_url
      @tokens << {
        type: :bad_url,
        position: loc.finish
      }
      return
    when chr == REVERSE_SOLIDUS
      if valid_escape?
        value << consume_escaped_codepoint
      else
        consume_bad_url
        @tokens << {
          type: :bad_url,
          position: loc.finish
        }
        return
      end
    else
      value << advance
    end
  end
  @tokens << {
    type: :url,
    value: value.join,
    position: loc.finish
  }
end
|
#consume_whitespace ⇒ Object
267
268
269
|
# File 'lib/caps/tokenizer.rb', line 267
# Emits a single :whitespace token covering the run of whitespace at the
# cursor (pack_while presumably consumes while the block holds — see
# Tokenizer#pack_while).
def consume_whitespace
  pack_while(:whitespace) { peek.whitespace? }
end
|
#parse! ⇒ Object
17
18
19
20
|
# File 'lib/caps/tokenizer/infra.rb', line 17
# Runs the tokenizer over the entire input and returns the accumulated
# token list.
def parse!
  until eof?
    consume_token
  end
  @tokens
end
|
#pos ⇒ Object
22
23
24
|
# File 'lib/caps/tokenizer/infra.rb', line 22
# Snapshot of the current cursor: character index plus line and column
# counters.
def pos
  { idx: @idx, line: @line, column: @column }
end
|
#push_node(type, **opts) ⇒ Object
39
40
41
|
# File 'lib/caps/tokenizer.rb', line 39
# Appends a token hash of the given +type+ onto the token stream, merged
# with any extra attributes supplied as keyword arguments.
def push_node(type, **opts)
  node = { type: }
  node.merge!(opts)
  @tokens << node
end
|