Class: Caps::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/caps/tokenizer.rb,
lib/caps/tokenizer/infra.rb,
lib/caps/tokenizer/helpers.rb,
lib/caps/tokenizer/location.rb

Defined Under Namespace

Modules: Helpers Classes: Location

Constant Summary collapse

LINE_FEED =
"\u000a"
REPLACEMENT_CHARACTER =
"\ufffd"
SOLIDUS =
"/"
REVERSE_SOLIDUS =
"\\"
ASTERISK =
"*"
SINGLE_QUOTE =
"'"
DOUBLE_QUOTE =
'"'
NUMBER_SIGN =
"#"
HYPHEN_MINUS =
"\u002d"
LEFT_PARENS =
"("
RIGHT_PARENS =
")"
PLUS_SIGN =
"+"
COMMA =
","
FULL_STOP =
"."
COLON =
":"
SEMI =
";"
LESS_THAN =
"<"
COMMERCIAL_AT =
"@"
LEFT_SQUARE =
"["
RIGHT_SQUARE =
"]"
LEFT_CURLY =
"{"
RIGHT_CURLY =
"}"
PERCENTAGE =
"%"
GREATER_THAN =
">"
EXCLAMATION =
"!"
MAXIMUM_ALLOWED_CODEPOINT =
0x110000

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(contents) ⇒ Tokenizer

Returns a new instance of Tokenizer.



12
13
14
15
# File 'lib/caps/tokenizer/infra.rb', line 12

# Builds a tokenizer over +source+: the raw input is run through the
# class-level preprocessing step once, then all cursor/token state is
# reset via +setup+.
def initialize(source)
  @contents = self.class.preprocess(source)
  setup
end

Instance Attribute Details

#contentsObject

Returns the value of attribute contents.



6
7
8
# File 'lib/caps/tokenizer/infra.rb', line 6

def contents
  @contents
end

#tokensObject

Returns the value of attribute tokens.



6
7
8
# File 'lib/caps/tokenizer/infra.rb', line 6

def tokens
  @tokens
end

Class Method Details

.parse(src) ⇒ Object



8
9
10
# File 'lib/caps/tokenizer/infra.rb', line 8

# One-shot convenience wrapper: tokenize +src+ and return the token list.
def self.parse(src) = new(src).parse!

.stringify(tokens) ⇒ Object



455
456
457
458
459
460
461
462
# File 'lib/caps/tokenizer.rb', line 455

# Renders a token stream as a compact, human-readable string: one
# "type(value)" cell per token, separated by single spaces. Tokens
# without a :value render as "type()"; values are shown via #inspect.
def self.stringify(tokens)
  tokens
    .map { |tok| format("%s(%s)", tok[:type], tok.key?(:value) ? tok[:value].inspect : "") }
    .join(" ")
end

Instance Method Details

#consume_bad_urlObject



252
253
254
255
256
257
258
259
260
261
262
263
264
265
# File 'lib/caps/tokenizer.rb', line 252

# Consumes the remnants of a bad url (CSS Syntax §4.3.14), discarding
# input until EOF or a closing ")".
#
# Fixes vs. the previous version:
# * the terminating ")" is itself consumed, as the spec requires, so it
#   cannot leak out afterwards as a spurious right-parens token;
# * a valid escape consumes its backslash before delegating to
#   #consume_escaped_codepoint (which assumes the backslash is already
#   gone — see #consume_ident_sequence), so an escaped ")" no longer
#   terminates the bad url early.
def consume_bad_url
  until eof?
    if peek == RIGHT_PARENS
      advance # swallow the ")" that ends the bad url
      return
    end

    if valid_escape?
      advance # consume the backslash first
      consume_escaped_codepoint
    else
      advance
    end
  end
end

#consume_cdc_tokenObject



156
157
158
159
160
161
162
163
164
165
# File 'lib/caps/tokenizer.rb', line 156

# Emits a CDC ("-->") token. Nothing of the marker has been consumed
# yet, so exactly three codepoints are taken from the input.
def consume_cdc_token
  span = mark_pos
  3.times { advance }

  @tokens << { type: :cdc, position: span.finish }
end

#consume_cdo_tokenObject



146
147
148
149
150
151
152
153
154
# File 'lib/caps/tokenizer.rb', line 146

# Emits a CDO ("<!--") token. Nothing of the marker has been consumed
# yet, so exactly four codepoints are taken from the input.
def consume_cdo_token
  span = mark_pos
  4.times { advance }

  @tokens << { type: :cdo, position: span.finish }
end

#consume_commentObject



271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/caps/tokenizer.rb', line 271

# Consumes a "/* ... */" comment and emits a :comment token holding the
# body text. A no-op when the input is not sitting on "/*". An
# unterminated comment swallows the rest of the input and emits nothing.
def consume_comment
  return unless peek == SOLIDUS && peek1 == ASTERISK

  loc = mark_pos
  2.times { advance } # skip over "/*"

  body = scoped do
    advance until eof? || (peek == ASTERISK && peek1 == SOLIDUS)
  end

  return if eof? # Malformed sheet?

  2.times { advance } # skip over "*/"

  @tokens << {
    type: :comment,
    value: body.join,
    position: loc.finish
  }
end

#consume_delim_tokenObject



391
392
393
# File 'lib/caps/tokenizer.rb', line 391

# A codepoint with no dedicated token type becomes a one-character
# delim token.
def consume_delim_token = pack_one(:delim)

#consume_escaped_codepointObject



331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
# File 'lib/caps/tokenizer.rb', line 331

# Consumes an escaped codepoint (CSS Syntax §4.3.7) and returns it as a
# decoded one-character string. Assumes REVERSE_SOLIDUS was already
# consumed by the caller.
#
# Fixes vs. the previous version:
# * the null/surrogate/out-of-range validation now happens *before*
#   [code].pack("U"), so invalid values can never reach pack;
# * the out-of-range comparison uses >=, because
#   MAXIMUM_ALLOWED_CODEPOINT (0x110000) is one *past* the last valid
#   codepoint U+10FFFF — the spec maps anything greater than U+10FFFF
#   to U+FFFD, which includes 0x110000 itself.
# NOTE(review): surrogates are detected with the integer range
# 0xD800..0xDFFF instead of the String#surrogate? helper; assumed
# equivalent — confirm against the helper's definition.
def consume_escaped_codepoint
  if peek.hex?
    digits = scoped do
      advance # the first hex digit

      extra = 0
      until eof?
        break unless peek.hex?

        advance
        extra += 1
        break if extra == 5 # an escape holds at most six hex digits
      end
    end
    advance if peek.whitespace? # one whitespace after the escape is eaten

    code = digits.join.to_i(16)
    invalid = code.zero? || (0xD800..0xDFFF).cover?(code) || code >= MAXIMUM_ALLOWED_CODEPOINT
    return REPLACEMENT_CHARACTER if invalid

    [code].pack("U")
  elsif eof?
    # A backslash right before EOF is a parse error; decode to U+FFFD.
    REPLACEMENT_CHARACTER
  else
    advance # any other codepoint is escaped to itself
  end
end

#consume_hash_tokenObject



377
378
379
380
381
382
383
384
385
386
387
388
389
# File 'lib/caps/tokenizer.rb', line 377

# Consumes "#" plus the ident sequence that follows and emits a :hash
# token. The :flag is :id when the sequence after "#" would start an
# ident, nil otherwise; :literal keeps the raw source slice.
def consume_hash_token
  loc = mark_pos
  advance # past the "#"
  id_flag = ident_sequence_start? ? :id : nil
  name = consume_ident_sequence

  @tokens << {
    type: :hash,
    literal: @contents[loc.start[:idx]..@idx],
    flag: id_flag,
    value: name,
    position: loc.finish
  }
end

#consume_ident_sequenceObject



359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
# File 'lib/caps/tokenizer.rb', line 359

# Consumes the longest run of ident codepoints (including valid
# escapes) from the input and returns it as a string.
def consume_ident_sequence
  chars = []

  loop do
    break if eof?

    if peek.ident_char?
      chars << advance
    elsif valid_escape?
      advance # drop the backslash; the decoded codepoint follows
      chars << consume_escaped_codepoint
    else
      break
    end
  end

  chars.join
end

#consume_ident_tokenObject



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/caps/tokenizer.rb', line 167

# Consumes an ident-like token (CSS Syntax §4.3.4): a plain ident, a
# function ("name(") or an unquoted url token ("url(...)").
#
# Fixes vs. the previous version:
# * the whitespace-then-quote lookahead inside "url(" inspected
#   peek/peek1 in the wrong order (the old second clause was dead code,
#   fully subsumed by the first disjunct);
# * a function is signalled by "(" as the *next* codepoint — i.e. peek,
#   exactly as in the url branch above — but it was tested at peek1, so
#   e.g. "calc(" was mistokenized as a plain ident.
def consume_ident_token
  loc = mark_pos
  string = consume_ident_sequence
  if string.casecmp?("url") && peek == LEFT_PARENS
    advance # consume LEFT_PARENS
    # Drain whitespace while at least two whitespace codepoints remain,
    # leaving at most one in front of a potential quote.
    advance while peek.whitespace? && peek1.whitespace?
    quotes = [DOUBLE_QUOTE, SINGLE_QUOTE]
    if quotes.include?(peek) || (peek.whitespace? && quotes.include?(peek1))
      @tokens << {
        type: :function,
        value: string,
        position: loc.finish
      }
      # next we will have optional whitespace followed by a string, so
      # just create the function token and move on.
    else
      # LEFT_PARENS was already consumed at this point, just consume the
      # url token and get the result.
      consume_url_token(loc)
    end
  elsif peek == LEFT_PARENS
    advance # consume LEFT_PARENS
    @tokens << {
      type: :function,
      value: string,
      position: loc.finish
    }
  else
    @tokens << {
      type: :ident,
      value: string,
      position: loc.finish
    }
  end
end

#consume_numberObject



424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
# File 'lib/caps/tokenizer.rb', line 424

# Consumes a numeric representation (CSS Syntax §4.3.12) and returns a
# hash with :type (:integer or :number) and the parsed :value. The type
# flips to :number as soon as a fraction or exponent appears.
def consume_number
  kind = :integer
  text = []

  # Optional sign, then the integer digits.
  text << advance if [PLUS_SIGN, HYPHEN_MINUS].include?(peek)
  text << advance while peek.digit?

  # Fractional part: "." only counts when a digit follows.
  if peek == FULL_STOP && peek1.digit?
    text << advance # the "."
    text << advance while peek.digit?
    kind = :number
  end

  # Exponent part: "e"/"E", an optional sign, then at least one digit.
  marker = peek
  follower = peek1
  if %w[E e].include?(marker)
    signed = [PLUS_SIGN, HYPHEN_MINUS].include?(follower) && peek2.digit?
    if follower.digit? || signed
      kind = :number
      text << advance # the exponent marker
      text << advance if signed
      text << advance while peek.digit?
    end
  end

  repr = text.join

  {
    type: kind,
    value: kind == :integer ? repr.to_i : repr.to_f
  }
end

#consume_numericObject



395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
# File 'lib/caps/tokenizer.rb', line 395

# Consumes a numeric token (CSS Syntax §4.3.3): a :dimension when an
# ident sequence follows the number, a :percentage when "%" follows,
# otherwise a plain :numeric token.
def consume_numeric
  span = mark_pos
  number = consume_number

  token =
    if ident_sequence_start?
      {
        type: :dimension,
        value: number[:value],
        flag: number[:type],
        unit: consume_ident_sequence
      }
    elsif peek == PERCENTAGE
      advance # consume "%"
      { type: :percentage, value: number[:value] }
    else
      { type: :numeric, value: number[:value], flag: number[:type] }
    end

  @tokens << token.merge(position: span.finish)
end

#consume_stringObject



295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
# File 'lib/caps/tokenizer.rb', line 295

# Consumes a quoted string token (CSS Syntax §4.3.5). The opening quote
# becomes the delimiter we scan for; escape sequences are kept verbatim
# in the value. An unescaped newline turns the token into a :bad_string.
#
# Fixes vs. the previous version:
# * a backslash as the very last codepoint used to `return` without
#   emitting any token, silently dropping the string — per the spec an
#   EOF inside a string is a parse error but still yields the token;
# * the newline that terminates a bad-string is no longer consumed by
#   the closing-quote advance (the inner comment already said "Do not
#   advance" — the spec reconsumes it as the next whitespace token).
def consume_string
  loc = mark_pos
  ending_point = advance # the opening quote doubles as the delimiter
  type = :string

  value = scoped do
    until eof?
      break if peek == ending_point

      if peek == LINE_FEED
        # Do not advance. Only create bad-string and stop.
        type = :bad_string
        break
      end

      if peek == REVERSE_SOLIDUS
        # Backslash as the final codepoint: stop here; the token is
        # still emitted and the backslash is dropped below.
        break if peek1.nil?

        2.times { advance } # keep the escape sequence verbatim
        next
      end

      advance
    end
  end

  # Consume the closing quote (or a trailing lone backslash), but leave
  # a terminating newline unconsumed so it forms the next whitespace.
  advance if !eof? && type != :bad_string

  @tokens << {
    type:,
    delimiter: ending_point,
    value: value.join,
    position: loc.finish
  }
end

#consume_tokenObject



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/caps/tokenizer.rb', line 43

# Consumes exactly one token from the input and appends it to @tokens,
# dispatching on the next unconsumed codepoint (cf. CSS Syntax §4.3.1,
# "Consume a token"). Any leading comment is consumed (and tokenized)
# first; a trailing unterminated comment can exhaust the input, hence
# the early EOF return.
def consume_token
  consume_comment
  return if eof?

  chr = peek
  case
  when chr.whitespace?
    consume_whitespace
  when [SINGLE_QUOTE, DOUBLE_QUOTE].include?(chr)
    consume_string
  when chr == NUMBER_SIGN
    # "#" starts a hash token only when an ident char or a valid escape
    # follows; a bare "#" degrades to a delim token.
    return consume_hash_token if peek1.ident_char? || valid_escape?(offset: 1)

    consume_delim_token

  when chr == LEFT_PARENS
    pack_one(:left_parens)
  when chr == RIGHT_PARENS
    pack_one(:right_parens)
  when chr == COLON
    pack_one(:colon)
  when chr == SEMI
    pack_one(:semicolon)
  when chr == COMMA
    pack_one(:comma)
  when chr == LEFT_SQUARE
    pack_one(:left_square)
  when chr == RIGHT_SQUARE
    pack_one(:right_square)
  when chr == LEFT_CURLY
    pack_one(:left_curly)
  when chr == RIGHT_CURLY
    pack_one(:right_curly)
  when chr == FULL_STOP
    # "." followed by a digit begins a fractional number (e.g. ".5").
    if peek1.digit?
      consume_numeric
    else
      consume_delim_token
    end
  when chr == HYPHEN_MINUS
    # "-" may start a negative number, the CDC marker "-->", or an
    # ident like "-moz-..." / "--custom-prop"; otherwise it is a delim.
    if peek1.digit?
      consume_numeric
    elsif peek1 == HYPHEN_MINUS && peek2 == GREATER_THAN
      consume_cdc_token
    elsif ident_sequence_start?
      consume_ident_token
    else
      consume_delim_token
    end
  when chr == LESS_THAN
    # Lookahead for the CDO marker "<!--". The probe runs inside
    # `isolated`, which must rewind the cursor afterwards —
    # consume_cdo_token re-consumes all four codepoints.
    is_cdo = isolated do
      advance # consume LESS_THAN
      next_three = [peek, peek1, peek2]
      next_three == [EXCLAMATION, HYPHEN_MINUS, HYPHEN_MINUS]
    end

    if is_cdo
      consume_cdo_token
    else
      consume_delim_token
    end
  when chr == COMMERCIAL_AT
    # "@" begins an at-keyword only when an ident sequence follows;
    # again probed inside `isolated` so the "@" is not yet consumed.
    is_at_keyword = isolated do
      advance # consume COMMERCIAL_AT
      ident_sequence_start?
    end

    loc = mark_pos

    if is_at_keyword
      advance # consume COMMERCIAL_AT
      @tokens << {
        type: :at_keyword,
        value: consume_ident_sequence,
        position: loc.finish
      }
    else
      consume_delim_token
    end

  when chr == REVERSE_SOLIDUS
    # A valid escape can begin an ident; a stray backslash becomes a
    # delim token carrying the backslash itself.
    if valid_escape?
      consume_ident_token
    else
      loc = mark_pos
      @tokens << {
        type: :delim,
        value: advance,
        position: loc.finish
      }
    end

  when chr.digit?
    consume_numeric

  when chr.ident_start?
    consume_ident_token

  else
    # Anything unrecognized falls through to a one-character delim.
    consume_delim_token
  end
end

#consume_url_token(loc = nil) ⇒ Object



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/caps/tokenizer.rb', line 203

# Consumes an unquoted url token (CSS Syntax §4.3.6). The caller has
# already consumed "url(" (see #consume_ident_token); +loc+ carries the
# position marker opened there, or a fresh one is created.
#
# Fixes vs. the previous version:
# * interior/trailing whitespace was copied into the url value; per the
#   spec it is skipped, and only ")" or EOF may follow — anything else
#   makes the whole token a bad-url;
# * a valid escape now consumes its backslash before delegating to
#   #consume_escaped_codepoint (which assumes the backslash is already
#   gone — see #consume_ident_sequence), so escapes decode instead of
#   passing the raw backslash through.
def consume_url_token(loc = nil)
  loc ||= mark_pos
  # Consume as much whitespace as possible
  advance while peek.whitespace?
  value = []

  loop do
    chr = peek
    case
    when chr == RIGHT_PARENS
      advance
      break

    when eof?
      break

    when chr.whitespace?
      # Skip the whitespace run; only ")" or EOF may legally follow.
      advance while peek.whitespace?
      next if eof? || peek == RIGHT_PARENS

      consume_bad_url
      @tokens << {
        type: :bad_url,
        position: loc.finish
      }
      return

    when [DOUBLE_QUOTE, SINGLE_QUOTE, LEFT_PARENS].include?(chr), chr.non_printable?
      # Parse error. Consume what's left of the url, return BAD_URL.
      consume_bad_url
      @tokens << {
        type: :bad_url,
        position: loc.finish
      }
      return

    when chr == REVERSE_SOLIDUS
      if valid_escape?
        advance # consume the backslash first
        value << consume_escaped_codepoint
      else
        consume_bad_url
        @tokens << {
          type: :bad_url,
          position: loc.finish
        }
        return
      end

    else
      value << advance
    end
  end

  @tokens << {
    type: :url,
    value: value.join,
    position: loc.finish
  }
end

#consume_whitespaceObject



267
268
269
# File 'lib/caps/tokenizer.rb', line 267

# Folds a run of consecutive whitespace codepoints into a single
# whitespace token.
def consume_whitespace = pack_while(:whitespace) { peek.whitespace? }

#parse!Object



17
18
19
20
# File 'lib/caps/tokenizer/infra.rb', line 17

# Runs the tokenizer until the input is exhausted and returns the
# accumulated token list.
def parse!
  loop do
    break if eof?

    consume_token
  end
  @tokens
end

#posObject



22
23
24
# File 'lib/caps/tokenizer/infra.rb', line 22

# Snapshot of the current cursor location: the absolute character
# index plus the line/column counters maintained by the tokenizer.
def pos
  { idx: @idx, line: @line, column: @column }
end

#push_node(type, **opts) ⇒ Object



39
40
41
# File 'lib/caps/tokenizer.rb', line 39

# Appends a token hash of the given +type+, merged with any extra
# attributes, to the token stream.
def push_node(type, **opts)
  @tokens << { type:, **opts }
end