Class: Hocon::Impl::Tokenizer::TokenIterator

Inherits:
Object
Extended by:
Forwardable
Defined in:
lib/hocon/impl/tokenizer.rb

Defined Under Namespace

Classes: WhitespaceSaver

Constant Summary

FIRST_NUMBER_CHARS = "0123456789-"
  Chars JSON allows a number to start with.

NUMBER_CHARS = "0123456789eE+-."
  Chars JSON allows to be part of a number.

NOT_IN_UNQUOTED_TEXT = "$\"{}[]:=,+#`^?!@*&\\"
  Chars that stop an unquoted string.

Class Method Summary

.problem, .simple_value?, .whitespace?, .whitespace_not_newline?

Instance Method Summary

#empty?, #next, #next_char_after_whitespace, #next_char_raw, #pull_comment, #pull_next_token, #pull_number, #pull_quoted_string, #pull_unquoted_text, #put_back, #queue_next_token, #start_of_comment?

Constructor Details

#initialize(origin, input, allow_comments) ⇒ TokenIterator

Returns a new instance of TokenIterator.



# File 'lib/hocon/impl/tokenizer.rb', line 101

def initialize(origin, input, allow_comments)
  @origin = origin
  @input = input
  @allow_comments = allow_comments
  @buffer = []
  @line_number = 1
  @line_origin = @origin.set_line_number(@line_number)
  @tokens = []
  @tokens << Tokens::START
  @whitespace_saver = WhitespaceSaver.new
end
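A minimal usage sketch, assuming the class can be required via the file listed above and that an origin can be built with Hocon::Impl::SimpleConfigOrigin.new_simple (both the require paths and that constructor are assumptions; any ConfigOrigin that responds to set_line_number should work). The input only needs to respond to #readchar, so a StringIO is enough:

require 'stringio'
require 'hocon/impl/tokenizer'            # assumed require path
require 'hocon/impl/simple_config_origin' # assumed require path

origin = Hocon::Impl::SimpleConfigOrigin.new_simple("example") # assumed constructor
input  = StringIO.new("a : 42 # trailing comment\n")

iterator = Hocon::Impl::Tokenizer::TokenIterator.new(origin, input, true)

# Drain the iterator; the stream always starts with Tokens::START and
# ends with Tokens::EOF.
loop do
  token = iterator.next
  puts token.inspect
  break if token == Hocon::Impl::Tokens::EOF
end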

Class Method Details

.problem(origin, what, message, suggest_quotes, cause) ⇒ Object



# File 'lib/hocon/impl/tokenizer.rb', line 78

def self.problem(origin, what, message, suggest_quotes, cause)
  if what.nil? || message.nil?
    raise Hocon::ConfigError::ConfigBugOrBrokenError.new("internal error, creating bad TokenizerProblemError", nil)
  end
  TokenizerProblemError.new(Tokens.new_problem(origin, what, message, suggest_quotes, cause))
end

.simple_value?(t) ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/hocon/impl/tokenizer.rb', line 85

def self.simple_value?(t)
  Tokens.substitution?(t) ||
      Tokens.unquoted_text?(t) ||
      Tokens.value?(t)
end

.whitespace?(c) ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/hocon/impl/tokenizer.rb', line 91

def self.whitespace?(c)
  Hocon::Impl::ConfigImplUtil.whitespace?(c)
end

.whitespace_not_newline?(c) ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/hocon/impl/tokenizer.rb', line 95

def self.whitespace_not_newline?(c)
  (c != "\n") and (Hocon::Impl::ConfigImplUtil.whitespace?(c))
end

Instance Method Details

#empty? ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/hocon/impl/tokenizer.rb', line 384

def empty?
  @tokens.empty?
end

#next ⇒ Object



# File 'lib/hocon/impl/tokenizer.rb', line 369

def next
  t = @tokens.shift
  if (@tokens.empty?) and (t != Tokens::EOF)
    begin
      queue_next_token
    rescue TokenizerProblemError => e
      @tokens.push(e.problem)
    end
    if @tokens.empty?
      raise ConfigBugError, "bug: tokens queue should not be empty here"
    end
  end
  t
end

#next_char_after_whitespace(saver) ⇒ Object



# File 'lib/hocon/impl/tokenizer.rb', line 155

def next_char_after_whitespace(saver)
  while true
    c = next_char_raw
    if c == -1
      return -1
    else
      if self.class.whitespace_not_newline?(c)
        saver.add(c)
      else
        return c
      end
    end
  end
end

#next_char_raw ⇒ Object



# File 'lib/hocon/impl/tokenizer.rb', line 143

def next_char_raw
  if @buffer.empty?
    begin
      @input.readchar.chr
    rescue EOFError
      -1
    end
  else
    @buffer.pop
  end
end

#pull_comment(first_char) ⇒ Object



# File 'lib/hocon/impl/tokenizer.rb', line 213

def pull_comment(first_char)
  if first_char == '/'
    discard = next_char_raw
    if discard != '/'
      raise ConfigBugError, "called pullComment but // not seen"
    end
  end

  io = StringIO.new
  while true
    c = next_char_raw
    if (c == -1) || (c == "\n")
      put_back(c)
      return Tokens.new_comment(@line_origin, io.string)
    else
      io << c
    end
  end
end

#pull_next_token(saver) ⇒ Object



# File 'lib/hocon/impl/tokenizer.rb', line 311

def pull_next_token(saver)
  c = next_char_after_whitespace(saver)
  if c == -1
    Tokens::EOF
  elsif c == "\n"
    # newline tokens have the just-ended line number
    line = Tokens.new_line(@line_origin)
    @line_number += 1
    @line_origin = @origin.set_line_number(@line_number)
    line
  else
    t = nil
    if start_of_comment?(c)
      t = pull_comment(c)
    else
      t = case c
            when '"' then pull_quoted_string
            when '$' then pull_substitution
            when ':' then Tokens::COLON
            when ',' then Tokens::COMMA
            when '=' then Tokens::EQUALS
            when '{' then Tokens::OPEN_CURLY
            when '}' then Tokens::CLOSE_CURLY
            when '[' then Tokens::OPEN_SQUARE
            when ']' then Tokens::CLOSE_SQUARE
            when '+' then pull_plus_equals
            else nil
          end

      if t.nil?
        if FIRST_NUMBER_CHARS.index(c)
          t = pull_number(c)
        elsif NOT_IN_UNQUOTED_TEXT.index(c)
          raise Hocon::Impl::Tokenizer::TokenIterator.problem(@line_origin, c, "Reserved character '#{c}' is not allowed outside quotes", true, nil)
        else
          put_back(c)
          t = pull_unquoted_text
        end
      end
    end

    if t.nil?
      raise ConfigBugError, "bug: failed to generate next token"
    end

    t
  end
end

#pull_number(first_char) ⇒ Object



# File 'lib/hocon/impl/tokenizer.rb', line 233

def pull_number(first_char)
  sb = StringIO.new
  sb << first_char
  contained_decimal_or_e = false
  c = next_char_raw
  while (c != -1) && (NUMBER_CHARS.index(c))
    if (c == '.') ||
        (c == 'e') ||
        (c == 'E')
      contained_decimal_or_e = true
    end
    sb << c
    c = next_char_raw
  end
  # the last character we looked at wasn't part of the number, put it
  # back
  put_back(c)
  s = sb.string
  begin
    if contained_decimal_or_e
      # force floating point representation
      Tokens.new_double(@line_origin, s.to_f, s)
    else
      Tokens.new_long(@line_origin, s.to_i, s)
    end
  rescue ArgumentError => e
    if e.message =~ /^invalid value for (Float|Integer)\(\)/
      # not a number after all, see if it's an unquoted string.
      s.each_char do |u|
        if NOT_IN_UNQUOTED_TEXT.index(u)
          raise self.class.problem(@line_origin, u, "Reserved character '#{u}' " +
            "is not allowed outside quotes", true, nil)
        end
      end
      # no evil chars so we just decide this was a string and
      # not a number.
      Tokens.new_unquoted_text(@line_origin, s)
    else
      raise e
    end
  end
end
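The choice between a long and a double token depends only on whether a '.', 'e', or 'E' was seen while scanning the number. A standalone restatement of that rule, for illustration only (the number_token_kind helper is not part of the class):

# Mirrors the contained_decimal_or_e flag used above.
def number_token_kind(s)
  s =~ /[.eE]/ ? :double : :long
end

number_token_kind("42")    # => :long
number_token_kind("-3.5")  # => :double
number_token_kind("1e6")   # => :double (an exponent forces floating point)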

#pull_quoted_string ⇒ Object



# File 'lib/hocon/impl/tokenizer.rb', line 276

def pull_quoted_string
  # the open quote has already been consumed
  sb = StringIO.new
  c = ""
  while c != '"'
    c = next_char_raw
    if c == -1
      raise self.problem(@line_origin, c, "End of input but string quote was still open", false, nil)
    end

    if c == "\\"
      pull_escape_sequence(sb)
    elsif c == '"'
      # done!
    elsif c =~ /[[:cntrl:]]/
      raise self.problem(@line_origin, c, "JSON does not allow unescaped #{c}" +
                        " in quoted strings, use a backslash escape", false, nil)
    else
      sb << c
    end
  end

  # maybe switch to triple-quoted string, sort of hacky...
  if sb.length == 0
    third = next_char_raw
    if third == '"'
      append_triple_quoted_string(sb)
    else
      put_back(third)
    end
  end

  Tokens.new_string(@line_origin, sb.string)
end

#pull_unquoted_text ⇒ Object

The rules here are intended to maximize convenience while avoiding confusion with real, valid JSON: anything that parses as JSON is treated the JSON way; otherwise we assume it is a string and let the parser sort it out.



# File 'lib/hocon/impl/tokenizer.rb', line 174

def pull_unquoted_text
  origin = @line_origin
  io = StringIO.new
  c = next_char_raw
  while true
    if (c == -1) or
        (NOT_IN_UNQUOTED_TEXT.index(c)) or
        (self.class.whitespace?(c)) or
        (start_of_comment?(c))
      break
    else
      io << c
    end

    # we parse true/false/null tokens as such no matter
    # what is after them, as long as they are at the
    # start of the unquoted token.
    if io.length == 4
      if io.string == "true"
        return Tokens.new_boolean(origin, true)
      elsif io.string == "null"
        return Tokens.new_null(origin)
      end
    elsif io.length == 5
      if io.string == "false"
        return Tokens.new_boolean(origin, false)
      end
    end

    c = next_char_raw
  end

  # put back the char that ended the unquoted text
  put_back(c)

  Tokens.new_unquoted_text(origin, io.string)
end
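A hedged sketch of how bare words come back from the iterator: true and null arrive as typed value tokens, while other words stay as unquoted text. The requires and the new_simple call are the same assumptions as in the #initialize sketch; other token kinds (newlines, saved whitespace) simply fall through the conditional here.

require 'stringio'
require 'hocon/impl/tokenizer'            # assumed require path
require 'hocon/impl/simple_config_origin' # assumed require path

origin   = Hocon::Impl::SimpleConfigOrigin.new_simple("unquoted example") # assumed
iterator = Hocon::Impl::Tokenizer::TokenIterator.new(
  origin, StringIO.new("true null maybe\n"), true)

tokens = Hocon::Impl::Tokens
loop do
  t = iterator.next
  break if t == tokens::EOF
  if tokens.value?(t)              # true and null are typed values
    puts "value:    #{t.inspect}"
  elsif tokens.unquoted_text?(t)   # "maybe" comes back as unquoted text
    puts "unquoted: #{t.inspect}"
  end
end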

#put_back(c) ⇒ Object



# File 'lib/hocon/impl/tokenizer.rb', line 136

def put_back(c)
  if @buffer.length > 2
    raise ConfigBugError, "bug: putBack() three times, undesirable look-ahead"
  end
  @buffer.push(c)
end
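Together, next_char_raw and put_back implement a small pushback reader: characters are read one at a time, and a tiny buffer lets the tokenizer peek ahead (for example at a possible second '/' in start_of_comment?) and then un-read the character. A self-contained sketch of the same pattern, outside the gem:

require 'stringio'

# Standalone illustration of the pushback pattern; not part of the gem.
class PushbackReader
  def initialize(io)
    @io = io
    @buffer = []
  end

  def next_char
    return @buffer.pop unless @buffer.empty?
    @io.readchar.chr
  rescue EOFError
    -1  # same end-of-input sentinel the tokenizer uses
  end

  def put_back(c)
    @buffer.push(c)
  end
end

reader = PushbackReader.new(StringIO.new("//x"))
first  = reader.next_char   # => "/"
peeked = reader.next_char   # => "/" (one character of lookahead)
reader.put_back(peeked)     # un-read it
reader.next_char            # => "/" again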

#queue_next_token ⇒ Object



# File 'lib/hocon/impl/tokenizer.rb', line 360

def queue_next_token
  t = pull_next_token(@whitespace_saver)
  whitespace = @whitespace_saver.check(t, @origin, @line_number)
  if whitespace
    @tokens.push(whitespace)
  end
  @tokens.push(t)
end

#start_of_comment?(c) ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/hocon/impl/tokenizer.rb', line 113

def start_of_comment?(c)
  if c == -1
    false
  else
    if @allow_comments
      if c == '#'
        true
      elsif c == '/'
        maybe_second_slash = next_char_raw
        # we want to predictably NOT consume any chars
        put_back(maybe_second_slash)
        if maybe_second_slash == '/'
          true
        else
          false
        end
      end
    else
      false
    end
  end
end