Class: JSON::Lexer

Inherits:
Object show all
Defined in:
lib/json/lexer.rb

Instance Method Summary collapse

Constructor Details

#initialize(s) ⇒ Lexer

This method will initialize the lexer to contain a string.

Parameters
s

the string to initialize the lexer object with



35
36
37
38
# File 'lib/json/lexer.rb', line 35

# Creates a lexer over the given text with the read cursor at the start.
#
# s:: the string the lexer will read from
def initialize(s)
  @source = s
  @index = 0
end

Instance Method Details

#back ⇒ Object

Backs up the lexer status one character.



41
42
43
# File 'lib/json/lexer.rb', line 41

# Moves the read cursor one character backwards. The cursor never goes
# below zero; when it is already at the start nothing happens and nil is
# returned.
def back
  return unless @index > 0

  @index -= 1
end

#each ⇒ Object



290
291
292
293
294
# File 'lib/json/lexer.rb', line 290

# Yields every value produced by nextvalue, in order, until nextvalue
# returns a falsy result.
#
# NOTE: a nil or false returned by nextvalue ends the iteration.
def each
  value = nextvalue
  while value
    yield value
    value = nextvalue
  end
end

#more? ⇒ Boolean

Returns:

  • (Boolean)


45
46
47
# File 'lib/json/lexer.rb', line 45

# Tells whether the cursor is still inside the source string, i.e. whether
# there is at least one more character to read.
def more?
  @source.length > @index
end

#nextchar ⇒ Object

Consumes the next character.



50
51
52
53
54
# File 'lib/json/lexer.rb', line 50

# Returns the character under the cursor and advances the cursor by one.
# When the cursor is past the end of the source, "\0" is returned instead;
# the cursor is still advanced in that case.
def nextchar
  char = "\0"
  char = @source[@index, 1] if more?
  @index += 1
  char
end

#nextchars(n) ⇒ Object

Read the next n characters from the string in the lexer.

Parameters
n

the number of characters to read from the lexer



67
68
69
70
71
72
# File 'lib/json/lexer.rb', line 67

# Reads exactly n characters starting at the cursor and moves the cursor
# past them.
#
# n:: how many characters to read
#
# Raises a RuntimeError ("substring bounds error") when fewer than n
# characters remain; the cursor is left untouched in that case.
def nextchars(n)
  start = @index
  raise "substring bounds error" if start + n > @source.length

  @index = start + n
  @source[start, n]
end

#nextclean ⇒ Object

Reads the next significant character, skipping whitespace as well as `//` line comments and `/* ... */` block comments.



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/json/lexer.rb', line 76

# Returns the next significant character of the input.
#
# Characters that sort at or below a space are skipped, as are "//" line
# comments and "/* ... */" block comments. A lone "/" that does not start
# a comment is returned as-is, with the cursor put back on the character
# that followed it. "\0" is returned once the input is exhausted.
#
# Raises a RuntimeError ("unclosed comment") when a block comment is never
# terminated.
def nextclean
  while true
    c = nextchar

    unless c == '/'
      # Anything that is not a potential comment start: hand back end-of-input
      # and printable characters, silently drop the rest.
      return c if c == "\0" || c > " "
      next
    end

    follower = nextchar
    if follower == '/'
      # Line comment: swallow everything up to the line break or end of input.
      c = nextchar
      c = nextchar until c == "\n" || c == "\r" || c == "\0"
    elsif follower == '*'
      # Block comment: scan for the closing "*/".
      while true
        c = nextchar
        raise "unclosed comment" if c == "\0"
        next unless c == '*'
        break if nextchar == '/'
        # Not a terminator; re-examine the character after the "*".
        back
      end
    else
      # A plain slash: un-read the character that followed it.
      back
      return '/'
    end
  end
end

#nextmatch(char) ⇒ Object

Consumes the next character and checks that it matches a specified character.



58
59
60
61
62
# File 'lib/json/lexer.rb', line 58

# Consumes one character and insists that it equals the expected one.
#
# char:: the character that must come next
#
# Returns the consumed character. Raises a RuntimeError describing the
# mismatch otherwise (the character has still been consumed).
def nextmatch(char)
  found = nextchar
  return found if found == char

  raise "Expected '#{char}' and instead saw '#{found}'."
end

#nextstring(quot) ⇒ Object

Reads the next string, given a quote character (usually ' or ")

Parameters

quot: the next matching quote character to use



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/json/lexer.rb', line 163

# Reads the body of a quoted string. The opening quote must already have
# been consumed; reading stops at (and consumes) the matching closing quote.
#
# quot:: the quote character that terminates the string
#
# Backslash escapes \b, \t, \n, \f, \r and \uXXXX are decoded; any other
# escaped character (quotes, backslash, slash, ...) is copied through
# literally.
#
# Returns the decoded string. Raises a RuntimeError ("Unterminated string")
# when the input ends, or a raw line break is met, before the closing quote.
def nextstring(quot)
  buf = ""
  while true
    c = nextchar
    case c
    when "\0", "\n", "\r"
      # Each terminator is tested on its own: c is a single character, so a
      # pattern looking for the two-character sequence "\n\r" can never match.
      raise "Unterminated string"
    when "\\"
      chr = nextchar
      case chr
      when 'b' then buf << "\b"
      when 't' then buf << "\t"
      when 'n' then buf << "\n"
      when 'f' then buf << "\f"
      when 'r' then buf << "\r"
      when 'u'
        # Four hex digits naming a code point.
        buf << utf8str(Integer("0x" + nextchars(4)))
      else
        buf << chr
      end
    else
      return buf if c == quot
      buf << c
    end
  end
end

#nextto(regex) ⇒ Object

Reads characters up to, but not including, the next character that matches a regular expression.



198
199
200
201
202
203
204
205
206
207
208
# File 'lib/json/lexer.rb', line 198

# Collects characters up to, but not including, the first character that
# matches +regex+. A line break or the end of the input also stops the scan.
#
# regex:: pattern tested against each single character
#
# Returns the collected text (possibly empty). The cursor is left on the
# character that stopped the scan.
def nextto(regex)
  buf = ""
  while true
    c = nextchar
    # The terminators must be double-quoted: '\0', '\n' and '\r' in single
    # quotes are two-character strings that never equal a character read
    # from the input, which made the scan run forever at end of input.
    if regex =~ c || c == "\0" || c == "\n" || c == "\r"
      # nextchar advances even at end of input, so always step back.
      back
      return buf
    end
    buf << c
  end
end

#nextvalue ⇒ Object

Reads the next value from the string. This can return a string, an Integer, a floating point value, true, false, nil, a JSON array, or a JSON object.



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'lib/json/lexer.rb', line 213

# Reads the next value from the input.
#
# * A quote character starts a string, which is read with nextstring.
# * "{" and "[" are pushed back and handed to Hash#from_json and
#   Array#from_json respectively (both defined outside this method).
# * Anything else is read as a bare token up to the next delimiter
#   (double quote, space, colon, comma, "]", "}", "/" or end of input).
#   The token is mapped to true, false, nil, an Integer or a Float when it
#   looks like one, and is otherwise returned as a string.
#
# Returns nil when no token could be read (for example at end of input).
def nextvalue
  c = nextclean

  case c
  when '"', "'"
    return nextstring(c)
  when '{'
    back
    return Hash.new.from_json(self)
  when '['
    back
    return Array.new.from_json(self)
  end

  buf = ""
  while (c =~ /"| |:|,|\]|\}|\/|\0/).nil?
    buf << c
    c = nextchar
  end
  # Leave the delimiter for the caller to consume.
  back

  # Tabs and line breaks are not delimiters, so they can trail the token;
  # strip them all, otherwise "true\t" would not be recognised as true.
  s = buf.strip
  case s
  when "true"
    return true
  when "false"
    return false
  when "null"
    return nil
  when /\A[0-9.+-]/
    # Looks numeric: try an integer first, then a float. A token that is
    # neither falls through and is returned as a plain string.
    begin
      return Integer(s)
    rescue ArgumentError
      # not an integer
    end
    begin
      return Float(s)
    rescue ArgumentError
      # not a float either
    end
  end

  s.empty? ? nil : s
end

#skippast(to) ⇒ Object

Skip past the next instance of the character specified

Parameters
to

the character to skip past



285
286
287
288
# File 'lib/json/lexer.rb', line 285

# Moves the cursor just past the next occurrence of +to+, searching from
# the current position. When +to+ does not occur again the cursor is moved
# to the end of the source.
#
# to:: the text to skip past
#
# Returns the new cursor position.
def skippast(to)
  found_at = @source.index(to, @index)
  @index = found_at.nil? ? @source.length : found_at + to.length
end

#skipto(to) ⇒ Object

Skip to the next instance of the character specified

Parameters
to

Character to skip to



263
264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'lib/json/lexer.rb', line 263

# Advances the cursor to the next occurrence of the character +to+.
#
# to:: the character to skip to
#
# Returns +to+ with the cursor positioned on it when it is found. When the
# end of the input is reached first, the cursor is restored to where it
# was and "\0" is returned.
def skipto(to)
  start = @index
  loop do
    c = nextchar
    # Must be the double-quoted NUL: '\0' in single quotes is a backslash
    # followed by a zero and never equals what nextchar returns, which
    # turned a missing target into an endless loop.
    if c == "\0"
      @index = start
      return c
    end
    if c == to
      back
      return c
    end
  end
end

#unescape ⇒ Object



278
279
280
# File 'lib/json/lexer.rb', line 278

# Replaces the source text with its URL-decoded form, as produced by
# CGI.unescape. The cursor position is not adjusted.
def unescape
  decoded = CGI.unescape(@source)
  @source = decoded
end

#utf8str(code) ⇒ Object

Given a Unicode code point, return a string giving its UTF-8 representation based on RFC 2279.



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/json/lexer.rb', line 107

# Given a Unicode code point, returns a string holding its UTF-8
# representation.
#
# code:: the code point as an Integer
#
# The encoding is delegated to Array#pack with the "U" directive instead of
# assembling the bytes by hand: the hand-rolled version masked the final
# continuation byte with five bits instead of six, which corrupted most
# code points at or above U+0800. pack("U") also tags the result as UTF-8,
# so it can be appended to text that already contains non-ASCII characters.
def utf8str(code)
  [code].pack("U")
end