Class: RubyLexer

Inherits:
Object
  • Object
show all
Includes:
State::Values
Defined in:
lib/ruby_lexer.rex.rb,
lib/ruby_lexer.rb,
lib/ruby_lexer.rb

Overview

The generated lexer RubyLexer

Defined Under Namespace

Classes: LexerError, ScanError, State

Constant Summary collapse

HAS_ENC =

:stopdoc:

"".respond_to? :encoding
IDENT_CHAR =
if HAS_ENC then
  /[\w\u0080-\u{10ffff}]/u
else
  /[\w\x80-\xFF]/n
end
EOF =
:eof_haha!
STR_FUNC_BORING =

ruby constants for strings (should this be moved somewhere else?)

0x00
STR_FUNC_ESCAPE =

TODO: remove and replace with REGEXP

0x01
STR_FUNC_EXPAND =
0x02
STR_FUNC_REGEXP =
0x04
STR_FUNC_QWORDS =
0x08
STR_FUNC_SYMBOL =
0x10
STR_FUNC_INDENT =

<<-HEREDOC

0x20
STR_FUNC_ICNTNT =

<<~HEREDOC

0x40
STR_SQUOTE =
STR_FUNC_BORING
STR_DQUOTE =
STR_FUNC_BORING | STR_FUNC_EXPAND
STR_XQUOTE =
STR_FUNC_BORING | STR_FUNC_EXPAND
STR_REGEXP =
STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
STR_SSYM =
STR_FUNC_SYMBOL
STR_DSYM =
STR_FUNC_SYMBOL | STR_FUNC_EXPAND
ESCAPES =
{
  "a"    => "\007",
  "b"    => "\010",
  "e"    => "\033",
  "f"    => "\f",
  "n"    => "\n",
  "r"    => "\r",
  "s"    => " ",
  "t"    => "\t",
  "v"    => "\13",
  "\\"   => '\\',
  "\n"   => "",
  "C-\?" => 127.chr,
  "c\?"  => 127.chr,
}
TOKENS =
{
  "!"   => :tBANG,
  "!="  => :tNEQ,
  # "!@"  => :tUBANG,
  "!~"  => :tNMATCH,
  ","   => :tCOMMA,
  ".."  => :tDOT2,
  "..." => :tDOT3,
  "="   => :tEQL,
  "=="  => :tEQ,
  "===" => :tEQQ,
  "=>"  => :tASSOC,
  "=~"  => :tMATCH,
  "->"  => :tLAMBDA,
}
TAB_WIDTH =
8
IDENT =

:stopdoc:

/^#{IDENT_CHAR}+/o
ESC =
/\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]{1,4}|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
SIMPLE_STRING =
/((#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*)/o
SSTRING =
/((\\.|[^\'])*)/
INT_DEC =
/[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)(ri|r|i)?\b|0d[0-9_]+)(ri|r|i)?/i
INT_HEX =
/[+]?0x[a-f0-9_]+(ri|r|i)?/i
INT_BIN =
/[+]?0b[01_]+(ri|r|i)?/i
INT_OCT =
/[+]?0o?[0-7_]+(ri|r|i)?|0o(ri|r|i)?/i
FLOAT =
/[+]?\d[\d_]*\.[\d_]+(e[+-]?[\d_]+)?(?:(ri|r|i)\b)?|[+]?[\d_]+e[+-]?[\d_]+(?:(ri|r|i)\b)?/i
INT_DEC2 =
/[+]?\d[0-9_]*(?![e])((ri|r|i)\b)?/i
NUM_BAD =
/[+]?0[xbd]\b/i
INT_OCT_BAD =
/[+]?0o?[0-7_]*[89]/i
FLOAT_BAD =
/[+]?\d[\d_]*_(e|\.)/i
@@regexp_cache =
Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }

Constants included from State::Values

State::Values::EXPR_ARG, State::Values::EXPR_ARG_ANY, State::Values::EXPR_BEG, State::Values::EXPR_BEG_ANY, State::Values::EXPR_CLASS, State::Values::EXPR_CMDARG, State::Values::EXPR_DOT, State::Values::EXPR_END, State::Values::EXPR_ENDARG, State::Values::EXPR_ENDFN, State::Values::EXPR_END_ANY, State::Values::EXPR_FITEM, State::Values::EXPR_FNAME, State::Values::EXPR_LAB, State::Values::EXPR_LABEL, State::Values::EXPR_LABELED, State::Values::EXPR_MID, State::Values::EXPR_NONE, State::Values::EXPR_NUM, State::Values::EXPR_PAD, State::Values::EXPR_PAR

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(_ = nil) ⇒ RubyLexer

Returns a new instance of RubyLexer.



201
202
203
204
205
206
207
208
209
# File 'lib/ruby_lexer.rb', line 201

# Builds a new lexer: sets the lex state to EXPR_NONE, installs fresh :cond
# and :cmdarg StackState trackers, then calls #reset.
#
# The single optional argument is accepted and ignored.
def initialize _ = nil
  @lex_state = nil # remove one warning under $DEBUG
  self.lex_state = EXPR_NONE

  # Both stacks receive $DEBUG as their second constructor argument.
  self.cond   = RubyParserStuff::StackState.new(:cond, $DEBUG)
  self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)

  reset
end

Instance Attribute Details

#brace_nestObject

Returns the value of attribute brace_nest.



176
177
178
# File 'lib/ruby_lexer.rb', line 176

# Returns the value of attribute brace_nest (@brace_nest).
def brace_nest
  @brace_nest
end

#cmd_stateObject

Temporary ivar, kept to avoid passing the value everywhere.



179
180
181
# File 'lib/ruby_lexer.rb', line 179

# Returns @cmd_state. This is a temporary ivar kept to avoid passing the
# value everywhere.
def cmd_state
  @cmd_state
end

#cmdargObject

Returns the value of attribute cmdarg.



177
178
179
# File 'lib/ruby_lexer.rb', line 177

# Returns the value of attribute cmdarg (@cmdarg).
def cmdarg
  @cmdarg
end

#command_startObject

Returns the value of attribute command_start.



178
179
180
# File 'lib/ruby_lexer.rb', line 178

# Returns the value of attribute command_start (@command_start).
def command_start
  @command_start
end

#commentsObject

TODO: remove this… maybe comment_string + attr_accessor



224
225
226
227
228
# File 'lib/ruby_lexer.rb', line 224

# Drains the collected comment fragments: returns them concatenated into one
# String and leaves @comments empty (the same array object is cleared in
# place).
def comments # TODO: remove this... maybe comment_string + attr_accessor
  @comments.join.tap { @comments.clear }
end

#condObject

Returns the value of attribute cond.



181
182
183
# File 'lib/ruby_lexer.rb', line 181

# Returns the value of attribute cond (@cond).
def cond
  @cond
end

#extra_linenoObject

Returns the value of attribute extra_lineno.



182
183
184
# File 'lib/ruby_lexer.rb', line 182

# Returns the value of attribute extra_lineno (@extra_lineno).
def extra_lineno
  @extra_lineno
end

#filenameObject

The file name / path



41
42
43
# File 'lib/ruby_lexer.rex.rb', line 41

# The file name / path (@filename).
def filename
  @filename
end

#last_stateObject

Returns the value of attribute last_state.



180
181
182
# File 'lib/ruby_lexer.rb', line 180

# Returns the value of attribute last_state (@last_state).
def last_state
  @last_state
end

#lex_stateObject

Returns the value of attribute lex_state.



134
135
136
# File 'lib/ruby_lexer.rb', line 134

# Returns the value of attribute lex_state (@lex_state).
def lex_state
  @lex_state
end

#lex_strtermObject

Additional context surrounding tokens that both the lexer and grammar use.



188
189
190
# File 'lib/ruby_lexer.rb', line 188

# Returns @lex_strterm. Documented as additional context surrounding tokens
# that both the lexer and grammar use.
def lex_strterm
  @lex_strterm
end

#linenoObject

:startdoc:



175
176
177
# File 'lib/ruby_lexer.rb', line 175

# Returns the value of @lineno.
def lineno
  @lineno
end

#lpar_begObject

Returns the value of attribute lpar_beg.



189
190
191
# File 'lib/ruby_lexer.rb', line 189

# Returns the value of attribute lpar_beg (@lpar_beg).
def lpar_beg
  @lpar_beg
end

#paren_nestObject

Returns the value of attribute paren_nest.



190
191
192
# File 'lib/ruby_lexer.rb', line 190

# Returns the value of attribute paren_nest (@paren_nest).
def paren_nest
  @paren_nest
end

#parserObject

HACK for very end of lexer… sigh



191
192
193
# File 'lib/ruby_lexer.rb', line 191

# Returns @parser. Marked in the docs as a HACK needed at the very end of
# the lexer.
def parser
  @parser
end

#space_seenObject

Returns the value of attribute space_seen.



192
193
194
# File 'lib/ruby_lexer.rb', line 192

# Returns the value of attribute space_seen (@space_seen).
def space_seen
  @space_seen
end

#ssObject Also known as: match

The StringScanner for this lexer.



46
47
48
# File 'lib/ruby_lexer.rex.rb', line 46

# The StringScanner for this lexer (@ss). Also known as #match.
def ss
  @ss
end

#stateObject

The current lexical state.



51
52
53
# File 'lib/ruby_lexer.rex.rb', line 51

# The current lexical state (@state).
def state
  @state
end

#string_bufferObject

Returns the value of attribute string_buffer.



193
194
195
# File 'lib/ruby_lexer.rb', line 193

# Returns the value of attribute string_buffer (@string_buffer).
def string_buffer
  @string_buffer
end

#string_nestObject

Returns the value of attribute string_nest.



194
195
196
# File 'lib/ruby_lexer.rb', line 194

# Returns the value of attribute string_nest (@string_nest).
def string_nest
  @string_nest
end

#tokenObject

Last token read via next_token.



197
198
199
# File 'lib/ruby_lexer.rb', line 197

# Last token read via next_token (@token).
def token
  @token
end

Instance Method Details

#actionObject

Yields on the current action.



67
68
69
# File 'lib/ruby_lexer.rex.rb', line 67

# Yields on the current action and returns whatever the block returns.
def action
  yield
end

#arg_ambiguousObject



211
212
213
# File 'lib/ruby_lexer.rb', line 211

# Reports an "ambiguous first argument" condition through #warning.
def arg_ambiguous
  self.warning("Ambiguous first argument. make sure.")
end

#arg_stateObject



215
216
217
# File 'lib/ruby_lexer.rb', line 215

# Picks the lex state to enter next: EXPR_ARG when #is_after_operator? is
# truthy, EXPR_BEG otherwise.
def arg_state
  if is_after_operator?
    EXPR_ARG
  else
    EXPR_BEG
  end
end

#beginning_of_line?Boolean Also known as: bol?

Returns:

  • (Boolean)


219
220
221
# File 'lib/ruby_lexer.rb', line 219

# True when the scanner is positioned at the beginning of a line (delegates
# to ss.bol?). Also known as #bol?.
def beginning_of_line?
  ss.bol?
end

#check(re) ⇒ Object



1091
1092
1093
# File 'lib/ruby_lexer.rb', line 1091

# Delegates to ss.check with +re+ and returns its result.
def check re
  ss.check re
end

#d(o) ⇒ Object



1439
1440
1441
# File 'lib/ruby_lexer.rb', line 1439

# Debug helper: writes +o+.inspect to $stderr.
def d o
  $stderr.puts o.inspect
end

#dedent_string(string, width) ⇒ Object



323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
# File 'lib/ruby_lexer.rb', line 323

# Removes up to +width+ columns of leading indentation from +string+ and
# returns the remainder.
#
# A space counts as one column; a tab advances to the next multiple of
# TAB_WIDTH. A tab that would overshoot +width+ is left in place. Scanning
# stops at the first character that is neither a space nor a tab, so
# whitespace inside the line's content is never mistaken for indentation
# (previously an under-indented line such as "a  b" lost its leading text).
def dedent_string(string, width)
  characters_skipped = 0
  indentation_skipped = 0

  string.each_char do |char|
    break if indentation_skipped >= width

    case char
    when " "
      indentation_skipped += 1
    when "\t"
      proposed = TAB_WIDTH * (indentation_skipped / TAB_WIDTH + 1)
      break if proposed > width
      indentation_skipped = proposed
    else
      break # indentation ends at the first non-blank character
    end

    characters_skipped += 1
  end

  string[characters_skipped..-1]
end

#eat_whitespaceObject



1095
1096
1097
1098
1099
# File 'lib/ruby_lexer.rb', line 1095

# Consumes a run of whitespace at the current position. Every newline in the
# consumed text is added to extra_lineno. Returns the consumed text, or nil
# when there was no whitespace to consume.
def eat_whitespace
  scan(/\s+/).tap do |skipped|
    self.extra_lineno += skipped.count("\n") if skipped
  end
end

#end_of_stream?Boolean

Returns:

  • (Boolean)


230
231
232
# File 'lib/ruby_lexer.rb', line 230

# True when the scanner has reached the end of its input (delegates to
# ss.eos?).
def end_of_stream?
  ss.eos?
end

#expr_dot?Boolean

Returns:

  • (Boolean)


234
235
236
# File 'lib/ruby_lexer.rb', line 234

# Result of matching the current lex_state against EXPR_DOT with =~.
# NOTE(review): the meaning of =~ on lex_state is defined by State, which is
# not visible here -- confirm it is a "has this state bit" test.
def expr_dot?
  lex_state =~ EXPR_DOT
end

#expr_fname?Boolean

REFACTOR

Returns:

  • (Boolean)


238
239
240
# File 'lib/ruby_lexer.rb', line 238

# Result of matching the current lex_state against EXPR_FNAME with =~.
# Same body as #in_fname?, hence the REFACTOR marker.
def expr_fname? # REFACTOR
  lex_state =~ EXPR_FNAME
end

#expr_result(token, text) ⇒ Object



242
243
244
245
246
# File 'lib/ruby_lexer.rb', line 242

# Pushes false onto both the cond and cmdarg stacks, then returns
# result(EXPR_BEG, token, text).
def expr_result token, text
  [cond, cmdarg].each { |stack| stack.push false }
  result EXPR_BEG, token, text
end

#fixup_lineno(extra = 0) ⇒ Object



1101
1102
1103
1104
# File 'lib/ruby_lexer.rb', line 1101

# Folds the pending extra_lineno count, plus +extra+, into lineno and then
# resets extra_lineno to 0.
def fixup_lineno extra = 0
  pending = self.extra_lineno + extra
  self.lineno += pending
  self.extra_lineno = 0
end

#heredoc(here) ⇒ Object

TODO: rewrite / remove



248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# File 'lib/ruby_lexer.rb', line 248

# Lexes the next piece of a heredoc body.
#
# +here+ is a 4-element array. Its last three elements are the terminator
# text, the STR_FUNC_* flag bits and the saved remainder of the line that
# introduced the heredoc (the shape #heredoc_identifier stores in
# lex_strterm).
#
# Returns a two-element [token_type, value] pair:
# * :tSTRING_END     when the terminator line is matched at beginning of line
# * :tSTRING_DVAR / :tSTRING_DBEG when an interpolation opener is found
# * :tSTRING_CONTENT for body text
#
# Calls rb_compile_error when the input ends before the terminator is seen.
def heredoc here # TODO: rewrite / remove
  _, eos, func, last_line = here

  # <<- and <<~ style heredocs allow the terminator to be indented.
  indent         = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
  content_indent = (func & STR_FUNC_ICNTNT) != 0
  expand         = (func & STR_FUNC_EXPAND) != 0
  eos_re         = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
  err_msg        = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if end_of_stream?

  if beginning_of_line? && scan(eos_re) then
    self.lineno += 1
    # Push the saved rest-of-line back so it is lexed after the heredoc.
    ss.unread_many last_line # TODO: figure out how to remove this
    return :tSTRING_END, eos
  end

  self.string_buffer = []

  if expand then
    case
    when scan(/#[$@]/) then
      ss.pos -= 1 # FIX omg stupid
      return :tSTRING_DVAR, matched
    when scan(/#[{]/) then
      return :tSTRING_DBEG, matched
    when scan(/#/) then
      string_buffer << '#'
    end

    # Accumulate line by line until the terminator is next or an
    # interpolation interrupts the content.
    begin
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if
        c == RubyLexer::EOF

      if c != "\n" then
        return :tSTRING_CONTENT, string_buffer.join.delete("\r")
      else
        string_buffer << scan(/\n/)
      end

      rb_compile_error err_msg if end_of_stream?
    end until check(eos_re)
  else
    # Non-expanding heredoc: take whole lines verbatim.
    until check(eos_re) do
      string_buffer << scan(/.*(\n|\z)/)
      rb_compile_error err_msg if end_of_stream?
    end
  end

  self.lex_strterm = [:heredoc, eos, func, last_line]

  # Strip carriage returns; on ArgumentError retry on a binary copy of the
  # string and re-tag the result as UTF-8.
  string_content = begin
                     s = string_buffer.join
                     s.delete "\r"
                   rescue ArgumentError
                     s.b.delete("\r").force_encoding Encoding::UTF_8
                   end

  string_content = heredoc_dedent(string_content) if content_indent && ruby23plus?

  return :tSTRING_CONTENT, string_content
end

#heredoc_dedent(string_content) ⇒ Object



313
314
315
316
317
318
319
320
321
# File 'lib/ruby_lexer.rb', line 313

# Removes the common leading indentation from every line of a heredoc body.
#
# The width removed is the smallest indentation (as measured by
# #heredoc_whitespace_indent_size) among lines that contain a non-whitespace
# character, or 0 when there is no such line. Each line is then passed
# through #dedent_string and the lines are re-joined with "\n".
def heredoc_dedent(string_content)
  indents = string_content.scan(/^[ \t]*(?=\S)/).map do |leading|
    heredoc_whitespace_indent_size leading
  end
  width = indents.min || 0

  lines = string_content.split("\n", -1)
  lines.map { |line| dedent_string line, width }.join("\n")
end

#heredoc_identifierObject

TODO: remove / rewrite



352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
# File 'lib/ruby_lexer.rb', line 352

# Tries to lex a heredoc identifier at the current position (the part after
# `<<` -- presumably already consumed by the caller; confirm).
#
# Accepted forms are a quoted terminator ('EOS', "EOS", `EOS`) or a bare
# identifier, each optionally preceded by `-` (and `~` when ruby23plus?).
# On success lex_strterm is set to [:heredoc, terminator, func, rest_of_line]
# and the result of `result` for :tXSTRING_BEG (backtick form) or
# :tSTRING_BEG is returned. Returns nil when nothing matches. Calls
# rb_compile_error for an unterminated quoted identifier.
def heredoc_identifier # TODO: remove / rewrite
  term, func = nil, STR_FUNC_BORING
  self.string_buffer = []

  # Characters allowed between `<<` and the identifier.
  heredoc_indent_mods = '-'
  heredoc_indent_mods += '\~' if ruby23plus?

  case
  when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
    # Quoted identifier: the quote character selects the string kind.
    term = ss[2]
    func |= STR_FUNC_INDENT unless ss[1].empty?
    func |= STR_FUNC_ICNTNT if ss[1] == '~'
    func |= case term
            when "\'" then
              STR_SQUOTE
            when '"' then
              STR_DQUOTE
            else
              STR_XQUOTE
            end
    string_buffer << ss[3]
  when scan(/[#{heredoc_indent_mods}]?([\'\"\`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  when scan(/([#{heredoc_indent_mods}]?)(#{IDENT_CHAR}+)/) then
    # Bare identifier behaves like a double-quoted one.
    term = '"'
    func |= STR_DQUOTE
    unless ss[1].empty? then
      func |= STR_FUNC_INDENT
      func |= STR_FUNC_ICNTNT if ss[1] == '~'
    end
    string_buffer << ss[2]
  else
    return nil
  end

  # Save the remainder of the introducing line so it can be lexed later.
  if scan(/.*\n/) then
    # TODO: think about storing off the char range instead
    line = matched
  else
    line = nil
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  if term == '`' then
    result nil, :tXSTRING_BEG, "`"
  else
    result nil, :tSTRING_BEG, "\""
  end
end

#heredoc_whitespace_indent_size(whitespace) ⇒ Object



342
343
344
345
346
347
348
349
350
# File 'lib/ruby_lexer.rb', line 342

# Measures the column width of a run of leading +whitespace+.
#
# Every character other than a tab counts as one column. A tab advances to
# the next multiple of TAB_WIDTH (it is not a flat +TAB_WIDTH), which is the
# same rule #dedent_string applies when removing indentation. " \t" is
# therefore 8 columns wide rather than 9.
def heredoc_whitespace_indent_size(whitespace)
  whitespace.each_char.inject(0) do |size, char|
    if char == "\t"
      TAB_WIDTH * (size / TAB_WIDTH + 1)
    else
      size + 1
    end
  end
end

#in_fname?Boolean

REFACTOR

Returns:

  • (Boolean)


403
404
405
# File 'lib/ruby_lexer.rb', line 403

# Same test as #expr_fname? (lex_state matched against EXPR_FNAME); kept as
# a separate name for existing callers.
def in_fname? # REFACTOR
  expr_fname?
end

#int_with_base(base) ⇒ Object



411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
# File 'lib/ruby_lexer.rb', line 411

# Converts the most recently matched integer literal into a token.
#
# The literal text comes from #matched and is parsed with String#to_i in the
# given +base+. A trailing "ri", "r" or "i" suffix selects an imaginary
# rational, a rational or an imaginary value respectively; otherwise the
# token is a plain :tINTEGER. A doubled underscore in the literal is
# reported through rb_compile_error.
#
# Returns result(EXPR_NUM, token_type, value).
def int_with_base base
  literal = matched
  rb_compile_error "Invalid numeric format" if literal =~ /__/

  token_type, value =
    if literal.end_with?('ri')
      [:tIMAGINARY, Complex(0, Rational(literal[0...-2].to_i(base)))]
    elsif literal.end_with?('r')
      [:tRATIONAL, Rational(literal[0...-1].to_i(base))]
    elsif literal.end_with?('i')
      [:tIMAGINARY, Complex(0, literal[0...-1].to_i(base))]
    else
      [:tINTEGER, literal.to_i(base)]
    end

  result(EXPR_NUM, token_type, value)
end

#is_after_operator?Boolean

Returns:

  • (Boolean)


407
408
409
# File 'lib/ruby_lexer.rb', line 407

# Result of matching lex_state against the combination EXPR_FNAME|EXPR_DOT.
# `|` binds tighter than `=~`, so the two constants are OR-ed first.
def is_after_operator?
  lex_state =~ EXPR_FNAME|EXPR_DOT
end

#is_arg?Boolean

Returns:

  • (Boolean)


427
428
429
# File 'lib/ruby_lexer.rb', line 427

# Result of matching lex_state against EXPR_ARG_ANY with =~.
def is_arg?
  lex_state =~ EXPR_ARG_ANY
end

#is_beg?Boolean

Returns:

  • (Boolean)


431
432
433
# File 'lib/ruby_lexer.rb', line 431

# Truthy when lex_state matches EXPR_BEG_ANY (via =~) or is exactly equal to
# EXPR_LAB. The second test is deliberately an equality check, not =~.
def is_beg?
  lex_state =~ EXPR_BEG_ANY || lex_state == EXPR_LAB # yes, == EXPR_LAB
end

#is_end?Boolean

Returns:

  • (Boolean)


435
436
437
# File 'lib/ruby_lexer.rb', line 435

# Result of matching lex_state against EXPR_END_ANY with =~.
def is_end?
  lex_state =~ EXPR_END_ANY
end

#is_label_possible?Boolean

Returns:

  • (Boolean)


448
449
450
# File 'lib/ruby_lexer.rb', line 448

# Truthy when lex_state matches EXPR_LABEL|EXPR_ENDFN while cmd_state is
# falsy, or when #is_arg? is truthy.
# Precedence note: `|` binds tighter than `=~`, which binds tighter than
# `&&`, so this reads as (lex_state =~ (LABEL|ENDFN)) && !cmd_state.
def is_label_possible?
  (lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
end

#is_label_suffix?Boolean

Returns:

  • (Boolean)


452
453
454
# File 'lib/ruby_lexer.rb', line 452

# Looks ahead (without consuming) for a single ":" that is not the start of
# "::". Returns the result of #check.
def is_label_suffix?
  check(/:(?!:)/)
end

#is_space_arg?(c = "x") ⇒ Boolean

Returns:

  • (Boolean)


456
457
458
# File 'lib/ruby_lexer.rb', line 456

# Truthy when #is_arg? holds, a space has been seen, and +c+ does not contain
# a whitespace character. +c+ defaults to "x", which always passes the last
# test.
def is_space_arg? c = "x"
  is_arg? && space_seen && c !~ /\s/
end

#lambda_beginning?Boolean

Returns:

  • (Boolean)


460
461
462
# File 'lib/ruby_lexer.rb', line 460

# Truthy when lpar_beg is set and equals the current paren_nest. When
# lpar_beg is nil or false that falsy value itself is returned.
def lambda_beginning?
  return lpar_beg unless lpar_beg

  lpar_beg == paren_nest
end

#locationObject

The current location in the parse.



102
103
104
105
106
# File 'lib/ruby_lexer.rex.rb', line 102

# The current location in the parse, as a String: the filename, or
# "<input>" when no filename is set.
def location
  parts = [filename || "<input>"]
  parts.compact.join(":")
end

#lvar_defined?(id) ⇒ Boolean

Returns:

  • (Boolean)


439
440
441
442
# File 'lib/ruby_lexer.rb', line 439

# True when the parser's environment maps +id+ (converted to a Symbol) to
# :lvar.
def lvar_defined? id
  # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
  self.parser.env[id.to_sym] == :lvar
end

#matchedObject



464
465
466
# File 'lib/ruby_lexer.rb', line 464

# The text of the scanner's most recent match (delegates to ss.matched).
def matched
  ss.matched
end

#matchesObject

The match groups for the current scan.



58
59
60
61
62
# File 'lib/ruby_lexer.rex.rb', line 58

# The match groups for the current scan: capture groups 1 through 9 with any
# trailing unset (nil) groups dropped. Unset groups in the middle are kept.
# Returns an empty Array when no group is set.
def matches
  groups = (1..9).map { |index| ss[index] }
  last_set = groups.rindex { |group| group }
  last_set ? groups.first(last_set + 1) : []
end

#next_tokenObject

Lex the next token.



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
# File 'lib/ruby_lexer.rex.rb', line 111

# Lex the next token.
#
# When a string terminator is pending (lex_strterm is set) the call is handed
# straight to process_string. Otherwise the per-token flags are reset and the
# scanner is advanced, trying the rules below in order, until a rule yields a
# token or the input is exhausted. A rule that yields nil (for example an
# action block that executes `next`) simply causes another iteration.
#
# Returns nil or an Array of at least two elements; any other result raises
# LexerError. A token whose first element is :state switches self.state to
# the token's last element.
#
# Raises ScanError when no rule matches or when state is anything but nil.
def next_token
  return process_string if lex_strterm
  self.cmd_state = self.command_start
  self.command_start = false
  self.space_seen    = false # TODO: rename token_seen?
  self.last_state    = lex_state

  token = nil

  until ss.eos? or token do
    token =
      case state
      when nil then
        case
        when ss.skip(/[\ \t\r\f\v]/) then
          action { self.space_seen = true; next }
        when text = ss.scan(/\n|\#/) then
          process_newline_or_comment text
        when text = ss.scan(/[\]\)\}]/) then
          process_brace_close text
        when ss.match?(/\!/) then
          case
          when is_after_operator? && (ss.skip(/\!\@/)) then
            action { result EXPR_ARG, :tUBANG, "!@" }
          when text = ss.scan(/\![=~]?/) then
            action { result :arg_state, TOKENS[text], text }
          end # group /\!/
        when ss.match?(/\./) then
          case
          when text = ss.scan(/\.\.\.?/) then
            action { result EXPR_BEG, TOKENS[text], text }
          when ss.skip(/\.\d/) then
            action { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
          when ss.skip(/\./) then
            action { self.lex_state = EXPR_BEG; result EXPR_DOT, :tDOT, "." }
          end # group /\./
        when text = ss.scan(/\(/) then
          process_paren text
        when text = ss.scan(/\,/) then
          action { result EXPR_PAR, TOKENS[text], text }
        when ss.match?(/=/) then
          case
          when text = ss.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
            action { result arg_state, TOKENS[text], text }
          when bol? && (text = ss.scan(/\=begin(?=\s)/)) then
            process_begin text
          when text = ss.scan(/\=(?=begin\b)/) then
            action { result arg_state, TOKENS[text], text }
          end # group /=/
        when ruby22_label? && (text = ss.scan(/\"#{SIMPLE_STRING}\":/o)) then
          process_label text
        when text = ss.scan(/\"(#{SIMPLE_STRING})\"/o) then
          action { result EXPR_END, :tSTRING, text[1..-2].gsub(ESC) { unescape $1 } }
        when text = ss.scan(/\"/) then
          action { string STR_DQUOTE; result nil, :tSTRING_BEG, text }
        when text = ss.scan(/\@\@?\d/) then
          action { rb_compile_error "`#{text}` is not allowed as a variable name" }
        when text = ss.scan(/\@\@?#{IDENT_CHAR}+/o) then
          process_ivar text
        when ss.match?(/:/) then
          case
          when not_end? && (text = ss.scan(/:([a-zA-Z_]#{IDENT_CHAR}*(?:[?]|[!](?!=)|=(?==>)|=(?![=>]))?)/o)) then
            process_symbol text
          when not_end? && (text = ss.scan(/\:\"(#{SIMPLE_STRING})\"/o)) then
            process_symbol text
          when not_end? && (text = ss.scan(/\:\'(#{SSTRING})\'/o)) then
            process_symbol text
          when text = ss.scan(/\:\:/) then
            process_colon2 text
          when text = ss.scan(/\:/) then
            process_colon1 text
          end # group /:/
        when ss.skip(/->/) then
          action { result EXPR_ENDFN, :tLAMBDA, nil }
        when text = ss.scan(/[+-]/) then
          process_plus_minus text
        when ss.match?(/[+\d]/) then
          case
          when ss.skip(/#{NUM_BAD}/o) then
            action { rb_compile_error "Invalid numeric format"  }
          when ss.skip(/#{INT_DEC}/o) then
            action { int_with_base 10                           }
          when ss.skip(/#{INT_HEX}/o) then
            action { int_with_base 16                           }
          when ss.skip(/#{INT_BIN}/o) then
            action { int_with_base 2                            }
          when ss.skip(/#{INT_OCT_BAD}/o) then
            action { rb_compile_error "Illegal octal digit."    }
          when ss.skip(/#{INT_OCT}/o) then
            action { int_with_base 8                            }
          when ss.skip(/#{FLOAT_BAD}/o) then
            action { rb_compile_error "Trailing '_' in number." }
          when text = ss.scan(/#{FLOAT}/o) then
            process_float text
          when ss.skip(/#{INT_DEC2}/o) then
            action { int_with_base 10                           }
          when ss.skip(/[0-9]/) then
            action { rb_compile_error "Bad number format" }
          end # group /[+\d]/
        when text = ss.scan(/\[/) then
          process_square_bracket text
        when was_label? && (text = ss.scan(/\'#{SSTRING}\':?/o)) then
          process_label_or_string text
        when ss.match?(/\|/) then
          case
          when ss.skip(/\|\|\=/) then
            action { result EXPR_BEG, :tOP_ASGN, "||" }
          when ss.skip(/\|\|/) then
            action { result EXPR_BEG, :tOROP,    "||" }
          when ss.skip(/\|\=/) then
            action { result EXPR_BEG, :tOP_ASGN, "|" }
          when ss.skip(/\|/) then
            action { state = is_after_operator? ? EXPR_ARG : EXPR_PAR; result state, :tPIPE, "|" }
          end # group /\|/
        when text = ss.scan(/\{/) then
          process_brace_open text
        when ss.match?(/\*/) then
          case
          when ss.skip(/\*\*=/) then
            action { result EXPR_BEG, :tOP_ASGN, "**" }
          when ss.skip(/\*\*/) then
            action { result(:arg_state, space_vs_beginning(:tDSTAR, :tDSTAR, :tPOW), "**") }
          when ss.skip(/\*\=/) then
            action { result(EXPR_BEG, :tOP_ASGN, "*") }
          when ss.skip(/\*/) then
            action { result(:arg_state, space_vs_beginning(:tSTAR, :tSTAR, :tSTAR2), "*") }
          end # group /\*/
        when ss.match?(/</) then
          case
          when ss.skip(/\<\=\>/) then
            action { result :arg_state, :tCMP, "<=>"    }
          when ss.skip(/\<\=/) then
            action { result :arg_state, :tLEQ, "<="     }
          when ss.skip(/\<\<\=/) then
            action { result EXPR_BEG,  :tOP_ASGN, "<<" }
          when text = ss.scan(/\<\</) then
            process_lchevron text
          when ss.skip(/\</) then
            action { result :arg_state, :tLT, "<"       }
          end # group /</
        when ss.match?(/>/) then
          case
          when ss.skip(/\>\=/) then
            action { result :arg_state, :tGEQ, ">="     }
          when ss.skip(/\>\>=/) then
            action { result EXPR_BEG,  :tOP_ASGN, ">>" }
          when ss.skip(/\>\>/) then
            action { result :arg_state, :tRSHFT, ">>"   }
          when ss.skip(/\>/) then
            action { result :arg_state, :tGT, ">"       }
          end # group />/
        when ss.match?(/\`/) then
          case
          when expr_fname? && (ss.skip(/\`/)) then
            action { result(EXPR_END, :tBACK_REF2, "`") }
          when expr_dot? && (ss.skip(/\`/)) then
            action { result((cmd_state ? EXPR_CMDARG : EXPR_ARG), :tBACK_REF2, "`") }
          when ss.skip(/\`/) then
            action { string STR_XQUOTE, '`'; result(nil, :tXSTRING_BEG, "`") }
          end # group /\`/
        when text = ss.scan(/\?/) then
          process_questionmark text
        when ss.match?(/&/) then
          case
          when ss.skip(/\&\&\=/) then
            action { result(EXPR_BEG, :tOP_ASGN, "&&") }
          when ss.skip(/\&\&/) then
            action { result(EXPR_BEG, :tANDOP,   "&&") }
          when ss.skip(/\&\=/) then
            action { result(EXPR_BEG, :tOP_ASGN, "&" ) }
          when ss.skip(/\&\./) then
            action { result(EXPR_DOT, :tLONELY,  "&.") }
          when text = ss.scan(/\&/) then
            process_amper text
          end # group /&/
        when text = ss.scan(/\//) then
          process_slash text
        when ss.match?(/\^/) then
          case
          when ss.skip(/\^=/) then
            action { result(EXPR_BEG, :tOP_ASGN, "^") }
          when ss.skip(/\^/) then
            action { result(:arg_state, :tCARET, "^") }
          end # group /\^/
        when ss.skip(/\;/) then
          action { self.command_start = true; result(EXPR_BEG, :tSEMI, ";") }
        when ss.match?(/~/) then
          case
          when is_after_operator? && (ss.skip(/\~@/)) then
            action { result(:arg_state, :tTILDE, "~") }
          when ss.skip(/\~/) then
            action { result(:arg_state, :tTILDE, "~") }
          end # group /~/
        when ss.match?(/\\/) then
          case
          when ss.skip(/\\\r?\n/) then
            action { self.lineno += 1; self.space_seen = true; next }
          when ss.skip(/\\/) then
            action { rb_compile_error "bare backslash only allowed before newline" }
          end # group /\\/
        when text = ss.scan(/\%/) then
          process_percent text
        when ss.match?(/\$/) then
          case
          when text = ss.scan(/\$_\w+/) then
            process_gvar text
          when text = ss.scan(/\$_/) then
            process_gvar text
          when text = ss.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
            process_gvar text
          when in_fname? && (text = ss.scan(/\$([\&\`\'\+])/)) then
            process_gvar text
          when text = ss.scan(/\$([\&\`\'\+])/) then
            process_backref text
          when in_fname? && (text = ss.scan(/\$([1-9]\d*)/)) then
            process_gvar text
          when text = ss.scan(/\$([1-9]\d*)/) then
            process_nthref text
          when text = ss.scan(/\$0/) then
            process_gvar text
          when text = ss.scan(/\$[^[:ascii:]]+/) then
            process_gvar text
          when text = ss.scan(/\$\W|\$\z/) then
            process_gvar_oddity text
          when text = ss.scan(/\$\w+/) then
            process_gvar text
          end # group /\$/
        when text = ss.scan(/\_/) then
          process_underscore text
        when text = ss.scan(/#{IDENT}/o) then
          process_token text
        when ss.skip(/\004|\032|\000|\Z/) then
          action { [RubyLexer::EOF, RubyLexer::EOF] }
        when text = ss.scan(/./) then
          action { rb_compile_error "Invalid char #{text.inspect} in expression" }
        else
          text = ss.string[ss.pos .. -1]
          raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
        end
      else
        raise ScanError, "undefined state at #{location}: '#{state}'"
      end # token = case state

    next unless token # allow functions to trigger redo w/ nil
  end # while

  raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless
    token.nil? || (Array === token && token.size >= 2)

  # auto-switch state
  self.state = token.last if token && token.first == :state

  token
end

#not_end?Boolean

Returns:

  • (Boolean)


468
469
470
# File 'lib/ruby_lexer.rb', line 468

# Boolean negation of #is_end?.
def not_end?
  !is_end?
end

#old_lineno=Object

:startdoc:



1437
1438
1439
# File 'lib/ruby_lexer.rb', line 1437

# Sets @lineno to +value+.
def lineno=(value)
  @lineno = value
end

#parse(str) ⇒ Object

Parse the given string.



82
83
84
85
86
87
# File 'lib/ruby_lexer.rex.rb', line 82

# Parse the given string: wraps +str+ in a new scanner_class instance, makes
# sure state is nil when it currently holds a falsy value, and returns the
# result of do_parse.
def parse str
  self.ss = scanner_class.new(str)
  self.state = nil unless state

  do_parse
end

#parse_file(path) ⇒ Object

Read in and parse the file at path.



92
93
94
95
96
97
# File 'lib/ruby_lexer.rex.rb', line 92

# Read in and parse the file at +path+.
#
# Records +path+ as the lexer's filename, reads the whole file and returns
# the result of #parse on its contents.
#
# File.open is used rather than Kernel#open so that +path+ is always treated
# as a file name: Kernel#open would run a path beginning with "|" as a shell
# command.
#
# Raises the usual Errno::* errors when the file cannot be opened.
def parse_file path
  self.filename = path
  File.open path do |f|
    parse f.read
  end
end

#parse_quoteObject

TODO: remove / rewrite



1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
# File 'lib/ruby_lexer.rb', line 1318

# Lexes the opening of a %-literal (the "%" itself is presumably already
# consumed by the caller -- confirm).
#
# Reads the optional type letter and the opening delimiter, works out the
# closing delimiter, registers the string via `string`, and returns a
# [token_type, text] pair. Type letters handled: Q q W w I i x r s; with no
# letter the literal is treated as %Q.
#
# Calls rb_compile_error for a two-character type, an unknown type letter, or
# when the input ends right after the opener.
def parse_quote # TODO: remove / rewrite
  beg, nnd, short_hand, c = nil, nil, false, nil

  if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if ss.matched_size == 2
    c, beg, short_hand = matched, ss.getch, false
  else                               # Short-hand (e.g. %{, %., %!, etc)
    c, beg, short_hand = 'Q', ss.getch, true
  end

  if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
  nnd, beg = beg, "\0" if nnd.nil?

  # Map the type letter to the opening token and the STR_* flag set.
  token_type, text = nil, "%#{c}#{beg}"
  token_type, string_type = case c
                            when 'Q' then
                              ch = short_hand ? nnd : c + beg
                              text = "%#{ch}"
                              [:tSTRING_BEG,   STR_DQUOTE]
                            when 'q' then
                              [:tSTRING_BEG,   STR_SQUOTE]
                            when 'W' then
                              eat_whitespace
                              [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
                            when 'w' then
                              eat_whitespace
                              [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
                            when 'x' then
                              [:tXSTRING_BEG,  STR_XQUOTE]
                            when 'r' then
                              [:tREGEXP_BEG,   STR_REGEXP]
                            when 's' then
                              self.lex_state = EXPR_FNAME
                              [:tSYMBEG,       STR_SSYM]
                            when 'I' then
                              eat_whitespace
                              [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
                            when 'i' then
                              eat_whitespace
                              [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
                            end

  # An unmatched letter leaves both values nil.
  rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
    token_type.nil?

  raise "huh" unless string_type

  string string_type, nnd, beg

  return token_type, text
end

#parse_string(quote) ⇒ Object

TODO: rewrite / remove



1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
# File 'lib/ruby_lexer.rb', line 1375

# Lexes the next piece of the string literal described by +quote+, a
# strterm tuple of the form [kind, string_type, term, open] (see #string).
#
# Returns a two-element token pair:
# * [:tSTRING_END, ...] / [:tREGEXP_END, options] when the terminator is hit,
# * [:tSPACE, nil] between the words of a word list,
# * [:tSTRING_DVAR, nil] / [:tSTRING_DBEG, nil] at an interpolation start,
# * [:tSTRING_CONTENT, text] for literal content.
#
# Reports a compile error when tokadd_string signals end of file.
def parse_string quote # TODO: rewrite / remove
  _, string_type, term, open = quote

  term_re = @@regexp_cache[term]

  # string_type is set to nil below once a word list has consumed its
  # terminator; the following call then closes the literal.
  return :tSTRING_END, nil unless string_type

  qwords = (string_type & STR_FUNC_QWORDS) != 0
  regexp = (string_type & STR_FUNC_REGEXP) != 0
  expand = (string_type & STR_FUNC_EXPAND) != 0

  # Only word lists skip leading whitespace.
  skipped_space = qwords && eat_whitespace

  if self.string_nest == 0 && scan(/#{term_re}/)
    if qwords
      quote[1] = nil
      return :tSPACE, nil
    end

    return :tREGEXP_END, self.regx_options if regexp
    return :tSTRING_END, term
  end

  return :tSPACE, nil if skipped_space

  self.string_buffer = []

  if expand
    if scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/)
      # TODO: !ISASCII
      # ?! see parser_peek_variable_name
      return :tSTRING_DVAR, nil
    elsif scan(/#(?=\@\@?[a-zA-Z_])/)
      # TODO: !ISASCII
      return :tSTRING_DVAR, nil
    elsif scan(/#[{]/)
      self.command_start = true
      return :tSTRING_DBEG, nil
    elsif scan(/#/)
      # a lone "#" that starts no interpolation is ordinary content
      string_buffer << '#'
    end
  end

  if tokadd_string(string_type, term, open) == RubyLexer::EOF
    rb_compile_error "unterminated string meets end of file"
  end

  return :tSTRING_CONTENT, string_buffer.join
end

#possibly_escape_string(text, check) ⇒ Object



841
842
843
844
845
846
847
848
849
# File 'lib/ruby_lexer.rb', line 841

# Returns the first capture group of the current match with escapes
# resolved.
#
# When +text+ matches +check+, every ESC sequence in the capture is
# replaced by the result of #unescape. Otherwise only the two escapes
# \\ and \' are collapsed.
def possibly_escape_string text, check
  content = match[1]

  unless text =~ check
    return content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
  end

  content.gsub(ESC) { unescape Regexp.last_match(1) }
end

#process_amper(text) ⇒ Object



472
473
474
475
476
477
478
479
480
481
482
483
# File 'lib/ruby_lexer.rb', line 472

# Lexes "&". Yields :tAMPER when it reads as an argument prefix (after an
# argument with preceding space and no following whitespace, or when the
# lexer state is EXPR_BEG/EXPR_MID), and :tAMPER2 otherwise. The new
# lexer state is chosen by #result through :arg_state.
def process_amper text
  prefix = false

  if is_arg? && space_seen && !check(/\s/)
    warning("`&' interpreted as argument prefix")
    prefix = true
  elsif lex_state =~ EXPR_BEG|EXPR_MID
    prefix = true
  end

  result(:arg_state, prefix ? :tAMPER : :tAMPER2, "&")
end

#process_backref(text) ⇒ Object



485
486
487
488
489
# File 'lib/ruby_lexer.rb', line 485

# Emits :tBACK_REF whose value is the scanner's first capture group as a
# Symbol, and moves the lexer to EXPR_END.
def process_backref text
  # TODO: can't do lineno hack w/ symbol
  backref_name = ss[1]
  result(EXPR_END, :tBACK_REF, backref_name.to_sym)
end

#process_begin(text) ⇒ Object



491
492
493
494
495
496
497
498
499
500
501
502
503
# File 'lib/ruby_lexer.rb', line 491

# Consumes an embedded document: the text already matched (the opener)
# plus everything up to and including the "=end" line. Both pieces are
# appended to @comments and lineno advances by the newlines consumed.
#
# Clears @comments and reports a compile error when no "=end" is found.
# Always returns nil.
def process_begin text
  @comments.push matched

  terminated = scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m)
  if !terminated
    @comments.clear
    rb_compile_error("embedded document meets end of file")
  end

  doc_body = matched
  @comments.push doc_body
  self.lineno += doc_body.count("\n")

  nil # TODO
end

#process_brace_close(text) ⇒ Object



505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
# File 'lib/ruby_lexer.rb', line 505

# Lexes a closing "}", "]" or ")" (taken from the current match).
#
# Pops cond and cmdarg, decrements the matching nesting counter and sets
# the lexer state. A "}" that drives brace_nest below zero is reported as
# :tSTRING_DEND instead of :tRCURLY. Raises for any other character.
def process_brace_close text
  # matching compare/parse23.y:8561
  cond.lexpop
  cmdarg.lexpop

  closer = matched

  if closer == "}"
    self.brace_nest -= 1
    self.lex_state   = EXPR_ENDARG # TODO: EXPR_END ? Look at 2.6

    [brace_nest < 0 ? :tSTRING_DEND : :tRCURLY, matched]
  elsif closer == "]"
    self.paren_nest -= 1
    self.lex_state   = EXPR_ENDARG
    [:tRBRACK, matched]
  elsif closer == ")"
    self.paren_nest -= 1
    self.lex_state   = EXPR_ENDFN
    [:tRPAREN, matched]
  else
    raise "Unknown bracing: #{matched.inspect}"
  end
end

#process_brace_open(text) ⇒ Object



554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
# File 'lib/ruby_lexer.rb', line 554

# Lexes "{". Increments brace_nest, then decides what the brace opens:
# a lambda body (:tLAMBEG), a hash (:tLBRACE), a primary block (:tLCURLY)
# or an expression block (:tLBRACE_ARG), based on the lexer state.
#
# For everything except the lambda body, false is pushed on cond and
# cmdarg before the token is returned.
def process_brace_open text
  # matching compare/parse23.y:8694
  self.brace_nest += 1

  if lambda_beginning?
    self.lpar_beg = nil
    self.paren_nest -= 1 # close arg list when lambda opens body

    return expr_result(:tLAMBEG, "{")
  end

  token =
    if lex_state =~ EXPR_LABELED
      :tLBRACE     # hash
    elsif lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN
      :tLCURLY     # block (primary) '{' in parse.y
    elsif lex_state =~ EXPR_ENDARG
      :tLBRACE_ARG # block (expr)
    else
      :tLBRACE     # hash
    end

  # Both kinds of block start a new command; a hash literal does not.
  self.command_start = true unless token == :tLBRACE
  next_state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR

  cond.push false
  cmdarg.push false
  result next_state, token, text
end

#process_colon1(text) ⇒ Object



530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
# File 'lib/ruby_lexer.rb', line 530

# Lexes a single ":". At the end of an expression, or when whitespace
# follows, it is a plain :tCOLON. Otherwise it begins a symbol: an
# immediately following quote starts a quoted-symbol string term, and
# :tSYMBEG is returned with the lexer in EXPR_FNAME.
def process_colon1 text
  # ?: / then / when
  return result(EXPR_BEG, :tCOLON, text) if is_end? || check(/\s/)

  if scan(/\'/)
    string STR_SSYM
  elsif scan(/\"/)
    string STR_DSYM
  end

  result(EXPR_FNAME, :tSYMBEG, text)
end

#process_colon2(text) ⇒ Object



546
547
548
549
550
551
552
# File 'lib/ruby_lexer.rb', line 546

# Lexes "::". Returns :tCOLON3 (lexer to EXPR_BEG) at the beginning of an
# expression, in EXPR_CLASS state, or as a spaced argument; otherwise
# returns :tCOLON2 (lexer to EXPR_DOT).
def process_colon2 text
  leading = is_beg? || lex_state =~ EXPR_CLASS || is_space_arg?

  return result(EXPR_DOT, :tCOLON2, text) unless leading

  result(EXPR_BEG, :tCOLON3, text)
end

#process_float(text) ⇒ Object



584
585
586
587
588
589
590
591
592
593
594
595
596
597
# File 'lib/ruby_lexer.rb', line 584

# Converts a float literal into its token, choosing by suffix:
# "ri" => imaginary rational, "i" => imaginary float, "r" => rational,
# none => float. The lexer moves to EXPR_NUM.
#
# Reports a compile error when the text contains "__".
def process_float text
  rb_compile_error "Invalid numeric format" if text.include?("__")

  if text.end_with?('ri')
    # drop the two suffix characters
    result(EXPR_NUM, :tIMAGINARY, Complex(0, Rational(text[0..-3])))
  elsif text.end_with?('i')
    result(EXPR_NUM, :tIMAGINARY, Complex(0, text[0..-2].to_f))
  elsif text.end_with?('r')
    result(EXPR_NUM, :tRATIONAL, Rational(text[0..-2]))
  else
    result(EXPR_NUM, :tFLOAT, text.to_f)
  end
end

#process_gvar(text) ⇒ Object



599
600
601
602
# File 'lib/ruby_lexer.rb', line 599

# Emits :tGVAR for +text+ after stamping the current line number onto the
# text itself (via its lineno= writer); the lexer moves to EXPR_END.
def process_gvar text
  current_line = self.lineno
  text.lineno = current_line
  result(EXPR_END, :tGVAR, text)
end

#process_gvar_oddity(text) ⇒ Object



604
605
606
607
# File 'lib/ruby_lexer.rb', line 604

# Handles "$" followed by something that is not a valid global name.
# A bare "$" is passed through as a ["$", "$"] token in EXPR_END; any
# other text is reported as a compile error.
def process_gvar_oddity text
  if text == "$" # TODO: wtf is this?
    result(EXPR_END, "$", "$")
  else
    rb_compile_error "#{text.inspect} is not allowed as a global variable name"
  end
end

#process_ivar(text) ⇒ Object



609
610
611
612
613
# File 'lib/ruby_lexer.rb', line 609

# Emits :tCVAR for text starting with "@@" and :tIVAR otherwise, after
# stamping the current line number onto the text; the lexer moves to
# EXPR_END.
def process_ivar text
  tok_id =
    if text =~ /^@@/
      :tCVAR
    else
      :tIVAR
    end

  text.lineno = self.lineno
  result(EXPR_END, tok_id, text)
end

#process_label(text) ⇒ Object



874
875
876
877
878
# File 'lib/ruby_lexer.rb', line 874

# Emits :tLABEL whose value is [label_text, lineno], with escapes in the
# label resolved by #possibly_escape_string (double-quoted rules apply
# when +text+ starts with a double quote). The lexer moves to EXPR_LAB.
def process_label text
  label = possibly_escape_string(text, /^"/)
  value = [label, self.lineno]

  result(EXPR_LAB, :tLABEL, value)
end

#process_label_or_string(text) ⇒ Object



862
863
864
865
866
867
868
869
870
871
872
# File 'lib/ruby_lexer.rb', line 862

# Decides whether a quoted token ending in ":" is a label or a string.
#
# With @was_label set, a trailing ":" makes it a label (delegated to
# #process_label, and the flag is cleared). Otherwise a trailing ":" is
# pushed back to the scanner and the remainder is emitted as :tSTRING
# with its surrounding quotes removed and \\ / \' collapsed.
def process_label_or_string text
  if text =~ /:\Z/
    if @was_label
      @was_label = nil
      return process_label text
    end

    ss.pos -= 1 # put back ":"
    text = text[0..-2]
  end

  unquoted = text[1..-2]
  result(EXPR_END, :tSTRING, unquoted.gsub(/\\\\/, "\\").gsub(/\\'/, "'"))
end

#process_lchevron(text) ⇒ Object



615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
# File 'lib/ruby_lexer.rb', line 615

# Lexes "<<". When the lexer state allows a heredoc to start here, the
# heredoc identifier is tried first and its token returned if found.
# Otherwise the lexer state is updated (EXPR_ARG after an operator,
# EXPR_BEG elsewhere) and :tLSHFT is returned.
def process_lchevron text
  heredoc_possible =
    (lex_state !~ (EXPR_DOT|EXPR_CLASS)) &&
    !is_end? &&
    (!is_arg? || lex_state =~ EXPR_LABELED || space_seen)

  if heredoc_possible
    heredoc_tok = self.heredoc_identifier
    return heredoc_tok if heredoc_tok
  end

  if is_after_operator?
    self.lex_state = EXPR_ARG
  else
    # must be read before lex_state is overwritten just below
    self.command_start = true if lex_state =~ EXPR_CLASS
    self.lex_state = EXPR_BEG
  end

  result(lex_state, :tLSHFT, "\<\<")
end

#process_newline_or_comment(text) ⇒ Object



633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
# File 'lib/ruby_lexer.rb', line 633

# Handles a newline or the start of a "#" comment (taken from the current
# match).
#
# A run of comment lines is appended to @comments and counted into
# lineno. Returns nil when the newline is not significant: at end of
# stream after comments, in the lexer states tested below, or when the
# next line continues with a leading "." or "&" (but not ".."). Otherwise
# sets command_start and returns the :tNL token with the lexer in
# EXPR_BEG.
def process_newline_or_comment text
  c = matched
  hit = false

  if c == '#' then
    # step back over the "#" so the loop below can rescan it together
    # with any following comment lines
    ss.pos -= 1

    # TODO: handle magic comments
    while scan(/\s*\#.*(\n+|\z)/) do
      hit = true
      self.lineno += matched.lines.to_a.size
      # strip the indentation in front of each "#" and blank out
      # space-only lines before storing the comment text
      @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
    end

    return nil if end_of_stream?
  end

  # a plain newline counts as one line; comment lines were counted above
  self.lineno += 1 unless hit

  # Replace a string of newlines with a single one
  self.lineno += matched.lines.to_a.size if scan(/\n+/)

  # c is reused here as a flag for "newline is insignificant in this state"
  c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
       lex_state !~ EXPR_LABELED)
  # TODO: figure out what token_seen is for
  if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
    # ignore if !fallthrough?
    if !c && parser.in_kwarg then
      # normal newline
      self.command_start = true
      return result EXPR_BEG, :tNL, nil
    else
      return # skip
    end
  end

  # next line starts with optional blanks and then "." or "&"
  if scan(/([\ \t\r\f\v]*)(\.|&)/) then
    self.space_seen = true unless ss[1].empty?

    # put the "." / "&" back; only a ".." still ends the statement
    ss.pos -= 1
    return unless check(/\.\./)
  end

  self.command_start = true

  return result(EXPR_BEG, :tNL, nil)
end

#process_nthref(text) ⇒ Object



681
682
683
684
# File 'lib/ruby_lexer.rb', line 681

# Emits :tNTH_REF whose value is the scanner's first capture group
# converted to an Integer; the lexer moves to EXPR_END.
def process_nthref text
  # TODO: can't do lineno hack w/ number
  group_number = ss[1].to_i
  result(EXPR_END, :tNTH_REF, group_number)
end

#process_paren(text) ⇒ Object



686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
# File 'lib/ruby_lexer.rb', line 686

# Lexes "(". Picks :tLPAREN at the beginning of an expression,
# :tLPAREN_ARG for a spaced argument, and :tLPAREN2 in every other case.
# Then increments paren_nest, pushes false on cond and cmdarg, and
# returns the token with the lexer in EXPR_PAR.
def process_paren text
  token =
    case
    when is_beg? then
      :tLPAREN
    when !space_seen then
      # foo( ... ) => method call, no ambiguity
      :tLPAREN2
    when is_space_arg? then
      :tLPAREN_ARG
    when lex_state =~ EXPR_ENDFN && !lambda_beginning? then
      # TODO:
      # warn("parentheses after method name is interpreted as " \
      #      "an argument list, not a decomposed argument")
      :tLPAREN2
    else
      :tLPAREN2 # plain '(' in parse.y
    end

  self.paren_nest += 1

  cond.push false
  cmdarg.push false
  result(EXPR_PAR, token, text)
end

#process_percent(text) ⇒ Object



710
711
712
713
714
715
716
717
718
# File 'lib/ruby_lexer.rb', line 710

# Lexes "%". It starts a percent-literal (delegating to parse_quote) at
# the beginning of an expression, as a spaced argument, or in EXPR_FITEM
# state when followed by "s". "%=" is the :tOP_ASGN token; anything else
# is the :tPERCENT operator.
def process_percent text
  if is_beg?
    parse_quote
  elsif scan(/\=/)
    result(EXPR_BEG, :tOP_ASGN, "%")
  elsif is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
    parse_quote
  else
    result(:arg_state, :tPERCENT, "%")
  end
end

#process_plus_minus(text) ⇒ Object



720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
# File 'lib/ruby_lexer.rb', line 720

# Lexes "+" or "-" (taken from the current match).
#
# After an operator it is a method name: "+@"/"-@" give the unary token,
# a bare sign the binary one (lexer to EXPR_ARG). "+=" / "-=" give
# :tOP_ASGN. In a unary position a sign before a digit yields
# :tUMINUS_NUM for "-" and nil for "+"; otherwise the unary token.
# Everything else is the binary operator.
def process_plus_minus text
  sign = matched
  plus = sign == "+"
  unary_tok  = plus ? :tUPLUS : :tUMINUS
  binary_tok = plus ? :tPLUS : :tMINUS

  if is_after_operator?
    return result(EXPR_ARG, unary_tok, "#{sign}@") if scan(/@/)
    return result(EXPR_ARG, binary_tok, sign)
  end

  return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)

  unary_position = is_beg? || (is_arg? && space_seen && !check(/\s/))
  return result(EXPR_BEG, binary_tok, sign) unless unary_position

  arg_ambiguous if is_arg?

  if check(/\d/)
    return nil if plus
    return result(EXPR_BEG, :tUMINUS_NUM, sign)
  end

  result(EXPR_BEG, unary_tok, sign)
end

#process_questionmark(text) ⇒ Object



752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
# File 'lib/ruby_lexer.rb', line 752

# Lexes "?". It is the ternary operator (:tEH) at the end of an
# expression, before whitespace, or before a run of two or more word
# characters. Otherwise it is a character literal: the next character (or
# escape, via #read_escape) is returned as a :tSTRING.
#
# Reports a compile error when "?" is the last thing in the stream.
def process_questionmark text
  return result(EXPR_BEG, :tEH, "?") if is_end?

  if end_of_stream?
    rb_compile_error "incomplete character syntax: parsed #{text.inspect}"
  end

  if check(/\s|\v/)
    unless is_arg?
      # escape letter to suggest for the whitespace character just checked
      escape_letter = { " " => 's',
                        "\n" => 'n',
                        "\t" => 't',
                        "\v" => 'v',
                        "\r" => 'r',
                        "\f" => 'f' }[matched]

      warning("invalid character syntax; use ?\\" + escape_letter) if escape_letter
    end

    # ternary
    return result(EXPR_BEG, :tEH, "?")
  end

  # ternary, also
  return result(EXPR_BEG, :tEH, "?") if check(/\w(?=\w)/)

  char = scan(/\\/) ? self.read_escape : ss.getch

  result(EXPR_END, :tSTRING, char)
end

#process_slash(text) ⇒ Object



790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
# File 'lib/ruby_lexer.rb', line 790

# Lexes "/". At the beginning of an expression it opens a regexp
# (:tREGEXP_BEG). "/=" is :tOP_ASGN. After a spaced argument with no
# whitespace following it is treated as an (ambiguous) regexp start.
# Otherwise it is the :tDIVIDE operator.
def process_slash text
  if is_beg?
    string STR_REGEXP

    return result(nil, :tREGEXP_BEG, "/")
  end

  return result(EXPR_BEG, :tOP_ASGN, "/") if scan(/\=/)

  if is_arg? && space_seen && !scan(/\s/)
    arg_ambiguous
    # NOTE(review): "/" is passed explicitly here, presumably because the
    # failed scan above leaves no usable `matched` for #string's default
    # opener -- confirm.
    string STR_REGEXP, "/"
    return result(nil, :tREGEXP_BEG, "/")
  end

  result(:arg_state, :tDIVIDE, "/")
end

#process_square_bracket(text) ⇒ Object



812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
# File 'lib/ruby_lexer.rb', line 812

# Lexes "[". After an operator only the method names "[]" and "[]=" are
# accepted (:tAREF / :tASET); anything else is a compile error. Otherwise
# the token is :tLBRACK (start of an expression, or a spaced/labeled
# argument) or :tLBRACK2, with false pushed on cond and cmdarg and the
# lexer moved to EXPR_PAR.
def process_square_bracket text
  self.paren_nest += 1

  token = nil

  if is_after_operator?
    if scan(/\]\=/)
      self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
      return result(EXPR_ARG, :tASET, "[]=")
    elsif scan(/\]/)
      self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
      return result(EXPR_ARG, :tAREF, "[]")
    else
      rb_compile_error "unexpected '['"
    end
  elsif is_beg? || (is_arg? && (space_seen || lex_state =~ EXPR_LABELED))
    token = :tLBRACK
  else
    token = :tLBRACK2
  end

  cond.push false
  cmdarg.push false
  result(EXPR_PAR, token, text)
end

#process_string ⇒ Object

TODO: rewrite / remove



1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
# File 'lib/ruby_lexer.rb', line 1290

# Lexes the next token of the string term currently in progress,
# dispatching to #heredoc or #parse_string by the term's kind.
#
# On Ruby 2.2+ a closing quote followed by a label suffix is turned into
# :tLABEL_END (the ":" is consumed). Any terminating token clears
# lex_strterm and updates the lexer state. Returns the token pair.
def process_string # TODO: rewrite / remove
  # matches top of parser_yylex in compare/parse23.y:8113
  strterm = lex_strterm
  token = strterm[0] == :heredoc ? heredoc(strterm) : parse_string(strterm)

  token_type, c = token

  # matches parser_string_term
  if ruby22plus? && token_type == :tSTRING_END && ["'", '"'].include?(c) &&
     (((lex_state =~ (EXPR_BEG|EXPR_ENDFN)) && !cond.is_in_state) || is_arg?) &&
     is_label_suffix?
    scan(/:/)
    token_type = token[0] = :tLABEL_END
  end

  case token_type
  when :tLABEL_END
    self.lex_strterm = nil
    self.lex_state   = EXPR_PAR
  when :tSTRING_END, :tREGEXP_END
    self.lex_strterm = nil
    self.lex_state   = EXPR_END
  end

  token
end

#process_symbol(text) ⇒ Object



851
852
853
854
855
# File 'lib/ruby_lexer.rb', line 851

# Emits :tSYMBOL for a symbol literal, with escapes resolved by
# #possibly_escape_string (double-quoted rules apply when +text+ starts
# with :"). The lexer moves to EXPR_END.
def process_symbol text
  result(EXPR_END, :tSYMBOL, possibly_escape_string(text, /^:"/))
end

#process_token(text) ⇒ Object



880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
# File 'lib/ruby_lexer.rb', line 880

# Lexes an identifier-like token: method names with a trailing "!" or "?"
# (:tFID), constants, plain identifiers, labels and keywords.
#
# +text+ is stored as the current token and is extended in place when a
# suffix ("!", "?" or a setter "=") is consumed. Returns a label token,
# the result of #process_token_keyword for reserved words, or the
# identifier token with the follow-up lexer state chosen below.
def process_token text
  # matching: parse_ident in compare/parse23.y:7989
  # TODO: make this always return [token, lineno]
  # FIX: remove: self.last_state = lex_state

  token = self.token = text
  # take a trailing "!" or "?" unless it is the start of "!=" / "?="
  token << matched if scan(/[\!\?](?!=)/)

  tok_id =
    case
    when token =~ /[!?]$/ then
      :tFID
    when lex_state =~ EXPR_FNAME && scan(/=(?:(?![~>=])|(?==>))/) then
      # ident=, not =~ => == or followed by =>
      # TODO test lexing of a=>b vs a==>b
      token << matched
      :tIDENTIFIER
    when token =~ /^[A-Z]/ then
      :tCONSTANT
    else
      :tIDENTIFIER
    end

  if is_label_possible? and is_label_suffix? then
    # consume the ":" that makes this a label
    scan(/:/)
    return result EXPR_LAB, :tLABEL, [token, self.lineno]
  end

  # TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
  if lex_state !~ EXPR_DOT then
    # See if it is a reserved word.
    keyword = RubyParserStuff::Keyword.keyword token

    return process_token_keyword keyword if keyword
  end

  # matching: compare/parse23.y:8079
  state = if is_beg? or is_arg? or lex_state =~ EXPR_DOT then
            cmd_state ? EXPR_CMDARG : EXPR_ARG
          elsif lex_state =~ EXPR_FNAME then
            EXPR_ENDFN
          else
            EXPR_END
          end

  # a known local variable overrides the state chosen above
  if last_state !~ EXPR_DOT|EXPR_FNAME and
      (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
      lvar_defined?(token) then
    state = EXPR_END|EXPR_LABEL
  end

  token.lineno = self.lineno # yes, on a string. I know... I know...

  return result(state, tok_id, token)
end

#process_token_keyword(keyword) ⇒ Object



936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
# File 'lib/ruby_lexer.rb', line 936

# Emits the token for a reserved word. The lexer state is first switched
# to the keyword's own state; the previous state then decides which
# variant is returned:
# * in EXPR_FNAME the keyword is returned as-is (id0),
# * "do" becomes :kDO_LAMBDA, :kDO_COND, :kDO_BLOCK or :kDO,
# * otherwise id0 is used when the previous state matches EXPR_PAD, else
#   id1 (with the lexer moved to EXPR_PAR when id0 and id1 differ).
# The token value is [token, lineno].
def process_token_keyword keyword
  # matching MIDDLE of parse_ident in compare/parse23.y:8046
  state = lex_state
  self.lex_state = keyword.state

  value = [token, self.lineno]

  return result(lex_state, keyword.id0, value) if state =~ EXPR_FNAME

  self.command_start = true if lex_state =~ EXPR_BEG

  if keyword.id0 == :kDO
    do_token =
      if lambda_beginning?
        self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
        self.paren_nest -= 1
        :kDO_LAMBDA
      elsif cond.is_in_state
        :kDO_COND
      elsif (cmdarg.is_in_state && state != EXPR_CMDARG) ||
            (state =~ (EXPR_BEG|EXPR_ENDARG))
        :kDO_BLOCK
      else
        :kDO
      end

    return result(lex_state, do_token, value)
  end

  if state =~ EXPR_PAD
    result(lex_state, keyword.id0, value)
  elsif keyword.id0 != keyword.id1
    result(EXPR_PAR, keyword.id1, value)
  else
    result(lex_state, keyword.id1, value)
  end
end

#process_underscore(text) ⇒ Object



972
973
974
975
976
977
978
979
980
# File 'lib/ruby_lexer.rb', line 972

# Lexes a token starting with "_". The underscore is first put back on
# the scanner. "__END__" on its own line (at the beginning of a line)
# yields the EOF pair; otherwise the underscore-led word is handed to
# #process_token. Returns nil if neither pattern matches.
def process_underscore text
  ss.unscan # put back "_"

  if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/)
    return [RubyLexer::EOF, RubyLexer::EOF]
  end

  return process_token(matched) if scan(/\_\w*/)

  nil
end

#rb_compile_error(msg) ⇒ Object



982
983
984
985
# File 'lib/ruby_lexer.rb', line 982

# Raises RubyParser::SyntaxError with +msg+ followed by the current line
# number and the rest of the current source line (inspected).
def rb_compile_error msg
  remainder = ss.rest[/^.*/].inspect
  full_message = msg + ". near line #{self.lineno}: #{remainder}"

  raise RubyParser::SyntaxError, full_message
end

#read_escape ⇒ Object

TODO: remove / rewrite



987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
# File 'lib/ruby_lexer.rb', line 987

# Reads the body of a backslash escape at the scanner position and
# returns the character(s) it stands for as a fresh String (the result of
# the case expression is dup'ed).
#
# The branches are tried in order, so the more specific patterns (nested
# meta/control escapes, "C-?") must stay ahead of the generic ones.
# Malformed escapes, and an escape cut off by the end of the stream, are
# reported through rb_compile_error.
def read_escape # TODO: remove / rewrite
  case
  when scan(/\\/) then                  # Backslash
    '\\'
  when scan(/n/) then                   # newline
    self.extra_lineno -= 1
    "\n"
  when scan(/t/) then                   # horizontal tab
    "\t"
  when scan(/r/) then                   # carriage-return
    "\r"
  when scan(/f/) then                   # form-feed
    "\f"
  when scan(/v/) then                   # vertical tab
    "\13"
  when scan(/a/) then                   # alarm(bell)
    "\007"
  when scan(/e/) then                   # escape
    "\033"
  when scan(/b/) then                   # backspace
    "\010"
  when scan(/s/) then                   # space
    " "
  when scan(/[0-7]{1,3}/) then          # octal constant
    (matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8
  when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    # TODO: force encode everything to UTF-8?
    ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
  # meta prefix wrapping another escape: read the inner escape
  # recursively, then set the high bit of its first character
  when check(/M-\\[\\MCc]/) then
    scan(/M-\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord | 0x80).chr
    c
  when scan(/M-(.)/) then
    c = ss[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  # control prefix wrapping another escape: read the inner escape
  # recursively, then mask its first character with 0x9f
  when check(/(C-|c)\\[\\MCc]/) then
    scan(/(C-|c)\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord & 0x9f).chr
    c
  when scan(/C-\?|c\?/) then
    127.chr
  when scan(/(C-|c)(.)/) then
    c = ss[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
    matched
  # unicode escape: exactly four hex digits, or 2-6 hex digits in braces
  when scan(/u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/) then
    [ss[1].delete("{}").to_i(16)].pack("U")
  when scan(/u([0-9a-fA-F]{1,3})/) then
    rb_compile_error "Invalid escape character syntax"
  when scan(/[McCx0-9]/) || end_of_stream? then
    rb_compile_error("Invalid escape character syntax")
  else
    # any other character stands for itself
    ss.getch
  end.dup
end

#regx_options ⇒ Object

TODO: rewrite / remove



1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
# File 'lib/ruby_lexer.rb', line 1048

# Scans the run of lowercase letters at the scanner position as regexp
# option flags and returns the recognised ones (any of "ixmonesu") joined
# into a String, or "" when no letters are present.
#
# Reports a compile error listing any unrecognised letters.
def regx_options # TODO: rewrite / remove
  flags = scan(/[a-z]+/) ? matched.split(//) : []
  good, bad = flags.partition { |flag| flag =~ /^[ixmonesu]$/ }

  unless bad.empty?
    plural = bad.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" % [plural, bad.join.inspect])
  end

  good.join
end

#reset ⇒ Object



1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
# File 'lib/ruby_lexer.rb', line 1063

# Puts the lexer back into its initial state: counters zeroed, line
# number 1, no pending string term or token, an empty comment list,
# command_start enabled, and the cond / cmdarg stacks reset.
def reset
  self.brace_nest = 0
  self.command_start = true
  self.comments = []
  self.lex_state = EXPR_NONE
  self.lex_strterm = nil
  self.lineno = 1
  self.lpar_beg = nil
  self.paren_nest = 0
  self.space_seen = false
  self.string_nest = 0
  self.token = nil
  self.extra_lineno = 0

  # state stacks
  cond.reset
  cmdarg.reset
end

#result(new_state, token, text) ⇒ Object

:nodoc:



1081
1082
1083
1084
1085
# File 'lib/ruby_lexer.rb', line 1081

# Builds the [token, text] pair returned by the process_* methods and
# updates the lexer state on the way: :arg_state is replaced by the value
# of #arg_state, and a nil/false state leaves lex_state untouched.
def result new_state, token, text # :nodoc:
  target_state = new_state == :arg_state ? self.arg_state : new_state
  self.lex_state = target_state if target_state

  [token, text]
end

#ruby22_label? ⇒ Boolean

Returns:

  • (Boolean)


444
445
446
# File 'lib/ruby_lexer.rb', line 444

# True-ish when the parser targets Ruby 2.2 or later and a label is
# possible at the current position.
def ruby22_label?
  ruby22plus? && is_label_possible?
end

#ruby22plus? ⇒ Boolean

Returns:

  • (Boolean)


1282
1283
1284
# File 'lib/ruby_lexer.rb', line 1282

# Whether the parser class reports a version of 22 or higher.
def ruby22plus?
  parser_version = parser.class.version
  parser_version >= 22
end

#ruby23plus? ⇒ Boolean

Returns:

  • (Boolean)


1286
1287
1288
# File 'lib/ruby_lexer.rb', line 1286

# Whether the parser class reports a version of 23 or higher.
def ruby23plus?
  parser_version = parser.class.version
  parser_version >= 23
end

#scan(re) ⇒ Object



1087
1088
1089
# File 'lib/ruby_lexer.rb', line 1087

# Delegates to the underlying scanner: tries to match +re+ at the current
# position and returns whatever the scanner's #scan returns.
def scan re
  ss.scan(re)
end

#scanner_class ⇒ Object

The current scanner class. Must be overridden in subclasses.



1106
1107
1108
# File 'lib/ruby_lexer.rb', line 1106

# The scanner class used by this lexer: RPStringScanner.
def scanner_class # TODO: design this out of oedipus_lex. or something.
  klass = RPStringScanner
  klass
end

#space_vs_beginning(space_type, beg_type, fallback) ⇒ Object



1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
# File 'lib/ruby_lexer.rb', line 1110

# Chooses between three candidate token types for a prefix operator:
# +space_type+ when the operator reads as a spaced argument (a warning is
# issued), +beg_type+ at the beginning of an expression, and +fallback+
# otherwise.
def space_vs_beginning space_type, beg_type, fallback
  if is_space_arg?(check(/./m))
    warning "`**' interpreted as argument prefix"
    return space_type
  end

  return beg_type if is_beg?

  # TODO: warn_balanced("**", "argument prefix");
  fallback
end

#string(type, beg = matched, nnd = "\0") ⇒ Object



1122
1123
1124
# File 'lib/ruby_lexer.rb', line 1122

# Starts a string term: stores [:strterm, type, beg, nnd] in lex_strterm
# and returns that tuple. +beg+ defaults to the current match and +nnd+
# to "\0".
def string type, beg = matched, nnd = "\0"
  strterm = [:strterm, type, beg, nnd]
  self.lex_strterm = strterm
end

#tokadd_escape(term) ⇒ Object

TODO: consider def src= src

raise "bad src: #{src.inspect}" unless String === src
@src = RPStringScanner.new(src)

end



1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
# File 'lib/ruby_lexer.rb', line 1132

# Consumes one backslash escape at the scanner position and appends it to
# string_buffer, in most cases verbatim with the backslash kept. It is
# called from tokadd_string for regexp literals.
#
# +term+ is the literal's terminator character; an escaped terminator is
# treated specially below. Malformed escapes are reported through
# rb_compile_error. The branches are tried in order.
def tokadd_escape term # TODO: rewrite / remove
  case
  when scan(/\\\n/) then
    # just ignore
  when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    self.string_buffer << matched
  # meta/control prefix followed by another escape: keep the prefix, then
  # recurse for the escape that follows
  when scan(/\\([MC]-|c)(?=\\)/) then
    self.string_buffer << matched
    self.tokadd_escape term
  when scan(/\\([MC]-|c)(.)/) then
    self.string_buffer << matched
  when scan(/\\[McCx]/) then
    rb_compile_error "Invalid escape character syntax"
  when scan(/\\(.)/m) then
    chr = ss[1]
    prev = self.string_buffer.last
    # an escaped terminator right after "(?" is added without its backslash
    if term == chr && prev && prev.end_with?("(?") then
      self.string_buffer << chr
    elsif term == chr || chr.ascii_only? then
      self.string_buffer << matched # dunno why we keep them for ascii
    else
      self.string_buffer << chr # HACK? this is such a rat's nest
    end
  else
    rb_compile_error "Invalid escape character syntax"
  end
end

#tokadd_string(func, term, paren) ⇒ Object

TODO: rewrite / remove



1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
# File 'lib/ruby_lexer.rb', line 1160

# Accumulates literal string content into string_buffer until something
# the caller must handle is reached: the terminator at nesting depth
# zero, the start of an interpolation, whitespace inside a word list, or
# the end of the stream.
#
# func::  bit set of STR_FUNC_* flags describing the literal
# term::  terminator character
# paren:: opening character for nestable delimiters (may be nil)
#
# Returns RubyLexer::EOF when the stream is exhausted, otherwise the last
# piece handled (or the last match).
def tokadd_string(func, term, paren) # TODO: rewrite / remove
  qwords = (func & STR_FUNC_QWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = @@regexp_cache[paren]
  term_re  = @@regexp_cache[term]

  # Fallback pattern for ordinary content. It depends only on term, paren
  # and qwords, so it is built at most once per call (lazily, on the first
  # iteration that needs it) rather than on every pass through the loop.
  plain_re = nil

  until end_of_stream? do
    c = nil
    handled = true

    case
    when paren_re && scan(paren_re) then
      self.string_nest += 1
    when scan(term_re) then
      if self.string_nest == 0 then
        # leave the terminator for the caller
        ss.pos -= 1
        break
      else
        self.string_nest -= 1
      end
    when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
      ss.pos -= 1
      break
    when qwords && scan(/\s/) then
      ss.pos -= 1
      break
    when expand && scan(/#(?!\n)/) then
      # do nothing
    when check(/\\/) then
      case
      when qwords && scan(/\\\n/) then
        string_buffer << "\n"
        next
      when qwords && scan(/\\\s/) then
        c = ' '
      when expand && scan(/\\\n/) then
        next
      when regexp && check(/\\/) then
        self.tokadd_escape term
        next
      when expand && scan(/\\/) then
        c = self.read_escape
      when scan(/\\\n/) then
        # do nothing
      when scan(/\\\\/) then
        string_buffer << '\\' if escape
        c = '\\'
      when scan(/\\/) then
        unless scan(term_re) || paren.nil? || scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end # inner /\\/ case
    else
      handled = false
    end # top case

    unless handled then
      plain_re ||= begin
        t = Regexp.escape term
        x = Regexp.escape(paren) if paren && paren != "\000"
        if qwords then
          /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
        else
          /[^#{t}#{x}\#\0\\]+|./
        end
      end

      scan plain_re
      c = matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= matched
    string_buffer << c
  end # until

  c ||= matched
  c = RubyLexer::EOF if end_of_stream?

  return c
end

#unescape(s) ⇒ Object



1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
# File 'lib/ruby_lexer.rb', line 1247

# Translates the body of a backslash escape (+s+, without the backslash)
# into the character(s) it stands for.
#
# Simple escapes come from the ESCAPES table; extra_lineno is adjusted
# for a backslash-newline (+1) and for a literal "n" escape (-1). Octal,
# hex, meta, control and unicode forms are decoded here; malformed forms
# are reported through rb_compile_error; anything else is returned
# unchanged.
def unescape s
  simple = ESCAPES[s]

  self.extra_lineno += 1 if s == "\n"          # eg backslash newline strings
  self.extra_lineno -= 1 if simple && s == "n" # literal \n, not newline

  return simple if simple

  case s
  when /^[0-7]{1,3}/
    (Regexp.last_match(0).to_i(8) & 0xFF).chr
  when /^x([0-9a-fA-F]{1,2})/
    Regexp.last_match(1).to_i(16).chr
  when /^M-(.)/
    (Regexp.last_match(1)[0].ord | 0x80).chr
  when /^(C-|c)(.)/
    (Regexp.last_match(2)[0].ord & 0x9f).chr
  when /^[89a-f]/i # bad octal or hex... ignore? that's what MRI does :(
    s
  when /^[McCx0-9]/
    rb_compile_error("Invalid escape character syntax")
  when /u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/
    [Regexp.last_match(1).delete("{}").to_i(16)].pack("U")
  when /u([0-9a-fA-F]{1,3})/
    rb_compile_error("Invalid escape character syntax")
  else
    s
  end
end

#warning(s) ⇒ Object



1278
1279
1280
# File 'lib/ruby_lexer.rb', line 1278

# Hook for lexer warnings. Currently discards the message and returns
# nil.
def warning s
  # do nothing for now
  nil
end

#was_label? ⇒ Boolean

Returns:

  • (Boolean)


857
858
859
860
# File 'lib/ruby_lexer.rb', line 857

# Records in @was_label whether a Ruby 2.2+ label is possible at this
# point (see #ruby22_label?). Always returns true.
def was_label?
  label_possible = ruby22_label?
  @was_label = label_possible

  true
end