Class: RubyLexer

Inherits:
Object
  • Object
show all
Includes:
SSStackish, SSWrapper, State::Values
Defined in:
lib/ruby_lexer.rex.rb,
lib/ruby_lexer.rb,
lib/ruby_lexer.rb,
lib/ruby_lexer.rb,
lib/ruby_lexer.rb,
lib/ruby_lexer.rb,
lib/ruby_lexer_strings.rb

Overview

The generated lexer RubyLexer

Defined Under Namespace

Modules: SSStackish, SSWrapper Classes: LexerError, ScanError, State

Constant Summary collapse

EOF =

:stopdoc:

:eof_haha!
ESCAPES =
{
  "a"    => "\007",
  "b"    => "\010",
  "e"    => "\033",
  "f"    => "\f",
  "n"    => "\n",
  "r"    => "\r",
  "s"    => " ",
  "t"    => "\t",
  "v"    => "\13",
  "\\"   => '\\',
  "\n"   => "",
  "C-\?" => 127.chr,
  "c\?"  => 127.chr,
}
HAS_ENC =
"".respond_to? :encoding
BTOKENS =
{
  ".."  => :tBDOT2,
  "..." => :tBDOT3,
}
TOKENS =
{
  "!"   => :tBANG,
  "!="  => :tNEQ,
  "!@"  => :tBANG,
  "!~"  => :tNMATCH,
  ","   => :tCOMMA,
  ".."  => :tDOT2,
  "..." => :tDOT3,
  "="   => :tEQL,
  "=="  => :tEQ,
  "===" => :tEQQ,
  "=>"  => :tASSOC,
  "=~"  => :tMATCH,
  "->"  => :tLAMBDA,
}
PERCENT_END =
{
  "(" => ")",
  "[" => "]",
  "{" => "}",
  "<" => ">",
}
SIMPLE_RE_META =
/[\$\*\+\.\?\^\|\)\]\}\>]/
IDENT_CHAR =

:stopdoc:

/[a-zA-Z0-9_[:^ascii:]]/
ESC =
/\\((?>[0-7]{1,3}|x\h{1,2}|M-[^\\]|(C-|c)[^\\]|u\h{1,4}|u\{\h+(?:\s+\h+)*\}|[^0-7xMCc]))/
SIMPLE_STRING =
/((#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*)/o
SSTRING =
/((\\.|[^\'])*)/
INT_DEC =
/[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)(ri|r|i)?\b|0d[0-9_]+)(ri|r|i)?/i
INT_HEX =
/[+]?0x[a-f0-9_]+(ri|r|i)?/i
INT_BIN =
/[+]?0b[01_]+(ri|r|i)?/i
INT_OCT =
/[+]?0o?[0-7_]+(ri|r|i)?|0o(ri|r|i)?/i
FLOAT =
/[+]?\d[\d_]*\.[\d_]+(e[+-]?[\d_]+)?(?:(ri|r|i)\b)?|[+]?[\d_]+e[+-]?[\d_]+(?:(ri|r|i)\b)?/i
INT_DEC2 =
/[+]?\d[0-9_]*(?![e])((ri|r|i)\b)?/i
NUM_BAD =
/[+]?0[xbd]\b/i
INT_OCT_BAD =
/[+]?0o?[0-7_]*[89]/i
FLOAT_BAD =
/[+]?\d[\d_]*_(e|\.)/i
@@regexp_cache =
Hash.new { |h, k| h[k] = Regexp.new(Regexp.escape(k)) }

Constants included from State::Values

State::Values::EXPR_ARG, State::Values::EXPR_ARG_ANY, State::Values::EXPR_BEG, State::Values::EXPR_BEG_ANY, State::Values::EXPR_CLASS, State::Values::EXPR_CMDARG, State::Values::EXPR_DOT, State::Values::EXPR_END, State::Values::EXPR_ENDARG, State::Values::EXPR_ENDFN, State::Values::EXPR_END_ANY, State::Values::EXPR_FITEM, State::Values::EXPR_FNAME, State::Values::EXPR_LAB, State::Values::EXPR_LABEL, State::Values::EXPR_LABELED, State::Values::EXPR_LIT, State::Values::EXPR_MID, State::Values::EXPR_NONE, State::Values::EXPR_NUM, State::Values::EXPR_PAD, State::Values::EXPR_PAR, State::Values::STR_DQUOTE, State::Values::STR_DSYM, State::Values::STR_DWORD, State::Values::STR_FUNC_BORING, State::Values::STR_FUNC_DEDENT, State::Values::STR_FUNC_ESCAPE, State::Values::STR_FUNC_EXPAND, State::Values::STR_FUNC_INDENT, State::Values::STR_FUNC_LABEL, State::Values::STR_FUNC_LIST, State::Values::STR_FUNC_QWORDS, State::Values::STR_FUNC_REGEXP, State::Values::STR_FUNC_SYMBOL, State::Values::STR_FUNC_TERM, State::Values::STR_LABEL, State::Values::STR_REGEXP, State::Values::STR_SQUOTE, State::Values::STR_SSYM, State::Values::STR_SWORD, State::Values::STR_XQUOTE

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from SSWrapper

#beginning_of_line?, #check, #end_of_stream?, #getch, #in_heredoc?, #matched, #maybe_pop_stack, #pos, #pos=, #rest, #scan, #ss_string, #ss_string=, #string=, #unscan

Methods included from SSStackish

#lineno_pop, #lineno_push, #ss_pop, #ss_push

Constructor Details

#initialize(_ = nil) ⇒ RubyLexer

Returns a new instance of RubyLexer.



119
120
121
122
123
124
125
126
127
128
# File 'lib/ruby_lexer.rb', line 119

# Builds a lexer: sets the lex state to EXPR_NONE, installs fresh :cond and
# :cmdarg state stacks, points the scanner at an empty string and then calls
# #reset. The single optional argument is accepted and ignored.
def initialize _ = nil
  @lex_state = nil # remove one warning under $DEBUG
  @lex_state = EXPR_NONE

  self.cond   = RubyParserStuff::StackState.new(:cond, $DEBUG)
  self.cmdarg = RubyParserStuff::StackState.new(:cmdarg, $DEBUG)
  self.ss     = RPStringScanner.new ""

  reset
end

Instance Attribute Details

#brace_nestObject

Returns the value of attribute brace_nest.



90
91
92
# File 'lib/ruby_lexer.rb', line 90

# Reader for the @brace_nest instance variable.
def brace_nest
  @brace_nest
end

#cmd_stateObject

Temporary ivar, kept to avoid passing the value everywhere.



93
94
95
# File 'lib/ruby_lexer.rb', line 93

# Reader for @cmd_state (a temporary ivar kept to avoid passing the value
# everywhere).
def cmd_state
  @cmd_state
end

#cmdargObject

Returns the value of attribute cmdarg.



91
92
93
# File 'lib/ruby_lexer.rb', line 91

# Reader for the @cmdarg instance variable.
def cmdarg
  @cmdarg
end

#command_startObject

Returns the value of attribute command_start.



92
93
94
# File 'lib/ruby_lexer.rb', line 92

# Reader for the @command_start instance variable.
def command_start
  @command_start
end

#commentObject

Last comment lexed, or nil



117
118
119
# File 'lib/ruby_lexer.rb', line 117

# Reader for @comment: the last comment lexed, or nil.
def comment
  @comment
end

#condObject

Returns the value of attribute cond.



95
96
97
# File 'lib/ruby_lexer.rb', line 95

# Reader for the @cond instance variable.
def cond
  @cond
end

#filenameObject

The file name / path



46
47
48
# File 'lib/ruby_lexer.rex.rb', line 46

# Reader for @filename: the file name / path.
def filename
  @filename
end

#last_stateObject

Returns the value of attribute last_state.



94
95
96
# File 'lib/ruby_lexer.rb', line 94

# Reader for the @last_state instance variable.
def last_state
  @last_state
end

#lex_stateObject

:startdoc:



88
89
90
# File 'lib/ruby_lexer.rb', line 88

# Reader for the @lex_state instance variable.
def lex_state
  @lex_state
end

#lex_strtermObject

Additional context surrounding tokens that both the lexer and grammar use.



105
106
107
# File 'lib/ruby_lexer.rb', line 105

# Reader for @lex_strterm: additional context surrounding tokens that both
# the lexer and grammar use.
def lex_strterm
  @lex_strterm
end

#linenoObject

The current line number.



42
43
44
# File 'lib/ruby_lexer.rex.rb', line 42

# Reader for @lineno: the current line number.
def lineno
  @lineno
end

#lpar_begObject

Returns the value of attribute lpar_beg.



106
107
108
# File 'lib/ruby_lexer.rb', line 106

# Reader for the @lpar_beg instance variable.
def lpar_beg
  @lpar_beg
end

#old_linenoObject

Returns the value of attribute old_lineno.



97
98
99
# File 'lib/ruby_lexer.rb', line 97

# Reader for the @old_lineno instance variable.
def old_lineno
  @old_lineno
end

#old_posObject

The previous position. Only available if the :column option is on.



79
80
81
# File 'lib/ruby_lexer.rex.rb', line 79

# Reader for @old_pos: the previous position. Only available if the :column
# option is on.
def old_pos
  @old_pos
end

#old_ssObject

Returns the value of attribute old_ss.



96
97
98
# File 'lib/ruby_lexer.rb', line 96

# Reader for the @old_ss instance variable.
def old_ss
  @old_ss
end

#paren_nestObject

Returns the value of attribute paren_nest.



107
108
109
# File 'lib/ruby_lexer.rb', line 107

# Reader for the @paren_nest instance variable.
def paren_nest
  @paren_nest
end

#parserObject

HACK for very end of lexer… sigh



108
109
110
# File 'lib/ruby_lexer.rb', line 108

# Reader for @parser (described upstream as a HACK for the very end of the
# lexer).
def parser
  @parser
end

#space_seenObject

Returns the value of attribute space_seen.



109
110
111
# File 'lib/ruby_lexer.rb', line 109

# Reader for the @space_seen instance variable.
def space_seen
  @space_seen
end

#ssObject Also known as: match

The StringScanner for this lexer.



51
52
53
# File 'lib/ruby_lexer.rex.rb', line 51

# Reader for @ss: the StringScanner for this lexer (also known as #match).
def ss
  @ss
end

#start_of_current_line_posObject

The position of the start of the current line. Only available if the :column option is on.



85
86
87
# File 'lib/ruby_lexer.rex.rb', line 85

# Reader for @start_of_current_line_pos: the position of the start of the
# current line. Only available if the :column option is on.
def start_of_current_line_pos
  @start_of_current_line_pos
end

#stateObject

The current lexical state.



56
57
58
# File 'lib/ruby_lexer.rex.rb', line 56

# Reader for @state: the current lexical state.
def state
  @state
end

#string_bufferObject

Returns the value of attribute string_buffer.



110
111
112
# File 'lib/ruby_lexer.rb', line 110

# Reader for the @string_buffer instance variable.
def string_buffer
  @string_buffer
end

#string_nestObject

Returns the value of attribute string_nest.



111
112
113
# File 'lib/ruby_lexer.rb', line 111

# Reader for the @string_nest instance variable.
def string_nest
  @string_nest
end

#tokenObject

Last token read via next_token.



114
115
116
# File 'lib/ruby_lexer.rb', line 114

# Reader for @token: the last token read via next_token.
def token
  @token
end

Instance Method Details

#actionObject

Yields on the current action.



72
73
74
# File 'lib/ruby_lexer.rex.rb', line 72

# Yields to the given block and returns the block's value.
def action
  yield
end

#arg_ambiguousObject



130
131
132
# File 'lib/ruby_lexer.rb', line 130

# Reports an "ambiguous first argument" message through #warning.
def arg_ambiguous
  self.warning "Ambiguous first argument. make sure."
end

#arg_stateObject



134
135
136
# File 'lib/ruby_lexer.rb', line 134

# Picks the lex state to use after an operator-like token: EXPR_ARG when
# #is_after_operator? is truthy, EXPR_BEG otherwise.
def arg_state
  return EXPR_ARG if is_after_operator?

  EXPR_BEG
end

#columnObject

The current column, starting at 0. Only available if the :column option is on.



90
91
92
# File 'lib/ruby_lexer.rex.rb', line 90

# The current column, starting at 0: old_pos minus the position where the
# current line starts. Only available if the :column option is on.
def column
  old_pos - start_of_current_line_pos
end

#d(o) ⇒ Object



1119
1120
1121
# File 'lib/ruby_lexer.rb', line 1119

# Debugging aid: writes +o.inspect+ to $stderr.
def d o
  $stderr.puts o.inspect
end

#debug(n) ⇒ Object



138
139
140
# File 'lib/ruby_lexer.rb', line 138

# Always raises a RuntimeError whose message is "debug <n>"; callers use it
# to flag code paths that are not expected to be reached.
def debug n
  raise "debug #{n}"
end

#eat_whitespaceObject



4
5
6
7
8
9
10
11
# File 'lib/ruby_lexer_strings.rb', line 4

# Consumes a run of whitespace at the current scan position, bumping lineno
# once per newline consumed.
#
# When the scanner is exhausted while a heredoc is still pending, it calls
# itself again so the next #scan can pop the heredoc scanner and continue
# eating whitespace there.
#
# Returns the whitespace consumed (concatenated across scanners), or nil
# when nothing was consumed at all.
def eat_whitespace
  r = scan(/\s+/)
  self.lineno += r.count("\n") if r

  if eos? && in_heredoc? then # forces heredoc pop
    more = eat_whitespace
    # Either side may be nil (nothing scanned); only build a string when the
    # recursive call actually consumed something so "nothing eaten" stays nil.
    r = "#{r}#{more}" if more
  end

  r
end

#expr_dot?Boolean

Returns:

  • (Boolean)


142
143
144
# File 'lib/ruby_lexer.rb', line 142

# Result of matching the current lex_state against EXPR_DOT with =~.
def expr_dot?
  lex_state =~ EXPR_DOT
end

#expr_fname?Boolean

REFACTOR

Returns:

  • (Boolean)


146
147
148
# File 'lib/ruby_lexer.rb', line 146

# Result of matching the current lex_state against EXPR_FNAME with =~.
# Same body as #in_fname?.
def expr_fname? # REFACTOR
  lex_state =~ EXPR_FNAME
end

#expr_result(token, text) ⇒ Object



150
151
152
153
154
# File 'lib/ruby_lexer.rb', line 150

# Pushes false onto both the cond and cmdarg stacks, then returns
# +result+ for +token+/+text+ with the state set to EXPR_BEG.
def expr_result token, text
  cond.push false
  cmdarg.push false
  result EXPR_BEG, token, text
end

#heredoc(here) ⇒ Object

../compare/parse30.y:7678



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/ruby_lexer_strings.rb', line 13

# Lexes the next piece of a heredoc body.
#
# +here+ is the heredoc strterm array; only its term, func and range slots
# are used here (see the destructuring below).
#
# Returns one of:
# * [:tSTRING_END, [term, func, range]] when the terminator line is reached,
# * a :tSTRING_DVAR / :tSTRING_DBEG pair when an interpolation starts,
# * the value of +result+ for a :tSTRING_CONTENT chunk.
#
# Calls rb_compile_error when the input ends before the terminator.
def heredoc here                              # ../compare/parse30.y:7678
  _, term, func, _indent_max, _lineno, range = here

  start_line = lineno
  eos = term # HACK
  indent = func =~ STR_FUNC_INDENT

  self.string_buffer = []

  # Use the line-ending style of the heredoc's opening line, when known.
  last_line = self.ss_string[range] if range
  eol = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n" # HACK

  expand = func =~ STR_FUNC_EXPAND

  # TODO? p->heredoc_line_indent == -1

  # The terminator may be preceded by spaces/tabs only for indenting heredocs.
  indent_re = indent ? "[ \t]*" : nil
  eos_re    = /#{indent_re}#{Regexp.escape eos}(?=\r?\n|\z)/
  err_msg   = "can't match #{eos_re.inspect} anywhere in "

  maybe_pop_stack
  rb_compile_error err_msg if end_of_stream?

  # Terminator found at the start of a line: finish the heredoc.
  if beginning_of_line? && scan(eos_re) then
    scan(/\r?\n|\z/)
    self.lineno += 1 if matched =~ /\n/

    heredoc_restore

    self.lex_strterm = nil
    self.lex_state = EXPR_END

    return :tSTRING_END, [term, func, range]
  end

  if expand then
    case
    when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
      # TODO: !ISASCII
      # ?! see parser_peek_variable_name
      return :tSTRING_DVAR, matched
    when scan(/#(?=\@\@?[a-zA-Z_])/) then
      # TODO: !ISASCII
      return :tSTRING_DVAR, matched
    when scan(/#[{]/) then
      self.command_start = true
      return :tSTRING_DBEG, [matched, lineno]
    when scan(/#/) then
      # a lone "#" is ordinary content
      string_buffer << "#"
    end

    begin
      # NOTE: this visibly diverges from the C code but uses tokadd_string
      #       to stay clean.

      str = tokadd_string func, eol, nil
      rb_compile_error err_msg if str == RubyLexer::EOF

      if str != eol then
        # stopped before the end of the line: hand back what was collected
        str = string_buffer.join
        string_buffer.clear
        return result nil, :tSTRING_CONTENT, str, start_line
      else
        string_buffer << scan(/\r?\n/)
        self.lineno += 1 # TODO: try to remove most scan(/\n/) and friends
      end
    end until check eos_re
  else
    # Non-expanding heredoc: copy whole lines verbatim until the terminator.
    until check(eos_re) do
      string_buffer << scan(/.*(\r?\n|\z)/)
      self.lineno += 1
      rb_compile_error err_msg if end_of_stream?
    end
  end

  string_content = begin
                     s = string_buffer.join
                     # NOTE(review): String#b returns a copy, so this call
                     # does not change the encoding of +s+ — confirm intent.
                     s.b.force_encoding Encoding::UTF_8
                     s
                   end
  string_buffer.clear

  result nil, :tSTRING_CONTENT, string_content, start_line
end

#heredoc_identifierObject

../compare/parse30.y:7354



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/ruby_lexer_strings.rb', line 98

# Scans a heredoc identifier (the part after "<<"), in quoted or bare form,
# optionally preceded by an indent modifier ("-", plus "~" when ruby23plus?).
#
# On success it consumes the rest of the current line, stores a :heredoc
# array in lex_strterm and returns +result+ for :tSTRING_BEG (or
# :tXSTRING_BEG for a backtick-quoted identifier). Returns nil when nothing
# that looks like a heredoc identifier is found. Calls rb_compile_error for
# an unterminated quoted identifier.
def heredoc_identifier                        # ../compare/parse30.y:7354
  token  = :tSTRING_BEG
  func   = STR_FUNC_BORING
  term   = nil
  indent = nil
  quote  = nil
  char_pos = nil
  byte_pos = nil

  # characters allowed as indent modifiers, used inside a regexp char class
  heredoc_indent_mods = "-"
  heredoc_indent_mods += '\~' if ruby23plus?

  case
  when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then
    # quoted identifier: the quote character selects the string function
    mods, quote, term = match[1], match[2], match[3]
    char_pos = ss.charpos
    byte_pos = ss.pos

    func |= STR_FUNC_INDENT unless mods.empty?
    func |= STR_FUNC_DEDENT if mods == "~"
    func |= case quote
            when "\'" then
              STR_SQUOTE
            when '"' then
              STR_DQUOTE
            when "`" then
              token = :tXSTRING_BEG
              STR_XQUOTE
            else
              debug 1
            end
  when scan(/[#{heredoc_indent_mods}]?([\'\"\`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  when scan(/([#{heredoc_indent_mods}]?)(#{IDENT_CHAR}+)/) then
    # bare identifier: treated like a double-quoted one
    mods, term = match[1], match[2]
    quote = '"'
    char_pos = ss.charpos
    byte_pos = ss.pos

    func |= STR_FUNC_INDENT unless mods.empty?
    func |= STR_FUNC_DEDENT if mods == "~"
    func |= STR_DQUOTE
  else
    return
  end

  old_lineno = self.lineno
  rest_of_line = scan(/.*(?:\r?\n|\z)/)
  self.lineno += rest_of_line.count "\n"

  char_pos_end = ss.charpos - 1

  # range covers the remainder of the opening line (character offsets);
  # it stays nil when nothing follows the identifier
  range = nil
  range = char_pos..char_pos_end unless rest_of_line.empty?

  self.lex_strterm = [:heredoc, term, func, indent, old_lineno, range, byte_pos]

  result nil, token, quote, old_lineno
end

#heredoc_restoreObject

../compare/parse30.y:7438



158
159
160
161
162
163
164
165
166
167
168
# File 'lib/ruby_lexer_strings.rb', line 158

# Pushes a new scanner that covers the source text up to the end of the
# heredoc's opening line, positioned at the byte offset saved in lex_strterm,
# together with the saved line number. Always returns nil.
def heredoc_restore                           # ../compare/parse30.y:7438
  _, _term, _func, _indent, lineno, range, bytepos = lex_strterm

  # NOTE(review): heredoc_identifier leaves range nil when nothing follows
  # the identifier; range.max would raise in that case — confirm that path
  # cannot reach here.
  new_ss = ss.class.new self.ss_string[0..range.max]
  new_ss.pos = bytepos

  lineno_push lineno
  ss_push new_ss

  nil
end

#in_fname?Boolean

REFACTOR

Returns:

  • (Boolean)


156
157
158
# File 'lib/ruby_lexer.rb', line 156

# Result of matching the current lex_state against EXPR_FNAME with =~.
# Same body as #expr_fname?.
def in_fname? # REFACTOR
  lex_state =~ EXPR_FNAME
end

#int_with_base(base) ⇒ Object



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/ruby_lexer.rb', line 160

# Converts the most recently matched integer literal, read in the given
# +base+, into a token via +result+ with state EXPR_NUM.
#
# The literal's suffix picks the token: "ri" and "i" produce :tIMAGINARY,
# "r" produces :tRATIONAL, no suffix produces :tINTEGER. A literal that
# contains "__" is reported through rb_compile_error first.
def int_with_base base
  rb_compile_error "Invalid numeric format" if matched =~ /__/

  literal = matched

  type, value =
    if literal.end_with?("ri") then
      [:tIMAGINARY, Complex(0, Rational(literal[0...-2].to_i(base)))]
    elsif literal.end_with?("r") then
      [:tRATIONAL, Rational(literal[0...-1].to_i(base))]
    elsif literal.end_with?("i") then
      [:tIMAGINARY, Complex(0, literal[0...-1].to_i(base))]
    else
      [:tINTEGER, literal.to_i(base)]
    end

  result EXPR_NUM, type, value
end

#is_after_operator?Boolean

Returns:

  • (Boolean)


176
177
178
# File 'lib/ruby_lexer.rb', line 176

# Result of matching lex_state against the union EXPR_FNAME|EXPR_DOT.
def is_after_operator?
  lex_state =~ EXPR_FNAME|EXPR_DOT
end

#is_arg?Boolean

Returns:

  • (Boolean)


180
181
182
# File 'lib/ruby_lexer.rb', line 180

# Result of matching lex_state against EXPR_ARG_ANY.
def is_arg?
  lex_state =~ EXPR_ARG_ANY
end

#is_beg?Boolean

Returns:

  • (Boolean)


184
185
186
# File 'lib/ruby_lexer.rb', line 184

# Truthy when lex_state matches EXPR_BEG_ANY, or when it is exactly equal
# to EXPR_LAB (an equality test, not a =~ match).
def is_beg?
  lex_state =~ EXPR_BEG_ANY || lex_state == EXPR_LAB # yes, == EXPR_LAB
end

#is_end?Boolean

Returns:

  • (Boolean)


188
189
190
# File 'lib/ruby_lexer.rb', line 188

# Result of matching lex_state against EXPR_END_ANY.
def is_end?
  lex_state =~ EXPR_END_ANY
end

#is_label_possible?Boolean

Returns:

  • (Boolean)


192
193
194
# File 'lib/ruby_lexer.rb', line 192

# Truthy when lex_state matches EXPR_LABEL|EXPR_ENDFN while cmd_state is
# falsy, or when #is_arg? is truthy.
def is_label_possible?
  (lex_state =~ EXPR_LABEL|EXPR_ENDFN && !cmd_state) || is_arg?
end

#is_label_suffix?Boolean

Returns:

  • (Boolean)


196
197
198
# File 'lib/ruby_lexer.rb', line 196

# Checks (without consuming) for a ":" that is not followed by another ":".
def is_label_suffix?
  check(/:(?!:)/)
end

#is_local_id(id) ⇒ Object



208
209
210
211
# File 'lib/ruby_lexer.rb', line 208

# True when the parser's env maps +id+ (as a symbol) to :lvar.
# Same body as #lvar_defined?.
def is_local_id id
  # maybe just make this false for now
  self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
end

#is_space_arg?(c = "x") ⇒ Boolean

Returns:

  • (Boolean)


200
201
202
# File 'lib/ruby_lexer.rb', line 200

# Truthy when the lexer is in an argument state, whitespace was seen before
# the current token, and +c+ contains no whitespace character.
def is_space_arg? c = "x"
  is_arg? && space_seen && c !~ /\s/
end

#lambda_beginning?Boolean

Returns:

  • (Boolean)


204
205
206
# File 'lib/ruby_lexer.rb', line 204

# Truthy when lpar_beg is set and equals the current paren_nest.
def lambda_beginning?
  lpar_beg && lpar_beg == paren_nest
end

#locationObject

The current location in the parse.



126
127
128
129
130
131
132
# File 'lib/ruby_lexer.rex.rb', line 126

# The current location in the parse, as "file:line:column". "<input>" stands
# in for a missing filename, and nil parts are left out.
def location
  parts = [filename || "<input>", lineno, column]
  parts.compact.join(":")
end

#lvar_defined?(id) ⇒ Boolean

Returns:

  • (Boolean)


213
214
215
216
# File 'lib/ruby_lexer.rb', line 213

# True when the parser's env maps +id+ (as a symbol) to :lvar.
def lvar_defined? id
  # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
  self.parser.env[id.to_sym] == :lvar
end

#matchesObject

The match groups for the current scan.



63
64
65
66
67
# File 'lib/ruby_lexer.rex.rb', line 63

# The match groups (1 through 9) of the current scan, with trailing
# unmatched groups dropped. Unmatched groups in the middle stay as nil.
def matches
  groups = (1..9).map { |i| ss[i] }
  groups.pop until groups.empty? || groups.last
  groups
end

#newtokObject



170
171
172
# File 'lib/ruby_lexer_strings.rb', line 170

# Starts a new token by emptying string_buffer in place.
def newtok
  string_buffer.clear
end

#next_tokenObject

Lex the next token.



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
# File 'lib/ruby_lexer.rex.rb', line 137

# Lex the next token.
#
# When lex_strterm is set, the work is handed to process_string_or_heredoc.
# Otherwise the per-token flags are reset and the rules below are tried in
# order against the scanner until one yields a token or the input ends.
#
# Returns nil, or an Array with at least two elements; any other rule result
# raises LexerError. Raises ScanError when no rule matches or when +state+
# is anything other than nil.
def next_token
  maybe_pop_stack
  return process_string_or_heredoc if lex_strterm
  self.cmd_state = self.command_start
  self.command_start = false
  self.space_seen    = false # TODO: rename token_seen?
  self.last_state    = lex_state

  token = nil

  until ss.eos? or token do
    if ss.check(/\n/) then
      self.lineno += 1
      # line starts 1 position after the newline
      self.start_of_current_line_pos = ss.pos + 1
    end
    self.old_pos = ss.pos
    token =
      case state
      when nil then
        case
        when ss.skip(/[\ \t\r\f\v]+/) then
          action { self.space_seen = true; next }
        when text = ss.scan(/\n|\#/) then
          process_newline_or_comment text
        when text = ss.scan(/[\]\)\}]/) then
          process_brace_close text
        when ss.match?(/\!/) then
          case
          when is_after_operator? && (text = ss.scan(/\!\@/)) then
            action { result EXPR_ARG,   TOKENS[text], text }
          when text = ss.scan(/\![=~]?/) then
            action { result :arg_state, TOKENS[text], text }
          end # group /\!/
        when ss.match?(/\./) then
          case
          when text = ss.scan(/\.\.\.?/) then
            process_dots text
          when ss.skip(/\.\d/) then
            action { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
          when ss.skip(/\./) then
            action { self.lex_state = EXPR_BEG; result EXPR_DOT, :tDOT, "." }
          end # group /\./
        when text = ss.scan(/\(/) then
          process_paren text
        when text = ss.scan(/\,/) then
          action { result EXPR_PAR, TOKENS[text], text }
        when ss.match?(/=/) then
          case
          when text = ss.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
            action { result arg_state, TOKENS[text], text }
          when bol? && (text = ss.scan(/\=begin(?=\s)/)) then
            process_begin text
          when text = ss.scan(/\=(?=begin\b)/) then
            action { result arg_state, TOKENS[text], text }
          end # group /=/
        when ruby22_label? && (text = ss.scan(/\"#{SIMPLE_STRING}\":/o)) then
          process_label text
        when text = ss.scan(/\"(#{SIMPLE_STRING})\"/o) then
          process_simple_string text
        when text = ss.scan(/\"/) then
          action { string STR_DQUOTE, '"'; result nil, :tSTRING_BEG, text }
        when text = ss.scan(/\@\@?\d/) then
          action { rb_compile_error "`#{text}` is not allowed as a variable name" }
        when text = ss.scan(/\@\@?#{IDENT_CHAR}+/o) then
          process_ivar text
        when ss.match?(/:/) then
          case
          when not_end? && (text = ss.scan(/:([a-zA-Z_]#{IDENT_CHAR}*(?:[?]|[!](?!=)|=(?==>)|=(?![=>]))?)/o)) then
            process_symbol text
          when not_end? && (text = ss.scan(/\:\"(#{SIMPLE_STRING})\"/o)) then
            process_symbol text
          when not_end? && (text = ss.scan(/\:\'(#{SSTRING})\'/o)) then
            process_symbol text
          when text = ss.scan(/\:\:/) then
            process_colon2 text
          when text = ss.scan(/\:/) then
            process_colon1 text
          end # group /:/
        when text = ss.scan(/->/) then
          action { result EXPR_ENDFN, :tLAMBDA, text }
        when text = ss.scan(/[+-]/) then
          process_plus_minus text
        when ss.match?(/[+\d]/) then
          case
          when ss.skip(/#{NUM_BAD}/o) then
            action { rb_compile_error "Invalid numeric format"  }
          when ss.skip(/#{INT_DEC}/o) then
            action { int_with_base 10                           }
          when ss.skip(/#{INT_HEX}/o) then
            action { int_with_base 16                           }
          when ss.skip(/#{INT_BIN}/o) then
            action { int_with_base 2                            }
          when ss.skip(/#{INT_OCT_BAD}/o) then
            action { rb_compile_error "Illegal octal digit."    }
          when ss.skip(/#{INT_OCT}/o) then
            action { int_with_base 8                            }
          when ss.skip(/#{FLOAT_BAD}/o) then
            action { rb_compile_error "Trailing '_' in number." }
          when text = ss.scan(/#{FLOAT}/o) then
            process_float text
          when ss.skip(/#{INT_DEC2}/o) then
            action { int_with_base 10                           }
          when ss.skip(/[0-9]/) then
            action { rb_compile_error "Bad number format" }
          end # group /[+\d]/
        when text = ss.scan(/\[/) then
          process_square_bracket text
        when was_label? && (text = ss.scan(/\'#{SSTRING}\':?/o)) then
          process_label_or_string text
        when text = ss.scan(/\'/) then
          action { string STR_SQUOTE, "'"; result nil, :tSTRING_BEG, text }
        when ss.match?(/\|/) then
          case
          when ss.skip(/\|\|\=/) then
            action { result EXPR_BEG, :tOP_ASGN, "||" }
          when ss.skip(/\|\|/) then
            action { result EXPR_BEG, :tOROP,    "||" }
          when ss.skip(/\|\=/) then
            action { result EXPR_BEG, :tOP_ASGN, "|" }
          when ss.skip(/\|/) then
            action { state = is_after_operator? ? EXPR_ARG : EXPR_PAR; result state, :tPIPE, "|" }
          end # group /\|/
        when text = ss.scan(/\{/) then
          process_brace_open text
        when ss.match?(/\*/) then
          case
          when ss.skip(/\*\*=/) then
            action { result EXPR_BEG, :tOP_ASGN, "**" }
          when ss.skip(/\*\*/) then
            action { result :arg_state, space_vs_beginning(:tDSTAR, :tDSTAR, :tPOW), "**" }
          when ss.skip(/\*\=/) then
            action { result EXPR_BEG, :tOP_ASGN, "*" }
          when ss.skip(/\*/) then
            action { result :arg_state, space_vs_beginning(:tSTAR, :tSTAR, :tSTAR2), "*" }
          end # group /\*/
        when ss.match?(/</) then
          case
          when ss.skip(/\<\=\>/) then
            action { result :arg_state, :tCMP, "<=>"    }
          when ss.skip(/\<\=/) then
            action { result :arg_state, :tLEQ, "<="     }
          when ss.skip(/\<\<\=/) then
            action { result EXPR_BEG,  :tOP_ASGN, "<<" }
          when text = ss.scan(/\<\</) then
            process_lchevron text
          when ss.skip(/\</) then
            action { result :arg_state, :tLT, "<"       }
          end # group /</
        when ss.match?(/>/) then
          case
          when ss.skip(/\>\=/) then
            action { result :arg_state, :tGEQ, ">="     }
          when ss.skip(/\>\>=/) then
            action { result EXPR_BEG,  :tOP_ASGN, ">>" }
          when ss.skip(/\>\>/) then
            action { result :arg_state, :tRSHFT, ">>"   }
          when ss.skip(/\>/) then
            action { result :arg_state, :tGT, ">"       }
          end # group />/
        when ss.match?(/\`/) then
          case
          when expr_fname? && (ss.skip(/\`/)) then
            action { result EXPR_END, :tBACK_REF2, "`" }
          when expr_dot? && (ss.skip(/\`/)) then
            action { result((cmd_state ? EXPR_CMDARG : EXPR_ARG), :tBACK_REF2, "`") }
          when ss.skip(/\`/) then
            action { string STR_XQUOTE, '`'; result nil, :tXSTRING_BEG, "`" }
          end # group /\`/
        when text = ss.scan(/\?/) then
          process_questionmark text
        when ss.match?(/&/) then
          case
          when ss.skip(/\&\&\=/) then
            action { result EXPR_BEG, :tOP_ASGN, "&&" }
          when ss.skip(/\&\&/) then
            action { result EXPR_BEG, :tANDOP,   "&&" }
          when ss.skip(/\&\=/) then
            action { result EXPR_BEG, :tOP_ASGN, "&"  }
          when ss.skip(/\&\./) then
            action { result EXPR_DOT, :tLONELY,  "&." }
          when text = ss.scan(/\&/) then
            process_amper text
          end # group /&/
        when text = ss.scan(/\//) then
          process_slash text
        when ss.match?(/\^/) then
          case
          when ss.skip(/\^=/) then
            action { result EXPR_BEG, :tOP_ASGN, "^" }
          when ss.skip(/\^/) then
            action { result :arg_state, :tCARET, "^" }
          end # group /\^/
        when ss.skip(/\;/) then
          action { self.command_start = true; result EXPR_BEG, :tSEMI, ";" }
        when ss.match?(/~/) then
          case
          when is_after_operator? && (ss.skip(/\~@/)) then
            action { result :arg_state, :tTILDE, "~" }
          when ss.skip(/\~/) then
            action { result :arg_state, :tTILDE, "~" }
          end # group /~/
        when ss.match?(/\\/) then
          case
          when ss.skip(/\\\r?\n/) then
            action { self.lineno += 1; self.space_seen = true; next }
          when ss.skip(/\\/) then
            action { rb_compile_error "bare backslash only allowed before newline" }
          end # group /\\/
        when text = ss.scan(/\%/) then
          process_percent text
        when ss.match?(/\$/) then
          case
          when text = ss.scan(/\$_\w+/) then
            process_gvar text
          when text = ss.scan(/\$_/) then
            process_gvar text
          when text = ss.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
            process_gvar text
          when in_fname? && (text = ss.scan(/\$([\&\`\'\+])/)) then
            process_gvar text
          when text = ss.scan(/\$([\&\`\'\+])/) then
            process_backref text
          when in_fname? && (text = ss.scan(/\$([1-9]\d*)/)) then
            process_gvar text
          when text = ss.scan(/\$([1-9]\d*)/) then
            process_nthref text
          when text = ss.scan(/\$0/) then
            process_gvar text
          when text = ss.scan(/\$#{IDENT_CHAR}+/) then
            process_gvar text
          when text = ss.scan(/\$\W/) then
            process_gvar_oddity text
          end # group /\$/
        when text = ss.scan(/\_/) then
          process_underscore text
        when text = ss.scan(/#{IDENT_CHAR}+/o) then
          process_token text
        when ss.skip(/\004|\032|\000|\Z/) then
          action { [RubyLexer::EOF, RubyLexer::EOF] }
        when text = ss.scan(/./) then
          action { rb_compile_error "Invalid char #{text.inspect} in expression" }
        else
          text = ss.string[ss.pos .. -1]
          raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
        end
      else
        # only the nil state has rules in this lexer
        raise ScanError, "undefined state at #{location}: '#{state}'"
      end # token = case state

    next unless token # allow functions to trigger redo w/ nil
  end # while

  # a rule must produce nil or an Array of at least two elements
  raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless
    token.nil? || (Array === token && token.size >= 2)

  # auto-switch state
  self.state = token.last if token && token.first == :state

  token
end

#nextcObject



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/ruby_lexer_strings.rb', line 174

# Reads the next character from the scanner, after giving the heredoc stack
# a chance to pop. A newline is never consumed: the scanner is rewound and
# nil is returned instead. Also returns nil at end of input.
def nextc
  # TODO:
  # if (UNLIKELY((p->lex.pcur == p->lex.pend) || p->eofp || RTEST(p->lex.nextline))) {
  #     if (nextline(p)) return -1;
  # }

  maybe_pop_stack

  ch = ss.getch
  return ch unless ch == "\n"

  # put the newline back for whoever handles line endings
  ss.unscan
  nil
end

#not_end?Boolean

Returns:

  • (Boolean)


218
219
220
# File 'lib/ruby_lexer.rb', line 218

# Boolean negation of #is_end?.
def not_end?
  not is_end?
end

#old_lex_strterm=Object

Additional context surrounding tokens that both the lexer and grammar use.



1123
1124
1125
# File 'lib/ruby_lexer.rb', line 1123

# Writer: stores +value+ in @lex_strterm.
def lex_strterm=(value)
  @lex_strterm = value
end

#parse(str) ⇒ Object

Parse the given string.



104
105
106
107
108
109
110
111
# File 'lib/ruby_lexer.rex.rb', line 104

# Parse the given string: installs a new scanner (built from scanner_class)
# over +str+, resets lineno to 1 and the line-start position to 0, then
# returns whatever do_parse returns.
def parse str
  self.ss     = scanner_class.new str
  self.lineno = 1
  self.start_of_current_line_pos = 0
  self.state  ||= nil # keeps any state already set

  do_parse
end

#parse_file(path) ⇒ Object

Read in and parse the file at path.



116
117
118
119
120
121
# File 'lib/ruby_lexer.rex.rb', line 116

# Read in and parse the file at +path+.
#
# Records +path+ as the current filename, then passes the file's whole
# contents to #parse and returns its result. Raises the usual Errno::*
# errors when the file cannot be opened.
def parse_file path
  self.filename = path
  # File.open rather than Kernel#open: the latter treats a path starting
  # with "|" as a command to spawn, which must never happen for a file name.
  File.open path do |f|
    parse f.read
  end
end

#parse_string(quote) ⇒ Object

../compare/parse30.y:7273



192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# File 'lib/ruby_lexer_strings.rb', line 192

# Lexes the next piece of a string-like literal described by +quote+, an
# array whose slots 1..3 are the string function flags, the terminator and
# the opening paren (see the destructuring below).
#
# Depending on what is found it returns the closing token
# (:tSTRING_END / :tREGEXP_END), a :tSPACE separator for word lists, the
# start of an interpolation from scan_variable_name, or a :tSTRING_CONTENT
# chunk. Calls rb_compile_error when the input ends inside the literal.
def parse_string quote                         # ../compare/parse30.y:7273
  _, func, term, paren = quote

  qwords = func =~ STR_FUNC_QWORDS
  regexp = func =~ STR_FUNC_REGEXP
  expand = func =~ STR_FUNC_EXPAND
  list   = func =~ STR_FUNC_LIST
  termx  = func =~ STR_FUNC_TERM # TODO: document wtf this means

  space = false
  term_re = regexp_cache[term]

  # STR_FUNC_TERM is set below once a word list's terminator has been
  # scanned; on the following call the closing token is emitted here.
  if termx then
    # self.nextc if qwords # delayed term

    self.lex_strterm = nil

    return result EXPR_END, regexp ? :tREGEXP_END : :tSTRING_END, term
  end

  space = true if qwords and eat_whitespace

  if list then
    debug 4
    # quote[1] -= STR_FUNC_LIST
    # space = true
  end

  # TODO: move to quote.nest!
  if string_nest == 0 && scan(term_re) then
    if qwords then
      # remember that the terminator was seen; emit a separator first
      quote[1] |= STR_FUNC_TERM

      return :tSPACE, matched
    end

    return string_term func
  end

  return result nil, :tSPACE, " " if space

  newtok

  if expand && check(/#/) then
    t = self.scan_variable_name
    return t if t

    # not an interpolation: keep the "#" as literal content
    tokadd "#"
  end

  # TODO: add string_nest, enc, base_enc ?
  lineno = self.lineno # line where this chunk starts
  if tokadd_string(func, term, paren) == RubyLexer::EOF then
    if qwords then
      rb_compile_error "unterminated list meets end of file"
    end

    if regexp then
      rb_compile_error "unterminated regexp meets end of file"
    else
      rb_compile_error "unterminated string meets end of file"
    end
  end

  result nil, :tSTRING_CONTENT, string_buffer.join, lineno
end

#possibly_escape_string(text, check) ⇒ Object



222
223
224
225
226
227
228
229
230
# File 'lib/ruby_lexer.rb', line 222

# Return the first capture of the current match, unescaped.
#
# When +text+ matches +check+ the capture goes through unescape_string;
# otherwise only the two single-quote escapes (\\ and \') are collapsed.
def possibly_escape_string(text, check)
  body = match[1]

  return unescape_string(body) if text =~ check

  body.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
end

#process_amper(text) ⇒ Object



232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/ruby_lexer.rb', line 232

# Classify a lone "&".
#
# Yields tAMPER (argument prefix) when the lexer is in an argument
# position with preceding space and no following space, or when the state
# is EXPR_BEG/EXPR_MID; otherwise tAMPER2. The resulting state
# is resolved by #result from :arg_state.
def process_amper(text)
  prefix_arg = is_arg? && space_seen && !check(/\s/)
  warning("`&' interpreted as argument prefix") if prefix_arg

  token =
    if prefix_arg || lex_state =~ EXPR_BEG|EXPR_MID
      :tAMPER
    else
      :tAMPER2
    end

  result :arg_state, token, "&"
end

#process_backref(text) ⇒ Object



245
246
247
248
249
# File 'lib/ruby_lexer.rb', line 245

# Emit tBACK_REF whose value is the first capture of the current match,
# converted to a Symbol; the lexer moves to EXPR_END.
def process_backref(text)
  # TODO: can't do lineno hack w/ symbol
  backref = match[1].to_sym
  result(EXPR_END, :tBACK_REF, backref)
end

#process_begin(text) ⇒ Object



251
252
253
254
255
256
257
258
259
260
261
262
263
264
# File 'lib/ruby_lexer.rb', line 251

# Swallow an embedded document (presumably opened by "=begin"; the rule
# that triggers this method is not visible here) into the pending comment.
#
# The text that triggered the rule (+matched+) is appended to
# self.comment, then everything through the line starting with "=end" is
# scanned and appended too; lineno advances by the newlines consumed.
# Returns nil, so no token is produced.
#
# Raises (via rb_compile_error) "embedded document meets end of file"
# when no "=end" line follows; the comment buffer is cleared first.
def process_begin text
  self.comment ||= +""
  self.comment << matched

  unless scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m) then
    self.comment = nil
    rb_compile_error("embedded document meets end of file")
  end

  self.comment << matched
  self.lineno += matched.count("\n") # HACK?

  nil # TODO
end

#process_brace_close(text) ⇒ Object

TODO: make all tXXXX terminals include lexer.lineno … enforce it somehow?



268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# File 'lib/ruby_lexer.rb', line 268

# Handle a closing "}", "]" or ")" (read from +matched+).
#
# A "}" that drives brace_nest below zero is returned as tSTRING_DEND
# without touching the cond/cmdarg stacks. Otherwise both stacks are
# popped, lex_state is set and the matching tRCURLY / tRBRACK / tRPAREN
# pair is returned; "]" and ")" also decrement paren_nest.
#
# Raises RuntimeError for any other matched text.
def process_brace_close text
  case matched
  when "}" then
    self.brace_nest -= 1
    return :tSTRING_DEND, matched if brace_nest < 0
  end

  # matching compare/parse26.y:8099
  cond.pop
  cmdarg.pop

  case matched
  when "}" then
    self.lex_state   = ruby24minus? ? EXPR_ENDARG : EXPR_END
    return :tRCURLY, matched
  when "]" then
    self.paren_nest -= 1
    self.lex_state   = ruby24minus? ? EXPR_ENDARG : EXPR_END
    return :tRBRACK, matched
  when ")" then
    self.paren_nest -= 1
    self.lex_state   = EXPR_ENDFN
    return :tRPAREN, matched
  else
    raise "Unknown bracing: #{matched.inspect}"
  end
end

#process_brace_open(text) ⇒ Object



296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# File 'lib/ruby_lexer.rb', line 296

# Handle an opening "{".
#
# Always increments brace_nest. At the start of a lambda body it clears
# lpar_beg, decrements paren_nest and returns tLAMBEG via expr_result.
# Otherwise the token is chosen from lex_state (tLBRACE for a hash,
# tLCURLY / tLBRACE_ARG for a block), false is pushed on the cond and
# cmdarg stacks, and the token is returned with the new state.
def process_brace_open text
  # matching compare/parse23.y:8694
  self.brace_nest += 1

  if lambda_beginning? then
    self.lpar_beg = nil
    self.paren_nest -= 1 # close arg list when lambda opens body

    return expr_result(:tLAMBEG, "{")
  end

  token = case
          when lex_state =~ EXPR_LABELED then
            :tLBRACE     # hash
          when lex_state =~ EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN then
            :tLCURLY     # block (primary) "{" in parse.y
          when lex_state =~ EXPR_ENDARG then
            :tLBRACE_ARG # block (expr)
          else
            :tLBRACE     # hash
          end

  state = token == :tLBRACE_ARG ? EXPR_BEG : EXPR_PAR
  # anything other than a hash brace opens a block, so a command may follow
  self.command_start = true if token != :tLBRACE

  cond.push false
  cmdarg.push false
  result state, token, text
end

#process_colon1(text) ⇒ Object



326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
# File 'lib/ruby_lexer.rb', line 326

# Handle a single ":".
#
# At an expression end, or when whitespace follows, it is a plain tCOLON.
# Otherwise it begins a symbol: a following quote character is consumed
# and registered through #string (STR_SSYM for ', STR_DSYM for "), and
# tSYMBEG is returned in EXPR_FNAME.
def process_colon1(text)
  # ?: / then / when
  return result(EXPR_BEG, :tCOLON, text) if is_end? || check(/\s/)

  if scan(/\'/)
    string(STR_SSYM, matched)
  elsif scan(/\"/)
    string(STR_DSYM, matched)
  end

  result(EXPR_FNAME, :tSYMBEG, text)
end

#process_colon2(text) ⇒ Object



342
343
344
345
346
347
348
# File 'lib/ruby_lexer.rb', line 342

# Handle "::".
#
# Returns tCOLON3 (state EXPR_BEG) at the beginning of an expression, in
# EXPR_CLASS state, or in a spaced-argument position; otherwise tCOLON2
# (state EXPR_DOT).
def process_colon2(text)
  leading = is_beg? || lex_state =~ EXPR_CLASS || is_space_arg?
  return result(EXPR_BEG, :tCOLON3, text) if leading

  result(EXPR_DOT, :tCOLON2, text)
end

#process_dots(text) ⇒ Object

parse32.y:10216



350
351
352
353
354
355
356
357
358
359
360
# File 'lib/ruby_lexer.rb', line 350

# Handle ".." and "...".
#
# Inside an argument definition "..." becomes tBDOT3 (state EXPR_ENDARG).
# Otherwise the token is looked up by +text+ in BTOKENS when the parser is
# 2.7+ and the lexer was at the beginning of an expression, else in
# TOKENS; the state becomes EXPR_BEG.
def process_dots text # parse32.y:10216
  # sample is_beg? first: the assignment below changes lex_state
  is_beg = self.is_beg?
  self.lex_state = EXPR_BEG

  return result EXPR_ENDARG, :tBDOT3, text if
    parser.in_argdef && text == "..." # TODO: version check?

  tokens = ruby27plus? && is_beg ? BTOKENS : TOKENS

  result EXPR_BEG, tokens[text], text
end

#process_float(text) ⇒ Object



362
363
364
365
366
367
368
369
370
371
372
373
374
375
# File 'lib/ruby_lexer.rb', line 362

# Convert a float-shaped literal to its token.
#
# The suffix selects the token: "ri" => tIMAGINARY of a Rational,
# "i" => tIMAGINARY of a Float, "r" => tRATIONAL, none => tFLOAT.
# The lexer moves to EXPR_NUM.
#
# Raises (via rb_compile_error) when +text+ contains "__".
def process_float(text)
  rb_compile_error "Invalid numeric format" if text =~ /__/

  token, value =
    if text.end_with?("ri")
      [:tIMAGINARY, Complex(0, Rational(text.chop.chop))]
    elsif text.end_with?("i")
      [:tIMAGINARY, Complex(0, text.chop.to_f)]
    elsif text.end_with?("r")
      [:tRATIONAL, Rational(text.chop)]
    else
      [:tFLOAT, text.to_f]
    end

  result EXPR_NUM, token, value
end

#process_gvar(text) ⇒ Object



377
378
379
380
381
382
383
# File 'lib/ruby_lexer.rb', line 377

# Emit tGVAR for a global variable name and move to EXPR_END.
#
# Raises (via rb_compile_error) for a bare "$-" when the parser version
# is greater than 20.
def process_gvar(text)
  bare_dash = parser.class.version > 20 && text == "$-"
  rb_compile_error "unexpected $undefined" if bare_dash

  result(EXPR_END, :tGVAR, text)
end

#process_gvar_oddity(text) ⇒ Object



385
386
387
# File 'lib/ruby_lexer.rb', line 385

# Reject +text+ as a global variable name; always reports an error through
# rb_compile_error.
def process_gvar_oddity(text)
  complaint = "#{text.inspect} is not allowed as a global variable name"
  rb_compile_error(complaint)
end

#process_ivar(text) ⇒ Object



389
390
391
392
# File 'lib/ruby_lexer.rb', line 389

# Emit tCVAR when +text+ starts with "@@", otherwise tIVAR; the lexer
# moves to EXPR_END.
def process_ivar(text)
  kind = if text =~ /^@@/ then :tCVAR else :tIVAR end
  result(EXPR_END, kind, text)
end

#process_label(text) ⇒ Object



394
395
396
397
398
# File 'lib/ruby_lexer.rb', line 394

# Emit tLABEL (state EXPR_LAB). The label text comes from
# possibly_escape_string, which fully unescapes it when +text+ starts
# with a double quote.
def process_label(text)
  label = possibly_escape_string(text, /^\"/)
  result(EXPR_LAB, :tLABEL, label)
end

#process_label_or_string(text) ⇒ Object



400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
# File 'lib/ruby_lexer.rb', line 400

# Decide whether a quoted run ending in ":" is a label or a plain string.
#
# When @was_label is set and +text+ ends with ":", the flag is cleared and
# the text is handled by process_label. Otherwise a trailing ":" is pushed
# back onto the input and the rest is returned as tSTRING: the surrounding
# quote characters are dropped and only \\ and \' are unescaped. lineno
# advances by the newlines in the string; the token carries the line it
# started on.
def process_label_or_string text
  if @was_label && text =~ /:\Z/ then
    @was_label = nil
    return process_label text
  elsif text =~ /:\Z/ then
    self.pos -= 1 # put back ":"
    text = text[0..-2]
  end

  orig_line = lineno
  str = text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
  self.lineno += str.count("\n")

  result EXPR_END, :tSTRING, str, orig_line
end

#process_lchevron(text) ⇒ Object



416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
# File 'lib/ruby_lexer.rb', line 416

# Handle "<<": either the start of a heredoc or the tLSHFT operator.
#
# A heredoc is attempted (via heredoc_identifier) only when the state is
# not EXPR_DOT/EXPR_CLASS, the lexer is not at an expression end, and it is
# either not in an argument position or is labeled / preceded by space.
# If that yields a token it is returned; otherwise tLSHFT is returned with
# the state set to EXPR_ARG after an operator, else EXPR_BEG.
def process_lchevron text
  if (lex_state !~ EXPR_DOT|EXPR_CLASS &&
      !is_end? &&
      (!is_arg? || lex_state =~ EXPR_LABELED || space_seen)) then
    tok = self.heredoc_identifier
    return tok if tok
  end

  if is_after_operator? then
    self.lex_state = EXPR_ARG
  else
    # must be read before lex_state is overwritten on the next line
    self.command_start = true if lex_state =~ EXPR_CLASS
    self.lex_state = EXPR_BEG
  end

  result lex_state, :tLSHFT, "\<\<"
end

#process_newline_or_comment(text) ⇒ Object

../compare/parse30.y:9126 ish



434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
# File 'lib/ruby_lexer.rb', line 434

# Handle a newline or the start of a "#" comment.
#
# Comments: the "#" is pushed back and every consecutive comment line is
# scanned into self.comment (leading spaces before "#" stripped), with
# lineno advanced by the newlines consumed. Returns nil at end of stream.
#
# Newlines: returns nil (the caller retries) when the newline is not
# significant in the current state, or when the next line starts with a
# comment, "&." or a single "."; otherwise sets command_start and returns
# tNL in EXPR_BEG.
def process_newline_or_comment text    # ../compare/parse30.y:9126 ish
  c = matched

  if c == "#" then
    self.pos -= 1

    while scan(/\s*\#.*(\n+|\z)/) do
      self.lineno += matched.count "\n"
      self.comment ||= +""
      self.comment << matched.gsub(/^ +#/, "#").gsub(/^ +$/, "")
    end

    return nil if end_of_stream?
  end

  # from here on +c+ is reused as "newline is insignificant in this state"
  c = (lex_state =~ EXPR_BEG|EXPR_CLASS|EXPR_FNAME|EXPR_DOT &&
       lex_state !~ EXPR_LABELED)
  if c || self.lex_state == EXPR_LAB then # yes, == EXPR_LAB
    # ignore if !fallthrough?
    if !c && parser.in_kwarg then
      # normal newline
      self.command_start = true
      return result EXPR_BEG, :tNL, nil
    else
      maybe_pop_stack
      return # goto retry
    end
  end

  if scan(/[\ \t\r\f\v]+/) then
    self.space_seen = true
  end

  if check(/#/) then
    return # goto retry
  elsif check(/&\.|\.(?!\.)/) then # C version is a hellish obfuscated xnor
    return # goto retry
  end

  self.command_start = true

  result EXPR_BEG, :tNL, nil
end

#process_nthref(text) ⇒ Object



478
479
480
481
# File 'lib/ruby_lexer.rb', line 478

# Emit tNTH_REF whose value is the first capture of the current match as
# an Integer; the lexer moves to EXPR_END.
def process_nthref(text)
  # TODO: can't do lineno hack w/ number
  index = match[1].to_i
  result(EXPR_END, :tNTH_REF, index)
end

#process_paren(text) ⇒ Object



483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
# File 'lib/ruby_lexer.rb', line 483

# Handle an opening "(".
#
# Picks tLPAREN at the beginning of an expression, tLPAREN_ARG for a
# spaced argument, and tLPAREN2 in every other case. paren_nest is
# incremented, false is pushed on the cond and cmdarg stacks, and the
# token is returned in EXPR_PAR.
def process_paren(text)
  token =
    case
    when is_beg?
      :tLPAREN
    when !space_seen
      # foo( ... ) => method call, no ambiguity
      :tLPAREN2
    when is_space_arg?
      :tLPAREN_ARG
    when lex_state =~ EXPR_ENDFN && !lambda_beginning?
      # TODO:
      # warn("parentheses after method name is interpreted as " \
      #      "an argument list, not a decomposed argument")
      :tLPAREN2
    else
      :tLPAREN2 # plain "(" in parse.y
    end

  self.paren_nest += 1

  cond.push false
  cmdarg.push false
  result EXPR_PAR, token, text
end

#process_percent(text) ⇒ Object



507
508
509
510
511
512
513
514
515
516
517
518
# File 'lib/ruby_lexer.rb', line 507

# Handle "%": a percent literal, the "%=" operator or the modulo operator.
#
# At the beginning of an expression it is a percent literal. Otherwise a
# following "=" (which is consumed) makes it tOP_ASGN. A spaced-argument
# position, or EXPR_FITEM state followed by "s", also starts a percent
# literal; anything else is tPERCENT.
def process_percent text
  case
  when is_beg? then
    process_percent_quote
  when scan(/\=/)
    result EXPR_BEG, :tOP_ASGN, "%"
  when is_space_arg?(check(/\s/)) || (lex_state =~ EXPR_FITEM && check(/s/))
    process_percent_quote
  else
    result :arg_state, :tPERCENT, "%"
  end
end

#process_percent_quoteObject

called from process_percent



260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# File 'lib/ruby_lexer_strings.rb', line 260

# Start a percent literal (%q, %Q, %w, %W, %i, %I, %x, %r, %s or bare %).
#
# Reads the type character and the opening delimiter, works out the
# closing delimiter (PERCENT_END maps bracket pairs; any other delimiter
# closes itself), registers the literal through #string and returns the
# matching *_BEG token with the literal's opening text and starting line.
#
# Raises (via rb_compile_error) when input ends before the delimiter, or
# when the type character is not one of QqWwIixrs.
def process_percent_quote                      # ../compare/parse30.y:8645
  c = getch # type %<type><term>...<term>

  long_hand = !!(c =~ /[QqWwIixrs]/)

  # no type letter: the character just read is the delimiter, type is "Q"
  if end_of_stream? || c !~ /\p{Alnum}/ then
    term = c # TODO? PERCENT_END[c] || c

    debug 2 if c && c !~ /\p{ASCII}/
    c = "Q"
  else
    term = getch

    debug 3 if term =~ /\p{Alnum}|\P{ASCII}/
  end

  if end_of_stream? or c == RubyLexer::EOF or term == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # "\0" is special to indicate beg=nnd and that no nesting?
  paren = term
  term = PERCENT_END[term]
  term, paren = paren, "\0" if term.nil? # TODO: "\0" -> nil

  text = long_hand ? "%#{c}#{paren}" : "%#{term}"

  current_line = self.lineno

  token_type, string_type =
    case c
    when "Q" then
      [:tSTRING_BEG,   STR_DQUOTE]
    when "q" then
      [:tSTRING_BEG,   STR_SQUOTE]
    when "W" then
      eat_whitespace
      [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
    when "w" then
      eat_whitespace
      [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
    when "I" then
      eat_whitespace
      [:tSYMBOLS_BEG,  STR_DQUOTE | STR_FUNC_QWORDS]
    when "i" then
      eat_whitespace
      [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
    when "x" then
      [:tXSTRING_BEG,  STR_XQUOTE]
    when "r" then
      [:tREGEXP_BEG,   STR_REGEXP]
    when "s" then
      self.lex_state = EXPR_FNAME
      [:tSYMBEG,       STR_SSYM]
    else
      rb_compile_error "unknown type of %string. Expected [QqWwIixrs], found '#{c}'."
    end

  string string_type, term, paren

  result nil, token_type, text, current_line
end

#process_plus_minus(text) ⇒ Object



520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
# File 'lib/ruby_lexer.rb', line 520

# Handle "+" or "-" (read from +matched+).
#
# * After an operator: "+@"/"-@" give the unary token, otherwise the
#   binary one, both in EXPR_ARG.
# * Followed by "=": tOP_ASGN.
# * In a unary position (beginning of expression, or an argument preceded
#   by space and not followed by space): before a digit, "-" gives
#   tUMINUS_NUM and "+" returns nil (no token); otherwise the unary token.
# * Anything else: the binary token in EXPR_BEG.
def process_plus_minus text
  sign = matched
  utype, type = if sign == "+" then
                  [:tUPLUS, :tPLUS]
                else
                  [:tUMINUS, :tMINUS]
                end

  if is_after_operator? then
    if scan(/@/) then
      return result(EXPR_ARG, utype, "#{sign}@")
    else
      return result(EXPR_ARG, type, sign)
    end
  end

  return result(EXPR_BEG, :tOP_ASGN, sign) if scan(/\=/)

  if is_beg? || (is_arg? && space_seen && !check(/\s/)) then
    arg_ambiguous if is_arg?

    if check(/\d/) then
      return nil if utype == :tUPLUS
      return result EXPR_BEG, :tUMINUS_NUM, sign
    end

    return result EXPR_BEG, utype, sign
  end

  result EXPR_BEG, type, sign
end

#process_questionmark(text) ⇒ Object



552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
# File 'lib/ruby_lexer.rb', line 552

# Handle "?": the ternary operator or a character literal.
#
# Returns tEH (state EXPR_BEG) at an expression end, when whitespace
# follows, or when two word characters follow. Otherwise the next
# character, or the escape decoded by read_escape after a backslash, is
# returned as a one-character tSTRING in EXPR_END.
#
# Raises (via rb_compile_error) when input ends right after the "?".
def process_questionmark text
  if is_end? then
    return result EXPR_BEG, :tEH, "?"
  end

  if end_of_stream? then
    rb_compile_error "incomplete character syntax: parsed #{text.inspect}"
  end

  if check(/\s|\v/) then
    unless is_arg? then
      # +matched+ is the whitespace character found by the check above
      c2 = { " " => "s",
            "\n" => "n",
            "\t" => "t",
            "\v" => "v",
            "\r" => "r",
            "\f" => "f" }[matched]

      if c2 then
        warning("invalid character syntax; use ?\\" + c2)
      end
    end

    # ternary
    return result EXPR_BEG, :tEH, "?"
  elsif check(/\w(?=\w)/) then # ternary, also
    return result EXPR_BEG, :tEH, "?"
  end

  c = if scan(/\\/) then
        self.read_escape
      else
        getch
      end

  result EXPR_END, :tSTRING, c
end

#process_simple_string(text) ⇒ Object



590
591
592
593
594
595
596
597
# File 'lib/ruby_lexer.rb', line 590

# Emit tSTRING for a complete quoted string held in +text+.
#
# The first and last characters (the quotes) are dropped and the rest goes
# through unescape_string. lineno advances by the newlines in +text+; the
# token carries the line on which the string started.
def process_simple_string(text)
  start_line = lineno
  self.lineno += text.count("\n")

  inner = text[1..-2]

  result(EXPR_END, :tSTRING, unescape_string(inner), start_line)
end

#process_slash(text) ⇒ Object



599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
# File 'lib/ruby_lexer.rb', line 599

# Handle "/": a regexp opener, the "/=" operator or division.
#
# At the beginning of an expression it opens a regexp (registered through
# #string with STR_REGEXP) and returns tREGEXP_BEG. A following "=" gives
# tOP_ASGN. In an argument position preceded by space and not followed by
# whitespace it is also treated as a regexp opener, after calling
# arg_ambiguous. Anything else is tDIVIDE.
def process_slash text
  if is_beg? then
    string STR_REGEXP, matched

    return result nil, :tREGEXP_BEG, "/"
  end

  if scan(/\=/) then
    return result(EXPR_BEG, :tOP_ASGN, "/")
  end

  if is_arg? && space_seen then
    # note: this scan consumes one whitespace character when present
    unless scan(/\s/) then
      arg_ambiguous
      string STR_REGEXP, "/"
      return result(nil, :tREGEXP_BEG, "/")
    end
  end

  result :arg_state, :tDIVIDE, "/"
end

#process_square_bracket(text) ⇒ Object



621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
# File 'lib/ruby_lexer.rb', line 621

# Handle an opening "[".
#
# After an operator only "[]" and "[]=" are accepted and returned as
# tAREF / tASET (state EXPR_ARG, paren_nest left unchanged). Otherwise the
# token is tLBRACK at the beginning of an expression or in a spaced /
# labeled argument position, else tLBRACK2; false is pushed on the cond
# and cmdarg stacks and the state becomes EXPR_PAR.
#
# Raises (via rb_compile_error) for any other "[" after an operator.
def process_square_bracket text
  self.paren_nest += 1

  token = nil

  if is_after_operator? then
    case
    when scan(/\]\=/) then
      self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
      return result EXPR_ARG, :tASET, "[]="
    when scan(/\]/) then
      self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
      return result EXPR_ARG, :tAREF, "[]"
    else
      rb_compile_error "unexpected '['"
    end
  elsif is_beg? then
    token = :tLBRACK
  elsif is_arg? && (space_seen || lex_state =~ EXPR_LABELED) then
    token = :tLBRACK
  else
    token = :tLBRACK2
  end

  cond.push false
  cmdarg.push false
  result EXPR_PAR, token, text
end

#process_string_or_heredocObject

../compare/parse30.y:9075



323
324
325
326
327
328
329
# File 'lib/ruby_lexer_strings.rb', line 323

# Continue lexing the literal described by lex_strterm: heredocs (first
# element :heredoc) go to #heredoc, everything else to #parse_string.
def process_string_or_heredoc                  # ../compare/parse30.y:9075
  return heredoc(lex_strterm) if lex_strterm[0] == :heredoc

  parse_string(lex_strterm)
end

#process_symbol(text) ⇒ Object



650
651
652
653
654
# File 'lib/ruby_lexer.rb', line 650

# Emit tSYMBOL (state EXPR_LIT). The symbol text comes from
# possibly_escape_string, which fully unescapes it when +text+ starts
# with a colon followed by a double quote.
def process_symbol(text)
  name = possibly_escape_string(text, /^:\"/) # stupid emacs
  result(EXPR_LIT, :tSYMBOL, name)
end

#process_token(text) ⇒ Object



656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
# File 'lib/ruby_lexer.rb', line 656

# Classify an identifier-like token.
#
# +text+ becomes self.token and is extended in place with a trailing "!"
# or "?" (when not followed by "="), or with "=" when lexing a method
# name. The result is, in order of precedence: tLABEL when a label suffix
# follows, a keyword token (delegated to process_token_keyword) unless the
# state is EXPR_DOT, or tFID / tCONSTANT / tIDENTIFIER.
def process_token text
  # matching: parse_ident in compare/parse23.y:7989
  # FIX: remove: self.last_state = lex_state

  token = self.token = text
  token << matched if scan(/[\!\?](?!=)/)

  tok_id =
    case
    when token =~ /[!?]$/ then
      :tFID
    when lex_state =~ EXPR_FNAME && scan(/=(?:(?![~>=])|(?==>))/) then
      # ident=, not =~ => == or followed by =>
      # TODO test lexing of a=>b vs a==>b
      token << matched
      :tIDENTIFIER
    when token =~ /^[A-Z]/ then
      :tCONSTANT
    else
      :tIDENTIFIER
    end

  if is_label_possible? and is_label_suffix? then
    scan(/:/)
    return result EXPR_LAB, :tLABEL, token
  end

  # TODO: mb == ENC_CODERANGE_7BIT && lex_state !~ EXPR_DOT
  if lex_state !~ EXPR_DOT then
    # See if it is a reserved word.
    keyword = RubyParserStuff::Keyword.keyword token

    return process_token_keyword keyword if keyword
  end

  # matching: compare/parse32.y:9031
  state = if lex_state =~ EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT then
            cmd_state ? EXPR_CMDARG : EXPR_ARG
          elsif lex_state =~ EXPR_FNAME then
            EXPR_ENDFN
          else
            EXPR_END
          end
  self.lex_state = state

  tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)

  # a known local variable ends the expression but may still take a label
  if last_state !~ EXPR_DOT|EXPR_FNAME and
      (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
      lvar_defined?(token) then
    state = EXPR_END|EXPR_LABEL
  end

  result state, tok_id, token
end

#process_token_keyword(keyword) ⇒ Object



712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
# File 'lib/ruby_lexer.rb', line 712

# Emit the token for a reserved word.
#
# +keyword+ supplies id0, id1 and state. While lexing a method name
# (EXPR_FNAME) id0 is returned in EXPR_ENDFN. Otherwise lex_state becomes
# keyword.state and:
# * "do" is refined to kDO_LAMBDA, kDO_COND, kDO_BLOCK or kDO;
# * when the previous state matched EXPR_PAD, id0 is returned;
# * when id0 and id1 differ, id1 is returned in EXPR_PAR (presumably the
#   modifier form of the keyword -- confirm against the keyword table);
# * otherwise id1 is returned in the keyword's state.
def process_token_keyword keyword
  # matching MIDDLE of parse_ident in compare/parse32.y:9695
  state = lex_state

  return result(EXPR_ENDFN, keyword.id0, token) if lex_state =~ EXPR_FNAME

  self.lex_state = keyword.state
  self.command_start = true if lex_state =~ EXPR_BEG

  case
  when keyword.id0 == :kDO then # parse32.y line 9712
    case
    when lambda_beginning? then
      self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
      self.paren_nest -= 1 # TODO: question this?
      result lex_state, :kDO_LAMBDA, token
    when cond.is_in_state then
      result lex_state, :kDO_COND, token
    when cmdarg.is_in_state && state != EXPR_CMDARG then
      result lex_state, :kDO_BLOCK, token
    else
      result lex_state, :kDO, token
    end
  when state =~ EXPR_PAD then
    result lex_state, keyword.id0, token
  when keyword.id0 != keyword.id1 then
    result EXPR_PAR, keyword.id1, token
  else
    result lex_state, keyword.id1, token
  end
end

#process_underscore(text) ⇒ Object



744
745
746
747
748
749
750
751
752
753
# File 'lib/ruby_lexer.rb', line 744

# Handle a token that starts with "_".
#
# The "_" is pushed back first. "__END__" alone on a line (at the start of
# a line, followed by a newline or end of input) terminates the scanner
# and returns an EOF pair. Otherwise a run of identifier characters is
# scanned and handed to process_token. Returns nil if neither matches.
def process_underscore text
  self.unscan # put back "_"

  if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
    ss.terminate
    [RubyLexer::EOF, RubyLexer::EOF]
  elsif scan(/#{IDENT_CHAR}+/) then
    process_token matched
  end
end

#rb_compile_error(msg) ⇒ Object



755
756
757
758
# File 'lib/ruby_lexer.rb', line 755

# Raise RubyParser::SyntaxError with +msg+ followed by the current line
# number and the remainder of the current input line.
def rb_compile_error(msg)
  where   = self.lineno
  context = self.rest[/^.*/].inspect

  raise RubyParser::SyntaxError, msg + ". near line #{where}: #{context}"
end

#read_escape(flags = nil) ⇒ Object

../compare/parse30.y:6712



331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
# File 'lib/ruby_lexer_strings.rb', line 331

# Decode one escape sequence at the scanner position and return it as a
# fresh (dup'ed) string.
#
# Callers visible in this file scan the leading backslash before calling.
# Handles the single-letter escapes, octal and hex bytes, \M- and
# \C- / \c modifiers (recursing for a nested escape) and \uXXXX / \u{...}
# code points. Any other character is returned as itself.
#
# +flags+ is not used by this method.
#
# Raises (via rb_compile_error) "Invalid escape character syntax" for a
# short \u escape, a dangling M/c/C/x or digit, or end of input.
def read_escape flags = nil                    # ../compare/parse30.y:6712
  case
  when scan(/\\/) then                  # Backslash
    '\\'
  when scan(/n/) then                   # newline
    "\n"
  when scan(/t/) then                   # horizontal tab
    "\t"
  when scan(/r/) then                   # carriage-return
    "\r"
  when scan(/f/) then                   # form-feed
    "\f"
  when scan(/v/) then                   # vertical tab
    "\13"
  when scan(/a/) then                   # alarm(bell)
    "\007"
  when scan(/e/) then                   # escape
    "\033"
  when scan(/[0-7]{1,3}/) then          # octal constant
    (matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8
  when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    # TODO: force encode everything to UTF-8?
    match[1].to_i(16).chr.force_encoding Encoding::UTF_8
  when scan(/b/) then                   # backspace
    "\010"
  when scan(/s/) then                   # space
    " "
  when check(/M-\\u/) then
    debug 5
  when scan(/M-\\(?=.)/) then
    # meta + nested escape: decode the inner escape, then set the high bit
    c = read_escape
    c[0] = (c[0].ord | 0x80).chr
    c
  when scan(/M-(\p{ASCII})/) then
    # TODO: ISCNTRL(c) -> goto eof
    c = match[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when check(/(C-|c)\\u/) then
    debug 6
  when scan(/(C-|c)\\?\?/) then
    127.chr
  when scan(/(C-|c)\\/) then
    # control + nested escape: decode the inner escape, then mask with 0x9f
    c = read_escape
    c[0] = (c[0].ord & 0x9f).chr
    c
  when scan(/(?:C-|c)(.)/) then
    c = match[1]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
    matched
  when scan(/u(\h{4})/) then
    [match[1].to_i(16)].pack("U")
  when scan(/u(\h{1,3})/) then
    debug 7
    rb_compile_error "Invalid escape character syntax"
  when scan(/u\{(\h+(?: +\h+)*)\}/) then
    match[1].split.map { |s| s.to_i(16) }.pack("U*")
  when scan(/[McCx0-9]/) || end_of_stream? then
    rb_compile_error("Invalid escape character syntax")
  else
    getch
  end.dup
end

#regexp_cacheObject



61
62
63
# File 'lib/ruby_lexer.rb', line 61

# Accessor for the class-level @@regexp_cache: a Hash that, on a missing
# key, builds and stores Regexp.new(Regexp.escape(key)). Used to turn
# string terminators into literal-matching regexps.
def regexp_cache
  @@regexp_cache
end

#regx_optionsObject

../compare/parse30.y:6914



397
398
399
400
401
402
403
404
405
406
# File 'lib/ruby_lexer_strings.rb', line 397

# Scan the option letters that follow a regexp terminator and return them
# ("" when there are none).
#
# Raises (via rb_compile_error) when any letter is outside "ixmonesu".
def regx_options                               # ../compare/parse30.y:6914
  newtok

  flags = scan(/\p{Alpha}+/) || ""

  if flags =~ /[^ixmonesu]/
    rb_compile_error("unknown regexp options: %s" % [flags])
  end

  flags
end

#resetObject



760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
# File 'lib/ruby_lexer.rb', line 760

# Put the lexer back into its initial state: line 1, no brace / paren /
# string nesting, EXPR_NONE, no pending comment or string terminator, an
# empty string_buffer, and freshly reset cond and cmdarg stacks.
def reset
  self.lineno        = 1
  self.brace_nest    = 0
  self.command_start = true
  self.comment       = nil
  self.lex_state     = EXPR_NONE
  self.lex_strterm   = nil
  self.lpar_beg      = nil
  self.paren_nest    = 0
  self.space_seen    = false
  self.string_nest   = 0
  self.token         = nil
  self.string_buffer = []
  self.old_ss        = nil
  self.old_lineno    = nil

  self.cond.reset
  self.cmdarg.reset
end

#result(new_state, token, text, line = self.lineno) ⇒ Object

:nodoc:



780
781
782
783
784
785
# File 'lib/ruby_lexer.rb', line 780

# Build a token pair and optionally move the lexer to a new state.
#
# +new_state+ may be nil (leave lex_state alone), the symbol :arg_state
# (use whatever #arg_state returns) or a state value. Returns
# [token, [text, line]]; +line+ defaults to the current lineno.
def result(new_state, token, text, line = self.lineno) # :nodoc:
  state = new_state == :arg_state ? self.arg_state : new_state
  self.lex_state = state if state

  [token, [text, line]]
end

#ruby22_label?Boolean

Returns:

  • (Boolean)


787
788
789
# File 'lib/ruby_lexer.rb', line 787

# True-ish when the parser is 2.2 or newer and a label is possible at the
# current position.
def ruby22_label?
  ruby22plus? && is_label_possible?
end

#ruby22plus?Boolean

Returns:

  • (Boolean)


791
792
793
# File 'lib/ruby_lexer.rb', line 791

# Whether the parser class reports a version of 22 or higher.
def ruby22plus?
  version = parser.class.version
  version >= 22
end

#ruby23plus?Boolean

Returns:

  • (Boolean)


795
796
797
# File 'lib/ruby_lexer.rb', line 795

# Whether the parser class reports a version of 23 or higher.
def ruby23plus?
  version = parser.class.version
  version >= 23
end

#ruby24minus?Boolean

Returns:

  • (Boolean)


799
800
801
# File 'lib/ruby_lexer.rb', line 799

# Whether the parser class reports a version of 24 or lower.
def ruby24minus?
  version = parser.class.version
  version <= 24
end

#ruby27plus?Boolean

Returns:

  • (Boolean)


803
804
805
# File 'lib/ruby_lexer.rb', line 803

# Whether the parser class reports a version of 27 or higher.
def ruby27plus?
  version = parser.class.version
  version >= 27
end

#scan_variable_nameObject

../compare/parse30.y:7208



408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
# File 'lib/ruby_lexer_strings.rb', line 408

# Look at a "#" inside an expanding string and decide whether it starts an
# interpolation.
#
# Returns [:tSTRING_DVAR, "#"] before a global, instance or class variable
# (only the "#" is consumed), [:tSTRING_DBEG, ["#{", lineno]] for "#{"
# (also setting command_start), and nil for any other "#", which is
# consumed without producing a token.
def scan_variable_name                        # ../compare/parse30.y:7208
  case
  when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
    # TODO: !ISASCII
    return :tSTRING_DVAR, matched
  when scan(/#(?=\@\@?[a-zA-Z_])/) then
    # TODO: !ISASCII
    return :tSTRING_DVAR, matched
  when scan(/#[{]/) then
    self.command_start = true
    return :tSTRING_DBEG, [matched, lineno]
  when scan(/#/) then
    # do nothing but swallow
  end

  # if scan(/\P{ASCII}|_|\p{Alpha}/) then # TODO: fold into above DVAR cases
  #   # if (!ISASCII(c) || c == '_' || ISALPHA(c))
  #   #     return tSTRING_DVAR;
  # end

  nil
end

#scanner_classObject

The current scanner class. Must be overridden in subclasses.



97
98
99
# File 'lib/ruby_lexer.rex.rb', line 97

# The scanner class used by this lexer: StringScanner.
# Subclasses that want a different scanner override this method.
def scanner_class
  StringScanner
end

#space_vs_beginning(space_type, beg_type, fallback) ⇒ Object



807
808
809
810
811
812
813
814
815
816
817
# File 'lib/ruby_lexer.rb', line 807

# Choose between three candidate tokens for an operator that can also be
# an argument prefix.
#
# Returns +space_type+ in a spaced-argument position (after issuing a
# warning), +beg_type+ at the beginning of an expression, and +fallback+
# otherwise.
def space_vs_beginning(space_type, beg_type, fallback)
  if is_space_arg?(check(/./m))
    warning "`**' interpreted as argument prefix"
    return space_type
  end

  return beg_type if is_beg?

  # TODO: warn_balanced("**", "argument prefix");
  fallback
end

#string(type, beg, nnd = nil) ⇒ Object



431
432
433
434
435
436
437
438
# File 'lib/ruby_lexer_strings.rb', line 431

# Register a new string terminator in lex_strterm.
#
# +type+ is a set of STR_* flags (STR_FUNC_LABEL is added when a label is
# possible here), +beg+ and +nnd+ are passed through as the third and
# fourth elements; a missing +nnd+ is stored as "\0". Returns the stored array.
def string(type, beg, nnd = nil)
  # label = (IS_LABEL_POSSIBLE() ? str_label : 0);
  # p->lex.strterm = NEW_STRTERM(str_dquote | label, '"', 0);
  # p->lex.ptok = p->lex.pcur-1;

  flags = is_label_possible? ? type | STR_FUNC_LABEL : type
  self.lex_strterm = [:strterm, flags, beg, nnd || "\0"]
end

#string_term(func) ⇒ Object

../compare/parse30.y:7254



440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
# File 'lib/ruby_lexer_strings.rb', line 440

# Close the current string-like literal (the terminator has already been
# scanned by the caller) and clear lex_strterm.
#
# Returns tREGEXP_END with the scanned option letters for a regexp,
# tLABEL_END with the buffered text when the literal is followed by a
# label suffix, and otherwise tSTRING_END with [terminator, func].
def string_term func                          # ../compare/parse30.y:7254
  self.lex_strterm = nil

  return result EXPR_END, :tREGEXP_END, self.regx_options if
    func =~ STR_FUNC_REGEXP

  if func =~ STR_FUNC_LABEL && is_label_suffix? then
    self.getch
    self.lex_state = EXPR_BEG|EXPR_LABEL

    return :tLABEL_END, string_buffer.join
  end

  self.lex_state = EXPR_END

  return :tSTRING_END, [self.matched, func]
end

#tokadd(c) ⇒ Object

../compare/parse30.y:6548



458
459
460
# File 'lib/ruby_lexer_strings.rb', line 458

# Append +c+ to the token buffer (string_buffer) and return the buffer.
def tokadd(c)                                 # ../compare/parse30.y:6548
  string_buffer.push(c)
end

#tokadd_escapeObject

../compare/parse30.y:6840



462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
# File 'lib/ruby_lexer_strings.rb', line 462

# Copy one backslash escape from the input into the token buffer without
# decoding it (tokadd_string uses this for regexp literals).
#
# A backslash-newline is dropped. Octal, hex and \M- / \C- / \c escapes
# are copied verbatim, recursing when another escape follows a modifier.
# Any other escaped character is copied as the backslash plus that character.
#
# Raises (via rb_compile_error) for a dangling \M, \c, \C or \x, or when
# the input does not start with a backslash followed by a character.
def tokadd_escape                              # ../compare/parse30.y:6840
  case
  when scan(/\\\n/) then
    # just ignore
  when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    tokadd matched
  when scan(/\\([MC]-|c)(?=\\)/) then
    tokadd matched
    self.tokadd_escape
  when scan(/\\([MC]-|c)(.)/) then
    tokadd matched

    self.tokadd_escape if check(/\\/) # recurse if continued!
  when scan(/\\[McCx]/) then # all unprocessed branches from above have failed
    rb_compile_error "Invalid escape character syntax"
  when scan(/\\(.)/m) then
    chr, = match[1]

    tokadd "\\"
    tokadd chr
  else
    rb_compile_error "Invalid escape character syntax: %p" % [self.rest.lines.first]
  end
end

#tokadd_string(func, term, paren) ⇒ Object

../compare/parse30.y:7020



487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
# File 'lib/ruby_lexer_strings.rb', line 487

# Accumulate literal string content into the token buffer until something
# interesting is reached: the terminator at nesting depth zero, the start
# of an interpolation, whitespace inside a word list, or end of input.
#
# +func+ is the STR_FUNC_* flag set, +term+ the terminator string and
# +paren+ the opening delimiter ("\0" when the literal does not nest).
# Nested +paren+ / +term+ pairs adjust string_nest and are kept as
# content. Backslash sequences are handled according to the flags (kept
# raw, decoded through read_escape, or copied through tokadd_escape for
# regexps).
#
# Returns the last matched text, RubyLexer::EOF when the stream is
# exhausted, or nil when neither applies.
def tokadd_string func, term, paren           # ../compare/parse30.y:7020
  qwords = func =~ STR_FUNC_QWORDS
  escape = func =~ STR_FUNC_ESCAPE
  expand = func =~ STR_FUNC_EXPAND
  regexp = func =~ STR_FUNC_REGEXP

  paren_re = regexp_cache[paren] if paren != "\0"
  term_re  = if term == "\n"
               /\r?\n/
             else
               regexp_cache[term]
             end

  until end_of_stream? do
    case
    when paren_re && scan(paren_re) then
      self.string_nest += 1
    when scan(term_re) then
      if self.string_nest == 0 then
        self.pos -= 1 # TODO: ss.unscan 665 errors #$ HACK: why do we depend on this so hard?
        break # leave eos loop, go parse term in caller (heredoc or parse_string)
      else
        self.lineno += matched.count("\n")
        self.string_nest -= 1
      end

    when expand && check(/#[\$\@\{]/) then
      # do nothing since we used `check`
      break # leave eos loop
    when check(/\\/) then
      case
      when scan(/\\\n/) then
        self.lineno += 1
        case
        when qwords then
          tokadd "\n"
          next
        when expand then
          next if func !~ STR_FUNC_INDENT

          if term == "\n" then
            unscan     # rollback
            scan(/\\/) # and split
            scan(/\n/) # this is `matched`
            break
          end

          tokadd "\\"
          debug 9
        else
          unscan     # rollback
          scan(/\\/) # this is `matched`
        end
      when check(/\\\\/) then
        tokadd '\\' if escape
        nextc # ignore 1st \\
        nextc # for tokadd ss.matched, below
      when scan(/\\u/) then
        unless expand then
          tokadd "\\"
          next
        end

        tokadd_utf8 term, func, regexp

        next
      else
        scan(/\\/) # eat it, we know it's there

        return RubyLexer::EOF if end_of_stream?

        if scan(/\P{ASCII}/) then
          tokadd "\\" unless expand
          tokadd self.matched
          next
        end

        case
        when regexp then
          if term !~ SIMPLE_RE_META && scan(term_re) then
            tokadd matched
            next
          end

          self.pos -= 1 # TODO: ss.unscan 15 errors
          # HACK? decide whether to eat the \\ above
          # NOTE: parses as _esc = (tokadd_escape && end_of_stream?)
          if _esc = tokadd_escape && end_of_stream? then
            debug 10
          end

          next # C's continue = Ruby's next
        when expand then
          tokadd "\\" if escape
          tokadd read_escape
          next
        when qwords && scan(/\s/) then
          # ignore backslashed spaces in %w
        when !check(term_re) && !(paren_re && check(paren_re)) then
          tokadd "\\"
          next
        else
          getch # slurp it too for matched below
        end
      end # inner case for /\\/

    when scan(/\P{ASCII}/) then
      # not currently checking encoding stuff -- drops to tokadd below
    when qwords && check(/\s/) then
      break # leave eos loop
    else
      # plain content: take the longest run free of the terminator, the
      # paren, "#", backslash and (for word lists) whitespace
      t  = Regexp.escape term == "\n" ? "\r\n" : term
      x  = Regexp.escape paren if paren && paren != "\000"
      q  = "\\s" if qwords
      re = /[^#{t}#{x}\#\\#{q}]+/

      scan re or getch
      self.lineno += matched.count "\n" if matched
    end # big case

    tokadd self.matched
  end # until end_of_stream?

  if self.matched then
    self.matched
  elsif end_of_stream? then
    RubyLexer::EOF
  end
end

#tokadd_utf8(term, func, regexp_literal) ⇒ Object

tokadd_string



616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
# File 'lib/ruby_lexer_strings.rb', line 616

# Consume the body of a "\u" escape and append it to the token buffer.
# The caller (tokadd_string) has already scanned the "\u" itself.
#
# Two forms are accepted: exactly four hex digits, or a braced list of
# one or more whitespace-separated code points of 1-6 hex digits each.
# For a regexp literal (+regexp_literal+ truthy) the escape is copied
# through as text, with the braced list normalised to single spaces; for
# other literals the code points are converted to UTF-8 characters.
#
# +term+ and +func+ are not used by this method.
#
# Raises (via rb_compile_error) "unterminated Unicode escape" when
# neither form is present.
def tokadd_utf8 term, func, regexp_literal    # ../compare/parse30.y:6646
  tokadd "\\u" if regexp_literal

  case
  when scan(/\h{4}/) then
    codepoint = [matched.to_i(16)].pack("U")

    tokadd regexp_literal ? matched : codepoint
  when scan(/\{\s*(\h{1,6}(?:\s+\h{1,6})*)\s*\}/) then
    # "U*" converts every listed code point; a bare "U" would silently
    # keep only the first one ("\u{61 62}" must yield "ab", not "a")
    codepoints = match[1].split.map { |s| s.to_i 16 }.pack("U*")

    if regexp_literal then
      tokadd "{"
      tokadd match[1].split.join(" ")
      tokadd "}"
    else
      tokadd codepoints
    end
  else
    rb_compile_error "unterminated Unicode escape"
  end
end

#unescape(s) ⇒ Object



828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
# File 'lib/ruby_lexer.rb', line 828

# Translate the body of one escape sequence (the text after the
# backslash, as captured by ESC in unescape_string) into the string it
# denotes.
#
# Simple escapes come from the ESCAPES table; octal, hex, \M- and
# \C- / \c escapes are computed; \uXXXX and \u{...} become UTF-8
# characters. Text that is not a recognised escape is returned unchanged.
#
# Raises (via rb_compile_error) "Invalid escape character syntax" for a
# malformed numeric / modifier escape or a \u with fewer than 4 digits.
def unescape s
  r = ESCAPES[s]

  return r if r

  # the order of the patterns below matters: earlier ones shadow later ones
  x = case s
      when /^[0-7]{1,3}/ then
        ($&.to_i(8) & 0xFF).chr
      when /^x([0-9a-fA-F]{1,2})/ then
        $1.to_i(16).chr
      when /^M-(.)/ then
        ($1[0].ord | 0x80).chr
      when /^(C-|c)(.)/ then
        ($2[0].ord & 0x9f).chr
      when /^[89a-f]/i then # bad octal or hex... ignore? that's what MRI does :(
        s
      when /^[McCx0-9]/ then
        rb_compile_error("Invalid escape character syntax")
      when /u(\h{4})/ then
        [$1.delete("{}").to_i(16)].pack("U")
      when /u(\h{1,3})/ then
        rb_compile_error("Invalid escape character syntax")
      when /u\{(\h+(?:\s+\h+)*)\}/ then
        $1.split.map { |cp| cp.to_i(16) }.pack("U*")
      else
        s
      end
  x
end

#unescape_string(str) ⇒ Object



819
820
821
822
823
824
825
826
# File 'lib/ruby_lexer.rb', line 819

# Replace every escape sequence in +str+ (as matched by ESC) with the
# result of #unescape, relabelled as UTF-8.
#
# Returns the new string when it is validly encoded; otherwise returns a
# binary (ASCII-8BIT) copy of it.
def unescape_string(str)
  decoded = str.gsub(ESC) { unescape($1).b.force_encoding Encoding::UTF_8 }

  decoded.valid_encoding? ? decoded : decoded.b
end

#warning(s) ⇒ Object



858
859
860
# File 'lib/ruby_lexer.rb', line 858

# Hook for lexer warnings. The message +s+ is currently discarded; the
# method exists so call sites can report warnings without caring whether
# anything listens.
def warning s
  # do nothing for now
end

#was_label?Boolean

Returns:

  • (Boolean)


862
863
864
865
# File 'lib/ruby_lexer.rb', line 862

# Record in @was_label whether a label is possible at this point (per
# ruby22_label?) and always return true. process_label_or_string reads
# and clears the flag later.
def was_label?
  @was_label = ruby22_label?
  true
end