Class: RubyLex

Inherits:

Object

Object
RubyLex

show all

Extended by:: Exception2MessageMapper

Includes:: RubyToken

Defined in:: lib/irb/ruby-lex.rb

Constant Summary collapse

# Keywords that open a new nesting level.  identify_identifier increments
# @indent and pushes onto @indent_stack when it lexes one of these as a
# reserved word (subject to the look-ahead checks it performs).
ENINDENT_CLAUSE =

[
  "case", "class", "def", "do", "for", "if",
  "module", "unless", "until", "while", "begin" #, "when"
]

DEINDENT_CLAUSE =

["end" #, "when"
]

# Maps the type letter that follows "%" in a %-literal to the literal-type
# marker that identify_quotation hands to identify_string.
PERCENT_LTYPE =

{
  "q" => "\'",
  "Q" => "\"",
  "x" => "\`",
  "r" => "/",
  "w" => "]",
  "W" => "]",
  "s" => ":"
}

# Opening delimiter => matching closing delimiter for %-literals.
# identify_quotation uses it to pick the terminator and identify_string uses
# it to track nesting of paired delimiters.
PERCENT_PAREN =

{
  "{" => "}",
  "[" => "]",
  "<" => ">",
  "(" => ")"
}

# Literal-type marker => token class returned by identify_string and
# identify_here_document for a literal of that type.
Ltype2Token =

{
  "\'" => TkSTRING,
  "\"" => TkSTRING,
  "\`" => TkXSTRING,
  "/" => TkREGEXP,
  "]" => TkDSTRING,
  ":" => TkSYMBOL
}

# Literal-type marker => token class used by identify_string instead of
# Ltype2Token when its `subtype` flag was set while scanning the literal
# (it is set when a "#" not followed by "{" is seen in such a literal).
DLtype2Token =

{
  "\"" => TkDSTRING,
  "\`" => TkDXSTRING,
  "/" => TkDREGEXP,
}

Constants included from RubyToken

RubyToken::EXPR_ARG, RubyToken::EXPR_BEG, RubyToken::EXPR_CLASS, RubyToken::EXPR_DOT, RubyToken::EXPR_END, RubyToken::EXPR_FNAME, RubyToken::EXPR_MID, RubyToken::TkReading2Token, RubyToken::TkSymbol2Token, RubyToken::TokenDefinitions

Class Attribute Summary collapse

.debug_level ⇒ Object

Returns the value of attribute debug_level.

Instance Attribute Summary collapse

#char_no ⇒ Object readonly

Returns the value of attribute char_no.
#exception_on_syntax_error ⇒ Object

Returns the value of attribute exception_on_syntax_error.
#indent ⇒ Object readonly

Returns the value of attribute indent.
#line_no ⇒ Object readonly

Returns the value of attribute line_no.
#readed_auto_clean_up ⇒ Object

Returns the value of attribute readed_auto_clean_up.
#seek ⇒ Object readonly

Returns the value of attribute seek.
#skip_space ⇒ Object

Returns the value of attribute skip_space.

Class Method Summary collapse

.debug? ⇒ Boolean

Instance Method Summary collapse

Methods included from RubyToken

#Token, def_token

Constructor Details

#initialize ⇒ `RubyLex`

Returns a new instance of RubyLex.

# File 'lib/irb/ruby-lex.rb', line 39

# Builds a lexer that reads from STDIN until #set_input is called again.
#
# The rule table is registered first (lex_init); afterwards every piece of
# position, buffer, nesting and option state is set to its starting value.
def initialize
  lex_init
  set_input(STDIN)

  # Position counters.
  @seek = 0
  @line_no = 1
  @exp_line_no = 1
  @base_char_no = 0
  @char_no = 0

  # Character buffers: look-ahead, consumed, consumed while in a heredoc header.
  @rests = []
  @readed = []
  @here_readed = []

  # Lexer state.
  @lex_state = EXPR_BEG
  @indent = 0
  @indent_stack = []
  @space_seen = false
  @here_header = false
  @continue = false
  @line = ""

  # Options exposed through attribute accessors.
  @skip_space = false
  @readed_auto_clean_up = false
  @exception_on_syntax_error = true

  @prompt = nil
end

Class Attribute Details

.debug_level ⇒ `Object`

Returns the value of attribute debug_level



32
33
34

# File 'lib/irb/ruby-lex.rb', line 32

# Reader for the class-level @debug_level value that debug? compares
# against zero.
def debug_level
  @debug_level
end

Instance Attribute Details

#char_no ⇒ `Object` (readonly)

Returns the value of attribute char_no



72
73
74

# File 'lib/irb/ruby-lex.rb', line 72

# Returns @char_no: getc resets it to 0 after reading a newline and
# otherwise increments it for each character read.
def char_no
  @char_no
end

#exception_on_syntax_error ⇒ `Object`

Returns the value of attribute exception_on_syntax_error



69
70
71

# File 'lib/irb/ruby-lex.rb', line 69

# Returns the flag consulted by #token: when it is true a SyntaxError raised
# while matching is re-raised; otherwise #token returns a TkError instead.
def exception_on_syntax_error
  @exception_on_syntax_error
end

#indent ⇒ `Object` (readonly)

Returns the value of attribute indent



74
75
76

# File 'lib/irb/ruby-lex.rb', line 74

# Returns @indent, the current nesting depth maintained by the lexing rules
# (incremented for block-opening keywords and opening brackets, decremented
# for "end" and closing brackets).
def indent
  @indent
end

#line_no ⇒ `Object` (readonly)

Returns the value of attribute line_no



73
74
75

# File 'lib/irb/ruby-lex.rb', line 73

# Returns @line_no, which getc increments for every newline it reads and
# ungetc decrements when a newline is pushed back.
def line_no
  @line_no
end

#readed_auto_clean_up ⇒ `Object`

Returns the value of attribute readed_auto_clean_up



68
69
70

# File 'lib/irb/ruby-lex.rb', line 68

# Returns the flag consulted by #token: when it is true, #token calls
# get_readed after producing each token.
def readed_auto_clean_up
  @readed_auto_clean_up
end

#seek ⇒ `Object` (readonly)

Returns the value of attribute seek



71
72
73

# File 'lib/irb/ruby-lex.rb', line 71

# Returns @seek, a running count of consumed characters: getc adds one and
# ungetc subtracts one.
def seek
  @seek
end

#skip_space ⇒ `Object`

Returns the value of attribute skip_space



67
68
69

# File 'lib/irb/ruby-lex.rb', line 67

# Returns the flag consulted by #token: when it is true, TkSPACE tokens are
# skipped and #token returns the next non-space token.
def skip_space
  @skip_space
end

Class Method Details

.debug? ⇒ `Boolean`

Returns:

(Boolean)



33
34
35

# File 'lib/irb/ruby-lex.rb', line 33

# True when the class-level @debug_level is greater than zero.
def debug?
  @debug_level > 0
end

Instance Method Details

#each_top_level_statement ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 226

# Reads the input one top-level statement at a time, yielding each complete
# statement together with the line number on which it started.
#
# Chunks returned by #lex are appended to @line until no literal is open
# (@ltype), no continuation is pending (@continue) and the nesting depth
# (@indent) is zero.  A statement that is just "\n" is not yielded.  The loop
# stops when #lex returns nil; TerminateLineInput discards the statement
# being assembled and starts over.
def each_top_level_statement
  initialize_input
  catch(:TERM_INPUT) do
    loop do
      begin
        @continue = false
        prompt
        chunk = lex
        if chunk
          @line.concat chunk
          # Statement is still incomplete: go and read more.
          next if @ltype || @continue || @indent > 0
        elsif @line == ''
          # End of input with nothing pending.
          throw :TERM_INPUT
        end

        unless @line == "\n"
          @line.force_encoding(@io.encoding)
          yield @line, @exp_line_no
        end
        break unless chunk

        # Start assembling the next statement.
        @line = ''
        @exp_line_no = @line_no

        @indent = 0
        @indent_stack = []
        prompt
      rescue TerminateLineInput
        initialize_input
        prompt
        get_readed
      end
    end
  end
end

#eof? ⇒ `Boolean`

Returns:

(Boolean)



131
132
133

# File 'lib/irb/ruby-lex.rb', line 131

# Delegates to eof? on @io, the object most recently given to set_input.
def eof?
  @io.eof?
end

#get_readed ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 88

# Returns everything consumed since the previous call as a single string and
# empties the consumed-character buffer (@readed).
#
# @base_char_no is refreshed first: when the buffer contains a newline it
# becomes the number of entries after the last newline, otherwise the buffer
# length is added to it.
def get_readed
  last_newline = @readed.rindex("\n")
  if last_newline
    @base_char_no = @readed.size - 1 - last_newline
  else
    @base_char_no += @readed.size
  end

  consumed, @readed = @readed.join(""), []
  consumed
end

#getc ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 100

# Consumes and returns the next character of input, or nil at end of input.
#
# The character is recorded in @here_readed while a here-document header is
# being processed (@here_header) and in @readed otherwise, and the
# @seek / @line_no / @char_no counters are advanced.
def getc
  while @rests.empty?
    # When the source yields nothing more, a nil entry stands for end of input.
    @rests << nil unless buf_input
  end

  ch = @rests.shift
  (@here_header ? @here_readed : @readed).push(ch)
  @seek += 1

  if ch == "\n"
    @line_no += 1
    @char_no = 0
  else
    @char_no += 1
  end
  ch
end

#getc_of_rests ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 135

# Like #getc, but only consumes characters that are already buffered in
# @rests; returns nil when the look-ahead buffer is empty.
def getc_of_rests
  return nil if @rests.empty?

  getc
end

#gets ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 121

# Reads characters with #getc up to and including the next newline and
# returns them as a string.  At end of input the partial line is returned,
# or nil when nothing at all could be read.
def gets
  buffer = ""
  ch = nil
  while (ch = getc)
    buffer << ch
    break if ch == "\n"
  end

  if buffer == "" && ch.nil?
    nil
  else
    buffer
  end
end

#identify_comment ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 1123

# Consumes a "#" comment up to (but not including) the terminating newline
# and returns a TkCOMMENT token.
#
# @ltype is set to "#" while scanning and cleared again once the newline is
# found; if input ends first it is left as "#".
def identify_comment
  @ltype = "#"

  while (c = getc)
    next unless c == "\n"

    # Leave the newline in the input for the newline rule.
    @ltype = nil
    ungetc
    break
  end

  Token(TkCOMMENT)
end

#identify_gvar ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 740

# Lexes what follows a "$" and returns the matching token:
# a punctuation global (TkGVAR), "$-x" (TkGVAR), a back-reference such as
# "$&" (TkBACK_REF), a numbered reference such as "$12" (TkNTH_REF), a named
# global (delegated to identify_identifier), or a bare "$" token.
# @lex_state is set to EXPR_END first.
def identify_gvar
  @lex_state = EXPR_END

  ch = getc
  if /[~_*$?!@\/\\;,=:<>".]/ === ch
    Token(TkGVAR, "$" + ch)
  elsif ch == "-"
    Token(TkGVAR, "$-" + getc)
  elsif ["&", "`", "'", "+"].include?(ch)
    Token(TkBACK_REF, "$"+ch)
  elsif /[1-9]/ === ch
    # Swallow the remaining digits, then give back the first non-digit.
    digit = getc
    digit = getc while digit =~ /[0-9]/
    ungetc
    Token(TkNTH_REF)
  elsif /\w/ === ch
    # Push back both the name character and the "$" so that
    # identify_identifier sees the whole name.
    ungetc
    ungetc
    identify_identifier
  else
    ungetc
    Token("$")
  end
end

#identify_here_document ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 873

# Lexes a here-document whose "<<" has just been consumed and returns the
# token for the whole literal (looked up in Ltype2Token by quote type).
#
# Steps:
# 1. Read the optional "-" (terminator may be indented) and the terminator
#    name, either quoted with ', " or ` or as a bare word.
# 2. Set aside the rest of the current line (`reserve`).
# 3. Consume body lines until one equals the terminator.  Only the line
#    ending ("\n" or "\r\n") is removed before the comparison.
# 4. Push the reserved rest-of-line back so it is lexed after this token.
#
# @here_header is switched off while the body is read and on again
# afterwards; getc uses it to choose the buffer it records into.
# @ltype holds the quote type while scanning and is restored at the end;
# @lex_state becomes EXPR_END.
def identify_here_document
  ch = getc
  indent = false
  if ch == "-"
    ch = getc
    indent = true
  end
  if /['"`]/ =~ ch
    lt = ch
    quoted = ""
    while (c = getc) && c != lt
      quoted.concat c
    end
  else
    lt = '"'
    quoted = ch.dup
    while (c = getc) && c =~ /\w/
      quoted.concat c
    end
    ungetc
  end

  ltback, @ltype = @ltype, lt
  reserve = []
  while ch = getc
    reserve.push ch
    if ch == "\\"
      # Keep an escaped character (including an escaped newline) verbatim.
      reserve.push ch = getc
    elsif ch == "\n"
      break
    end
  end

  @here_header = false
  while l = gets
    # Strip only the line terminator.  The previous pattern, (:?\r)?, was a
    # mistyped non-capturing group and also removed a ":" preceding "\r\n".
    l = l.sub(/\r?\n\z/, '')
    break if (indent ? l.strip : l) == quoted
  end

  @here_header = true
  @here_readed.concat reserve
  while ch = reserve.pop
    ungetc ch
  end

  @ltype = ltback
  @lex_state = EXPR_END
  Token(Ltype2Token[lt])
end

#identify_identifier ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 764

# Lexes an identifier-like word starting at the current position and returns
# its token.
#
# * An optional sigil ("$", "@" or "@@") is read first; such names are
#   returned as TkGVAR, TkIVAR or TkCVAR (the latter two set @lex_state to
#   EXPR_END).
# * Unless the previous token was a dot (EXPR_DOT), the word is looked up in
#   TkReading2Token.  Reserved words are returned with their own token
#   class; block-opening keywords (ENINDENT_CLAUSE) increase @indent and
#   "end" (DEINDENT_CLAUSE) decreases it.
# * Anything else is returned as TkCONSTANT (leading capital), TkFID
#   (trailing "!" or "?") or TkIDENTIFIER.
def identify_identifier
  token = ""
  if peek(0) =~ /[$@]/
    token.concat(c = getc)
    if c == "@" and peek(0) == "@"
	token.concat getc
    end
  end

  # Word characters: ASCII letters, digits, underscore and every character
  # outside the ASCII range (the class excludes ASCII control/punctuation).
  while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
    print ":", ch, ":" if RubyLex.debug?
    token.concat ch
  end
  ungetc

  # A trailing "!" or "?" is taken as part of the name, but only when the
  # name starts with a word character and the next character is not "=".
  if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
    token.concat getc
  end

  # almost fix token

  case token
  when /^\$/
    return Token(TkGVAR, token)
  when /^\@\@/
    @lex_state = EXPR_END
    # p Token(TkCVAR, token)
    return Token(TkCVAR, token)
  when /^\@/
    @lex_state = EXPR_END
    return Token(TkIVAR, token)
  end

  # After a dot every word is an ordinary method name, so the reserved-word
  # lookup is skipped in that state.
  if @lex_state != EXPR_DOT
    print token, "\n" if RubyLex.debug?

    token_c, *trans = TkReading2Token[token]
    if token_c
	# reserved word?

	if (@lex_state != EXPR_BEG &&
   @lex_state != EXPR_FNAME &&
   trans[1])
 # modifiers
 token_c = TkSymbol2Token[trans[1]]
 @lex_state = trans[0]
	else
 if @lex_state != EXPR_FNAME
   if ENINDENT_CLAUSE.include?(token)
     # check for ``class = val'' etc.
     valid = true
     case token
     when "class"
valid = false unless peek_match?(/^\s*(<<|\w|::)/)
     when "def"
valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
     when "do"
valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/)
     when *ENINDENT_CLAUSE
valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/)
     else
# no nothing
     end
     if valid
# A "do" whose innermost open construct is for/while/until does not open
# another nesting level.
if token == "do"
  if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
    @indent += 1
    @indent_stack.push token_c
  end
else
  @indent += 1
  @indent_stack.push token_c
end
#		p @indent_stack
     end

   elsif DEINDENT_CLAUSE.include?(token)
     @indent -= 1
     @indent_stack.pop
   end
   @lex_state = trans[0]
 else
   @lex_state = EXPR_END
 end
	end
	return Token(token_c, token)
    end
  end

  # Not a reserved word: choose the follow-up state from the current one.
  if @lex_state == EXPR_FNAME
    @lex_state = EXPR_END
    # In a method-name position a trailing "=" belongs to the name.
    if peek(0) == '='
	token.concat getc
    end
  elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
    @lex_state = EXPR_ARG
  else
    @lex_state = EXPR_END
  end

  if token[0, 1] =~ /[A-Z]/
    return Token(TkCONSTANT, token)
  elsif token[token.size - 1, 1] =~ /[!?]/
    return Token(TkFID, token)
  else
    return Token(TkIDENTIFIER, token)
  end
end

#identify_number ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 943

# Lexes a numeric literal starting at the current position and returns a
# TkINTEGER or TkFLOAT token; @lex_state becomes EXPR_END.
#
# A leading "0" not followed by ".", "e" or "E" selects the prefixed forms
# (0x, 0b, 0o, 0d, or plain octal).  Otherwise a decimal integer or float is
# read, allowing at most one "." and one exponent.
#
# Reports a SyntaxError through RubyLex.fail for an invalid octal digit, a
# prefixed literal without digits, or a misplaced "_".
def identify_number
  @lex_state = EXPR_END

  if peek(0) == "0" && peek(1) !~ /[.eE]/
    getc
    # `match` is the set of characters allowed after the prefix.
    case peek(0)
    when /[xX]/
	ch = getc
	match = /[0-9a-fA-F_]/
    when /[bB]/
	ch = getc
	match = /[01_]/
    when /[oO]/
	ch = getc
	match = /[0-7_]/
    when /[dD]/
	ch = getc
	match = /[0-9_]/
    when /[0-7]/
	match = /[0-7_]/
    when /[89]/
	RubyLex.fail SyntaxError, "Invalid octal digit"
    else
	return Token(TkINTEGER)
    end

    # len0: no digit seen yet; non_digit: the previous character when it was
    # an underscore (false otherwise).
    len0 = true
    non_digit = false
    while ch = getc
	if match =~ ch
 if ch == "_"
   if non_digit
     RubyLex.fail SyntaxError, "trailing `#{ch}' in number"
   else
     non_digit = ch
   end
 else
   non_digit = false
   len0 = false
 end
	else
 ungetc
 if len0
   RubyLex.fail SyntaxError, "numeric literal without digits"
 end
 if non_digit
   RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
 end
 break
	end
    end
    return Token(TkINTEGER)
  end

  type = TkINTEGER
  allow_point = true
  allow_e = true
  non_digit = false
  while ch = getc
    case ch
    when /[0-9]/
	non_digit = false
    when "_"
	non_digit = ch
    # `allow_point && "."` is false once a point or exponent has been seen,
    # so this branch can no longer match.
    when allow_point && "."
	if non_digit
 RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
	end
	type = TkFLOAT
	# A "." not followed by a digit is not part of the number
	# (e.g. a method call on an integer): give it back and stop.
	if peek(0) !~ /[0-9]/
 type = TkINTEGER
 ungetc
 break
	end
	allow_point = false
    when allow_e && "e", allow_e && "E"
	if non_digit
 RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
	end
	type = TkFLOAT
	if peek(0) =~ /[+-]/
 getc
	end
	allow_e = false
	allow_point = false
	non_digit = ch
    else
	if non_digit
 RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
	end
	ungetc
	break
    end
  end
  Token(type)
end

#identify_quotation ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 925

# Lexes a %-literal whose "%" has just been consumed.
#
# An optional type letter (see PERCENT_LTYPE) selects the literal type; a
# bare non-word delimiter means a double-quoted string.  The closing
# delimiter is the partner of an opening bracket (PERCENT_PAREN) or else the
# delimiter itself.  The literal body is then read by identify_string.
# Any other character after "%" is reported as a SyntaxError.
def identify_quotation
  ch = getc
  lt = PERCENT_LTYPE[ch]
  if lt
    ch = getc
  elsif ch =~ /\W/
    lt = "\""
  else
    RubyLex.fail SyntaxError, "unknown type of %string"
  end

  @quoted = PERCENT_PAREN[ch] || ch
  identify_string(lt, @quoted)
end

#identify_string(ltype, quoted = ltype) ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 1040

# Lexes the body of a string-like literal whose opening delimiter has already
# been consumed and returns its token.
#
# ltype::  literal-type marker (a key of Ltype2Token).
# quoted:: the character that terminates the literal (defaults to ltype).
#
# @ltype and @quoted are set while scanning and always reset to nil
# afterwards; @lex_state is always left as EXPR_END (see the ensure clause).
def identify_string(ltype, quoted = ltype)
  @ltype = ltype
  @quoted = quoted
  subtype = nil
  begin
    # Depth of nested bracket pairs when the literal uses paired delimiters.
    nest = 0
    while ch = getc
	if @quoted == ch and nest == 0
 break
	elsif @ltype != "'" && ch == "#" && peek(0) == "{"
 # "#{": lex the embedded expression.
 identify_string_dvar
	elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
 subtype = true
	elsif ch == '\\' and @ltype == "'" #'
 # In a single-quoted literal only a backslash, newline or quote is
 # consumed together with the preceding backslash.
 case ch = getc
 when "\\", "\n", "'"
 else
   ungetc
 end
	elsif ch == '\\' #'
 read_escape
	end
	if PERCENT_PAREN.values.include?(@quoted)
 if PERCENT_PAREN[ch] == @quoted
   nest += 1
 elsif ch == @quoted
   nest -= 1
 end
	end
    end
    # Swallow the option letters that may follow a regexp literal.
    if @ltype == "/"
      while /[imxoesun]/ =~ peek(0)
 getc
	end
    end
    if subtype
	Token(DLtype2Token[ltype])
    else
	Token(Ltype2Token[ltype])
    end
  ensure
    @ltype = nil
    @quoted = nil
    @lex_state = EXPR_END
  end
end

#identify_string_dvar ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 1087

# Lexes an interpolated expression inside a literal.  The character returned
# by the first getc (the one following "#") is discarded, then tokens are
# read with a fresh lexer state until a TkRBRACE is seen while no literal is
# open, no continuation is pending and the nesting depth is zero.
#
# The surrounding literal's state (@continue, @ltype, @indent,
# @indent_stack, @lex_state, @quoted) is saved beforehand and restored
# afterwards, even if lexing raises.
def identify_string_dvar
  getc

  saved_continue = @continue
  saved_ltype = @ltype
  saved_indent = @indent
  saved_indent_stack = @indent_stack
  saved_state = @lex_state
  saved_quoted = @quoted

  @ltype = nil
  @quoted = nil
  @indent = 0
  @indent_stack = []
  @lex_state = EXPR_BEG

  loop do
    @continue = false
    prompt
    tk = token
    next if @ltype || @continue || @indent > 0
    break if tk.kind_of?(TkRBRACE)
  end
ensure
  @continue = saved_continue
  @ltype = saved_ltype
  @indent = saved_indent
  @indent_stack = saved_indent_stack
  @lex_state = saved_state
  @quoted = saved_quoted
end

#initialize_input ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 210

# Resets the per-statement lexer state, shows the prompt, and then starts a
# fresh statement buffer whose first line is the current line number.
def initialize_input
  # No literal is open.
  @ltype = nil
  @quoted = nil

  # No nesting.
  @indent = 0
  @indent_stack = []

  # Scanner flags back to their defaults.
  @lex_state = EXPR_BEG
  @space_seen = false
  @here_header = false
  @continue = false

  prompt

  @line = ""
  @exp_line_no = @line_no
end

#lex ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 261

# Reads tokens up to the end of the current logical line and returns the
# text consumed while doing so.
#
# Scanning stops at a TkNL or TkEND_OF_SCRIPT token seen while no
# continuation is pending (@continue), or when #token returns nil.
# Returns nil when nothing was consumed and the input is exhausted.
def lex
  tk = token
  until tk.nil? || (!@continue && (tk.kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)))
    tk = token
  end

  line = get_readed
  at_end = tk.nil? || tk.kind_of?(TkEND_OF_SCRIPT)
  line == "" && at_end ? nil : line
end

#lex_init ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 338

# Creates the rule table (@OP, an IRB::SLex) and registers the first half of
# the lexing rules; lex_int2, called at the end, registers the rest.
#
# Each rule maps one or more literal prefixes to a block that consumes any
# further input it needs, updates the lexer state (@lex_state, @indent,
# @continue, ...) and returns the token for the match.
def lex_init()
  @OP = IRB::SLex.new
  # NUL, ^D and ^Z are treated as end of script.
  @OP.def_rules("\0", "\004", "\032") do |op, io|
    Token(TkEND_OF_SCRIPT)
  end

  # A run of blanks collapses into one TkSPACE token.
  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
    @space_seen = true
    while getc =~ /[ \t\f\r\13]/; end
    ungetc
    Token(TkSPACE)
  end

  @OP.def_rule("#") do |op, io|
    identify_comment
  end

  # "=begin" is only recognised when the token started at character 0 of a
  # line and is followed by whitespace; everything up to and including the
  # "=end" line is skipped.
  @OP.def_rule("=begin",
 proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do
    |op, io|
    @ltype = "="
    until getc == "\n"; end
    until peek_equal?("=end") && peek(4) =~ /\s/
	until getc == "\n"; end
    end
    gets
    @ltype = nil
    Token(TkRD_COMMENT)
  end

  # Newline: in EXPR_BEG / EXPR_FNAME / EXPR_DOT the statement continues on
  # the next line; otherwise the state returns to EXPR_BEG and
  # @indent_stack is popped down to the innermost open bracket.
  @OP.def_rule("\n") do |op, io|
    print "\\n\n" if RubyLex.debug?
    case @lex_state
    when EXPR_BEG, EXPR_FNAME, EXPR_DOT
	@continue = true
    else
	@continue = false
	@lex_state = EXPR_BEG
	until (@indent_stack.empty? ||
      [TkLPAREN, TkLBRACK, TkLBRACE,
 TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
 @indent_stack.pop
	end
    end
    @here_header = false
    @here_readed = []
    Token(TkNL)
  end

  # Binary / comparison operators: EXPR_ARG when used as a method name
  # (after "def" or "."), EXPR_BEG otherwise.
  @OP.def_rules("*", "**",
  "=", "==", "===",
  "=~", "<=>",
  "<", "<=",
  ">", ">=", ">>",
  "!", "!=", "!~") do
    |op, io|
    case @lex_state
    when EXPR_FNAME, EXPR_DOT
	@lex_state = EXPR_ARG
    else
	@lex_state = EXPR_BEG
    end
    Token(op)
  end

  # "<<" is either the start of a here-document or the shift/append
  # operator, depending on the state and on the character that follows.
  @OP.def_rules("<<") do
    |op, io|
    tk = nil
    if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
 (@lex_state != EXPR_ARG || @space_seen)
	c = peek(0)
	if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-")
 tk = identify_here_document
	end
    end
    unless tk
	tk = Token(op)
	case @lex_state
	when EXPR_FNAME, EXPR_DOT
 @lex_state = EXPR_ARG
	else
 @lex_state = EXPR_BEG
	end
    end
    tk
  end

  @OP.def_rules("'", '"') do
    |op, io|
    identify_string(op)
  end

  # A backquote in method-name position is the method name itself;
  # elsewhere it opens a command string.
  @OP.def_rules("`") do
    |op, io|
    if @lex_state == EXPR_FNAME
	@lex_state = EXPR_END
	Token(op)
    else
	identify_string(op)
    end
  end

  # "?" is the ternary operator after an expression (or, in EXPR_ARG, when
  # followed by whitespace); otherwise it starts a character literal, which
  # is returned as TkINTEGER.
  @OP.def_rules('?') do
    |op, io|
    if @lex_state == EXPR_END
	@lex_state = EXPR_BEG
	Token(TkQUESTION)
    else
	ch = getc
	if @lex_state == EXPR_ARG && ch =~ /\s/
 ungetc
 @lex_state = EXPR_BEG;
 Token(TkQUESTION)
	else
 if (ch == '\\')
   read_escape
 end
 @lex_state = EXPR_END
 Token(TkINTEGER)
	end
    end
  end

  @OP.def_rules("&", "&&", "|", "||") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  # Operator-assignments yield TkOPASGN carrying the operator without "=".
  @OP.def_rules("+=", "-=", "*=", "**=",
  "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
    |op, io|
    @lex_state = EXPR_BEG
    op =~ /^(.*)=$/
    Token(TkOPASGN, $1)
  end

  # "+@" and "-@" are only recognised in method-name position.
  @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do
    |op, io|
    @lex_state = EXPR_ARG
    Token(op)
  end

  @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do
    |op, io|
    @lex_state = EXPR_ARG
    Token(op)
  end

  # "+" / "-" directly followed by a digit is lexed as part of a numeric
  # literal when the state allows it; otherwise it is an operator.
  @OP.def_rules("+", "-") do
    |op, io|
    catch(:RET) do
	if @lex_state == EXPR_ARG
 if @space_seen and peek(0) =~ /[0-9]/
   throw :RET, identify_number
 else
   @lex_state = EXPR_BEG
 end
	elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
 throw :RET, identify_number
	else
 @lex_state = EXPR_BEG
	end
	Token(op)
    end
  end

  # "." followed by a digit is handed back to identify_number; otherwise
  # it is the method-call dot.
  @OP.def_rule(".") do
    |op, io|
    @lex_state = EXPR_BEG
    if peek(0) =~ /[0-9]/
	ungetc
	identify_number
    else
	# for "obj.if" etc.
	@lex_state = EXPR_DOT
	Token(TkDOT)
    end
  end

  @OP.def_rules("..", "...") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  lex_int2
end

#lex_int2 ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 527

# Registers the second half of the lexing rules on @OP (brackets, colons,
# "/", "%", sigils and the catch-all rule for numbers and identifiers).
# Called from lex_init.
def lex_int2
  # Closing brackets leave one nesting level.
  @OP.def_rules("]", "}", ")") do
    |op, io|
    @lex_state = EXPR_END
    @indent -= 1
    @indent_stack.pop
    Token(op)
  end

  # ":" after an expression or before whitespace is a plain colon;
  # otherwise it begins a symbol.
  @OP.def_rule(":") do
    |op, io|
    if @lex_state == EXPR_END || peek(0) =~ /\s/
	@lex_state = EXPR_BEG
	Token(TkCOLON)
    else
	@lex_state = EXPR_FNAME;
	Token(TkSYMBEG)
    end
  end

  # "::" yields TkCOLON3 at the beginning of an expression (or in EXPR_ARG
  # after a space) and TkCOLON2 otherwise.
  @OP.def_rule("::") do
     |op, io|
#      p @lex_state.id2name, @space_seen
    if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
	@lex_state = EXPR_BEG
	Token(TkCOLON3)
    else
	@lex_state = EXPR_DOT
	Token(TkCOLON2)
    end
  end

  # "/" starts a regexp literal where an operand is expected, is part of
  # "/=" when followed by "=", and is the division operator otherwise.
  @OP.def_rule("/") do
    |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
	identify_string(op)
    elsif peek(0) == '='
	getc
	@lex_state = EXPR_BEG
	Token(TkOPASGN, "/") #/)
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
	identify_string(op)
    else
	@lex_state = EXPR_BEG
	Token("/") #/)
    end
  end

  @OP.def_rules("^") do
    |op, io|
    @lex_state = EXPR_BEG
    Token("^")
  end

  #       @OP.def_rules("^=") do
  # 	@lex_state = EXPR_BEG
  # 	Token(OP_ASGN, :^)
  #       end

  @OP.def_rules(",") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  # ";" ends a statement: @indent_stack is popped down to the innermost
  # open bracket, as the newline rule does.
  @OP.def_rules(";") do
    |op, io|
    @lex_state = EXPR_BEG
    until (@indent_stack.empty? ||
    [TkLPAREN, TkLBRACK, TkLBRACE,
      TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
	@indent_stack.pop
    end
    Token(op)
  end

  @OP.def_rule("~") do
    |op, io|
    @lex_state = EXPR_BEG
    Token("~")
  end

  @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do
    |op, io|
    @lex_state = EXPR_BEG
    Token("~")
  end

  # Opening brackets enter a nesting level; the token class pushed on
  # @indent_stack depends on the state in which the bracket was seen.
  @OP.def_rule("(") do
    |op, io|
    @indent += 1
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
	@lex_state = EXPR_BEG
	tk_c = TkfLPAREN
    else
	@lex_state = EXPR_BEG
	tk_c = TkLPAREN
    end
    @indent_stack.push tk_c
    Token(tk_c)
  end

  # "[]" and "[]=" are method names when seen in method-name position.
  @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do
    |op, io|
    @lex_state = EXPR_ARG
    Token("[]")
  end

  @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do
    |op, io|
    @lex_state = EXPR_ARG
    Token("[]=")
  end

  @OP.def_rule("[") do
    |op, io|
    @indent += 1
    if @lex_state == EXPR_FNAME
	tk_c = TkfLBRACK
    else
	if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
 tk_c = TkLBRACK
	elsif @lex_state == EXPR_ARG && @space_seen
 tk_c = TkLBRACK
	else
 tk_c = TkfLBRACK
	end
	@lex_state = EXPR_BEG
    end
    @indent_stack.push tk_c
    Token(tk_c)
  end

  @OP.def_rule("{") do
    |op, io|
    @indent += 1
    if @lex_state != EXPR_END && @lex_state != EXPR_ARG
	tk_c = TkLBRACE
    else
	tk_c = TkfLBRACE
    end
    @lex_state = EXPR_BEG
    @indent_stack.push tk_c
    Token(tk_c)
  end

  # A backslash immediately before a newline is a line continuation and is
  # reported as whitespace.
  @OP.def_rule('\\') do
    |op, io|
    if getc == "\n"
	@space_seen = true
	@continue = true
	Token(TkSPACE)
    else
	read_escape
	Token("\\")
    end
  end

  # "%" starts a %-literal where an operand is expected, is part of "%="
  # when followed by "=", and is the modulo operator otherwise.
  # NOTE(review): unlike the "/=" branch of the "/" rule, the "%=" branch
  # here leaves @lex_state unchanged - confirm whether that is intended.
  @OP.def_rule('%') do
    |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
	identify_quotation
    elsif peek(0) == '='
	getc
	Token(TkOPASGN, :%)
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
	identify_quotation
    else
	@lex_state = EXPR_BEG
	Token("%") #))
    end
  end

  @OP.def_rule('$') do
    |op, io|
    identify_gvar
  end

  # "@" followed by a word character or another "@" starts an instance or
  # class variable; the "@" is pushed back so identify_identifier reads it.
  @OP.def_rule('@') do
    |op, io|
    if peek(0) =~ /[\w@]/
	ungetc
	identify_identifier
    else
	Token("@")
    end
  end

  #       @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
  # 	|op, io|
  # 	@indent += 1
  # 	@lex_state = EXPR_FNAME
  # #	@lex_state = EXPR_END
  # #	until @rests[0] == "\n" or @rests[0] == ";"
  # #	  rests.shift
  # #	end
  #       end

  # Rule registered for the empty prefix: numbers and identifiers.  The
  # block returns nil when the next character starts neither.
  @OP.def_rule("") do
    |op, io|
    printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
    if peek(0) =~ /[0-9]/
	t = identify_number
    elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
	t = identify_identifier
    end
    printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
    t
  end

  p @OP if RubyLex.debug?
end

#peek(i = 0) ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 179

# Returns the character i positions ahead without consuming it (0 is the
# next character).  The look-ahead buffer is refilled through buf_input as
# needed; nil is returned when the input ends first.
def peek(i = 0)
  until @rests.size > i
    buf_input or return nil
  end
  @rests[i]
end

#peek_equal?(str) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/irb/ruby-lex.rb', line 164

# True when the upcoming input starts with exactly the characters of str.
# Nothing is consumed; the look-ahead buffer is refilled through buf_input
# as needed and false is returned when the input ends first.
def peek_equal?(str)
  wanted = str.split(//)
  while @rests.size < wanted.size
    buf_input or return false
  end
  @rests.first(wanted.size) == wanted
end

#peek_match?(regexp) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/irb/ruby-lex.rb', line 172

# Matches regexp against everything currently held in the look-ahead buffer
# without consuming anything.  Returns the result of =~ (match position or
# nil), or false when the buffer is empty and cannot be refilled.
def peek_match?(regexp)
  while @rests.empty?
    buf_input or return false
  end

  lookahead = @rests.join("")
  regexp =~ lookahead
end

#prompt ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 204

# Invokes the prompt callable, if one is set, with the current literal type,
# nesting depth, continuation flag and line number.  Returns nil when no
# prompt is set.
def prompt
  return unless @prompt

  @prompt.call(@ltype, @indent, @continue, @line_no)
end

#read_escape ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 1139

# Consumes the remainder of a backslash escape sequence; the backslash
# itself has already been read.  Nothing is returned to the caller - only
# the input position advances.
#
# Handled forms: single-character escapes, up to three octal digits, "x"
# followed by up to two hex digits, "M-x", "C-x" and "cx" (each of which
# may itself be followed by another escape).  Any other character is
# consumed as is.
def read_escape
  case ch = getc
  when "\n", "\r", "\f"
  when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #"
  when /[0-7]/
    # Give the first digit back so the loop below counts it as well.
    ungetc ch
    3.times do
	case ch = getc
	when /[0-7]/
	when nil
 break
	else
 ungetc
 break
	end
    end

  when "x"
    2.times do
	case ch = getc
	when /[0-9a-fA-F]/
	when nil
 break
	else
 ungetc
 break
	end
    end

  when "M"
    if (ch = getc) != '-'
	ungetc
    else
	if (ch = getc) == "\\" #"
 read_escape
	end
    end

  when "C", "c" #, "^"
    # "C" must be followed by "-"; "c" takes the next character directly.
    if ch == "C" and (ch = getc) != "-"
	ungetc
    elsif (ch = getc) == "\\" #"
	read_escape
    end
  else
    # other characters
  end
end

#set_input(io, p = nil, &block) ⇒ `Object`

io functions

# File 'lib/irb/ruby-lex.rb', line 77

# Selects where the lexer reads its input from.
#
# io::    stored as @io (also used by #eof? and for the statement encoding).
# p::     optional callable used as the input reader; takes precedence
#         over a block.
# block:: used as the input reader when p is not callable.
#
# When neither is given, the reader calls gets on whatever @io is at the
# time it is invoked.
def set_input(io, p = nil, &block)
  @io = io
  @input =
    if p.respond_to?(:call)
      p
    elsif block_given?
      block
    else
      Proc.new { @io.gets }
    end
end

#set_prompt(p = nil, &block) ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 195

# Installs the prompt used by #prompt.
#
# A block, when given, takes precedence over p.  A callable is stored as
# is; any other value is wrapped in a proc that prints it.
def set_prompt(p = nil, &block)
  source = block_given? ? block : p
  @prompt = source.respond_to?(:call) ? source : Proc.new { print source }
end

#token ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 278

# Returns the next token from the rule table (@OP).
#
# * The position before the token is remembered in @prev_seek,
#   @prev_line_no and @prev_char_no.
# * @space_seen records whether the token just matched was a TkSPACE.
# * With @skip_space set, TkSPACE tokens are skipped.
# * A SyntaxError is re-raised when @exception_on_syntax_error is set;
#   otherwise a TkError for the current position is returned.
# * With @readed_auto_clean_up set, get_readed is called afterwards.
def token
  @prev_seek, @prev_line_no, @prev_char_no = @seek, @line_no, @char_no

  tk = nil
  again = true
  while again
    begin
      tk = @OP.match(self)
      @space_seen = tk.kind_of?(TkSPACE)
    rescue SyntaxError
      raise if @exception_on_syntax_error
      tk = TkError.new(@seek, @line_no, @char_no)
    end
    again = @skip_space && tk.kind_of?(TkSPACE)
  end

  get_readed if @readed_auto_clean_up
  tk
end

#ungetc(c = nil) ⇒ `Object`

# File 'lib/irb/ruby-lex.rb', line 143

# Pushes one character back onto the front of the look-ahead buffer.
#
# The most recently recorded character is removed from @here_readed (when
# it is non-empty) or else from @readed.  That character is pushed back
# unless an explicit c is given, in which case c is pushed back instead.
# @seek, @line_no and @char_no are rewound accordingly.
#
# NOTE(review): after un-reading a newline, @char_no is set to the index
# just past the last newline remaining in @readed (or @base_char_no plus the
# buffer size when there is none) - confirm this is the intended column.
def ungetc(c = nil)
  source = @here_readed.empty? ? @readed : @here_readed
  popped = source.pop
  c ||= popped
  @rests.unshift c
  @seek -= 1

  if c == "\n"
    @line_no -= 1
    last_newline = @readed.rindex("\n")
    @char_no = last_newline ? last_newline + 1 : @base_char_no + @readed.size
  else
    @char_no -= 1
  end
end

Class: RubyLex

Constant Summary collapse

Constants included from RubyToken

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from RubyToken

Constructor Details

#initialize ⇒ RubyLex

Class Attribute Details

.debug_level ⇒ Object

Instance Attribute Details

#char_no ⇒ Object (readonly)

#exception_on_syntax_error ⇒ Object

#indent ⇒ Object (readonly)

#line_no ⇒ Object (readonly)

#readed_auto_clean_up ⇒ Object

#seek ⇒ Object (readonly)

#skip_space ⇒ Object

Class Method Details

.debug? ⇒ Boolean

Instance Method Details

#each_top_level_statement ⇒ Object

#eof? ⇒ Boolean

#get_readed ⇒ Object

#getc ⇒ Object

#getc_of_rests ⇒ Object

#gets ⇒ Object

#identify_comment ⇒ Object

#identify_gvar ⇒ Object

#identify_here_document ⇒ Object

#identify_identifier ⇒ Object

#identify_number ⇒ Object

#identify_quotation ⇒ Object

#identify_string(ltype, quoted = ltype) ⇒ Object

#identify_string_dvar ⇒ Object

#initialize_input ⇒ Object

#lex ⇒ Object

#lex_init ⇒ Object

#lex_int2 ⇒ Object

#peek(i = 0) ⇒ Object

#peek_equal?(str) ⇒ Boolean

#peek_match?(regexp) ⇒ Boolean

#prompt ⇒ Object

#read_escape ⇒ Object

#set_input(io, p = nil, &block) ⇒ Object

#set_prompt(p = nil, &block) ⇒ Object

#token ⇒ Object

#ungetc(c = nil) ⇒ Object

#initialize ⇒ `RubyLex`

.debug_level ⇒ `Object`

#char_no ⇒ `Object` (readonly)

#exception_on_syntax_error ⇒ `Object`

#indent ⇒ `Object` (readonly)

#line_no ⇒ `Object` (readonly)

#readed_auto_clean_up ⇒ `Object`

#seek ⇒ `Object` (readonly)

#skip_space ⇒ `Object`

.debug? ⇒ `Boolean`

#each_top_level_statement ⇒ `Object`

#eof? ⇒ `Boolean`

#get_readed ⇒ `Object`

#getc ⇒ `Object`

#getc_of_rests ⇒ `Object`

#gets ⇒ `Object`

#identify_comment ⇒ `Object`

#identify_gvar ⇒ `Object`

#identify_here_document ⇒ `Object`

#identify_identifier ⇒ `Object`

#identify_number ⇒ `Object`

#identify_quotation ⇒ `Object`

#identify_string(ltype, quoted = ltype) ⇒ `Object`

#identify_string_dvar ⇒ `Object`

#initialize_input ⇒ `Object`

#lex ⇒ `Object`

#lex_init ⇒ `Object`

#lex_int2 ⇒ `Object`

#peek(i = 0) ⇒ `Object`

#peek_equal?(str) ⇒ `Boolean`

#peek_match?(regexp) ⇒ `Boolean`

#prompt ⇒ `Object`

#read_escape ⇒ `Object`

#set_input(io, p = nil, &block) ⇒ `Object`

#set_prompt(p = nil, &block) ⇒ `Object`

#token ⇒ `Object`

#ungetc(c = nil) ⇒ `Object`