Class: RubyLexer

Inherits:
Object
Extended by:
Forwardable
Includes:
Enumerable, NestedContexts
Defined in:
lib/rubylexer.rb,
lib/rubylexer/0.6.rb,
lib/rubylexer/0.7.0.rb,
lib/rubylexer/token.rb,
lib/rubylexer/charset.rb,
lib/rubylexer/context.rb,
lib/rubylexer/rulexer.rb,
lib/rubylexer/version.rb,
lib/rubylexer/rubycode.rb,
lib/rubylexer/charhandler.rb,
lib/rubylexer/symboltable.rb,
lib/rubylexer/tokenprinter.rb

Overview

rubylexer - a ruby lexer written in ruby

Copyright (C) 2004,2005  Caleb Clausen

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

Defined Under Namespace

Modules: ErrorToken, NestedContexts, RecursiveRubyLexer, RubyLexer1_9, StillIgnoreToken, TokenPat

Classes: AssignmentRhsListEndToken, AssignmentRhsListStartToken, CharHandler, CharSet, DecoratorToken, EndHeaderToken, EoiToken, EscNlToken, FileAndLineToken, HereBodyToken, HerePlaceholderToken, IgnoreToken, ImplicitParamListEndToken, ImplicitParamListStartToken, KeepWsTokenPrinter, KeywordToken, KwParamListEndToken, KwParamListStartToken, MethNameToken, Newline, NewlineToken, NoWsToken, NumberToken, OperatorToken, OutlinedHereBodyToken, RenderExactlyStringToken, RubyCode, SimpleTokenPrinter, StringToken, SubitemToken, SymbolTable, SymbolToken, Token, VarNameToken, WToken, WsToken, ZwToken

Constant Summary

RUBYSYMOPERATORREX =
%r{^([&|^/%]|=(==?)|=~|>[=>]?|<(<|=>?)?|[+~\-]@?|\*\*?|\[\]=?)}
RUBYNONSYMOPERATORREX =

(Nasty beastie, eh?) These are the overridable operators (the RUBYSYMOPERATORREX pattern above). They do not match flow-control operators such as || && ! or and if not, nor op= operators such as += -= ||=, nor .. ... ?:. For those, use:

%r{^([%^/\-+|&]=|(\|\||&&)=?|(<<|>>|\*\*?)=|\.{1,3}|[?:,;]|::|=>?|![=~]?)$}
RUBYOPERATORREX =
/#{RUBYSYMOPERATORREX}|#{RUBYNONSYMOPERATORREX}/o
UNSYMOPS =

always unary

/^[~!]$/
UBSYMOPS =

ops that could be unary or binary

/^([*&+-]|::)$/
WHSPCHARS =
WHSPLF+"\\#"
OPORBEGINWORDLIST =
%w(if unless while until)
BEGINWORDLIST =
%w(def class module begin for case do)+OPORBEGINWORDLIST
OPORBEGINWORDS =
"(#{OPORBEGINWORDLIST.join '|'})"
BEGINWORDS =
/^(#{BEGINWORDLIST.join '|'})$/o
FUNCLIKE_KEYWORDLIST =
%w/break next redo return yield retry super BEGIN END/
FUNCLIKE_KEYWORDS =
/^(#{FUNCLIKE_KEYWORDLIST.join '|'})$/
VARLIKE_KEYWORDLIST =
%w/__FILE__ __LINE__ false nil self true/
VARLIKE_KEYWORDS =
/^(#{VARLIKE_KEYWORDLIST.join '|'})$/
INNERBOUNDINGWORDLIST =
%w"else elsif ensure in then rescue when"
INNERBOUNDINGWORDS =
"(#{INNERBOUNDINGWORDLIST.join '|'})"
BINOPWORDLIST =
%w"and or"
BINOPWORDS =
"(#{BINOPWORDLIST.join '|'})"
RUBYKEYWORDS =
%r{
  ^(alias|#{BINOPWORDS}|defined\?|not|undef|end|
    #{VARLIKE_KEYWORDS}|#{FUNCLIKE_KEYWORDS}|
    #{INNERBOUNDINGWORDS}|#{BEGINWORDS}
  )$
}xo
HIGHASCII =

__END__ should not be in this set... it's handled in start_of_line_directives

?\x80..?\xFF
NONASCII =
HIGHASCII
CHARMAPPINGS =
{
      ?$ => :dollar_identifier,
      ?@ => :at_identifier,
      ?a..?z => :identifier,
      ?A..?Z => :identifier,
      ?_     => :identifier,
      ?0..?9 => :number,
      ?" => :double_quote,        #"
      ?' => :single_quote,        #'
      ?` => :back_quote,          #`

      WHSP => :whitespace, #includes \r
      ?, => :comma,
      ?; => :semicolon,

      ?^ => :caret,
      ?~ => :tilde,
      ?= => :equals,
      ?! => :exclam,
      ?. => :dot,

      #these ones could signal either an op or a term
      ?/ => :regex_or_div,
      "|" => :conjunction_or_goalpost,
      ">" => :quadriop,
      "*&" => :star_or_amp,        #could be unary
      "+-" => :plusminus, #could be unary
      ?< => :lessthan,
      ?% => :percent,
      ?? => :char_literal_or_op,  #single-char int literal
      ?: => :symbol_or_op,
      ?\n => :newline, #implicitly escaped after op
      #?\r => :newline, #implicitly escaped after op

      ?\\ => :escnewline,

      "[({" => :open_brace,
      "])}" => :close_brace,


      ?# => :comment,

      ?\x00 => :eof,
      ?\x04 => :eof,
      ?\x1a => :eof,

      ?\x01..?\x03 => :illegal_char,
      ?\x05..?\x08 => :illegal_char,
      ?\x0E..?\x19 => :illegal_char,
      ?\x1b..?\x1F => :illegal_char,
      ?\x7F => :illegal_char,
}
UCLETTER =
@@UCLETTER="[A-Z]"
LCLETTER =

Cheater's way: treats UTF chars as always 1 byte wide; all high-bit chars are lowercase letters. Works, but strings compare with strict binary identity, not Unicode collation. Works for EUC too, I think (the Ruby spec for UTF-8 support permits this interpretation).

@@LCLETTER="[a-z_\x80-\xFF]"
LETTER =
@@LETTER="[A-Za-z_\x80-\xFF]"
LETTER_DIGIT =
@@LETTER_DIGIT="[A-Za-z_0-9\x80-\xFF]"
NEVERSTARTPARAMLISTWORDS =
/\A(#{OPORBEGINWORDS}|#{INNERBOUNDINGWORDS}|#{BINOPWORDS}|end)((?:(?!#@@LETTER_DIGIT).)|\Z)/om
NEVERSTARTPARAMLISTFIRST =

chars that begin NEVERSTARTPARAMLIST

CharSet['aoeitrwu']
NEVERSTARTPARAMLISTMAXLEN =

max len of a NEVERSTARTPARAMLIST

7
ENCODING_ALIASES =
{
 'utf-8'=>'utf8',

 'ascii-8bit'=>'binary',
 'ascii-7bit'=>'ascii',
 'euc-jp'=>'euc',

 'ascii8bit'=>'binary',
 'ascii7bit'=>'ascii',
 'eucjp'=>'euc',

 'us-ascii'=>'ascii',
 'shift-jis'=>'sjis',

 'autodetect'=>'detect',
}
ENCODINGS =
%w[ascii binary utf8 euc sjis]
IMPLICIT_PARENS_BEFORE_ACCESSOR_ASSIGNMENT =
0
AUTO_UNESCAPE_STRINGS =
false
EndDefHeaderToken =
EndHeaderToken
FASTER_STRING_ESCAPES =
true
WHSP =
" \t\r\v\f"
WHSPLF =
WHSP+"\n"
LEGALCHARS =

maybe \r should be in WHSPLF instead

/[ -~#{WHSPLF}\x80-\xFF]/
PAIRS =
{ '{'=>'}', '['=>']', '('=>')', '<'=>'>'}
VERSION =
'0.7.7'
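
CHARMAPPINGS above is a first-character dispatch table: the first character of the upcoming token selects which handler method the lexer calls, with :identifier apparently serving as the fallback (see CharHandler.new in #initialize below). The sketch that follows only illustrates the table-lookup idea; it is not CharHandler's actual implementation, and SKETCH_MAP and sketch_handler_for are hypothetical names.

# Hypothetical illustration of first-character dispatch, not CharHandler's API.
SKETCH_MAP = {
  ?a..?z => :identifier,
  ?A..?Z => :identifier,
  ?0..?9 => :number,
  ?$     => :dollar_identifier,
  ?#     => :comment,
}

def sketch_handler_for(ch)
  SKETCH_MAP.each_pair do |key, handler|
    hit = (Range === key) ? key.include?(ch) : key == ch
    return handler if hit
  end
  :identifier   # fallback, like the default handler passed to CharHandler.new
end

sketch_handler_for("w")   #=> :identifier
sketch_handler_for("7")   #=> :number
sketch_handler_for("#")   #=> :comment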

Instance Attribute Summary

Instance Method Summary

Constructor Details

#initialize(filename, file, linenum = 1, offset_adjust = 0, options = {}) ⇒ RubyLexer Also known as: rulexer_initialize




# File 'lib/rubylexer.rb', line 165

def initialize(filename,file,linenum=1,offset_adjust=0,options={})
   @offset_adjust=0 #set again in next line
   rulexer_initialize(filename,file, linenum,offset_adjust)
   @start_linenum=linenum
   @parsestack=[TopLevelContext.new]
   @incomplete_here_tokens=[] #not used anymore
   @pending_here_bodies=[]
   @localvars_stack=[SymbolTable.new]
   @defining_lvar=nil
   @in_def_name=false
   @last_operative_token=nil
   @last_token_maybe_implicit=nil
   @enable_macro=nil
   @base_file=nil
   @progress_thread=nil
   @rubyversion=options[:rubyversion]||1.8
   @encoding=options[:encoding]||:detect
   @method_operators=if @rubyversion>=1.9 
                       /#{RUBYSYMOPERATORREX}|\A![=~@]?/o
                     else
                       RUBYSYMOPERATORREX
                     end

   @toptable=CharHandler.new(self, :identifier, CHARMAPPINGS)

   extend RubyLexer1_9 if @rubyversion>=1.9
   read_leading_encoding
   start_of_line_directives
   progress_printer
end
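
A minimal usage sketch for the constructor. Whether a plain source String is accepted for the file argument (in addition to an IO) is an assumption here; the option keys :rubyversion and :encoding and the tokens-until-EoiToken loop are taken from the code on this page.

require 'rubylexer'

# Assumption: a source String is accepted as the file argument.
src   = "a = 1 + 2\nputs a\n"
lexer = RubyLexer.new("(example)", src, 1, 0, :rubyversion=>1.8, :encoding=>:ascii)

# Pull tokens until the end-of-input token arrives, as #each does below.
begin
  tok = lexer.get1token
  p tok
end until RubyLexer::EoiToken === tok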

Instance Attribute Details

#file ⇒ Object

hack



# File 'lib/rubylexer/rulexer.rb', line 54

def file
  @file
end

#filename ⇒ Object (readonly)

Returns the value of attribute filename.



# File 'lib/rubylexer/rulexer.rb', line 53

def filename
  @filename
end

#in_def ⇒ Object

Returns the value of attribute in_def.



# File 'lib/rubylexer.rb', line 258

def in_def
  @in_def
end

#incomplete_here_tokens ⇒ Object (readonly)

Returns the value of attribute incomplete_here_tokens.



# File 'lib/rubylexer.rb', line 130

def incomplete_here_tokens
  @incomplete_here_tokens
end

#last_operative_token ⇒ Object (readonly)

Returns the value of attribute last_operative_token.



# File 'lib/rubylexer/rulexer.rb', line 53

def last_operative_token
  @last_operative_token
end

#last_token_maybe_implicit ⇒ Object (readonly)

Returns the value of attribute last_token_maybe_implicit.



# File 'lib/rubylexer.rb', line 130

def last_token_maybe_implicit
  @last_token_maybe_implicit
end

#linenum ⇒ Object (readonly)

Returns the value of attribute linenum.



# File 'lib/rubylexer/rulexer.rb', line 53

def linenum
  @linenum
end

#localvars_stack ⇒ Object

Returns the value of attribute localvars_stack.



# File 'lib/rubylexer.rb', line 256

def localvars_stack
  @localvars_stack
end

#offset_adjust ⇒ Object (readonly)

Returns the value of attribute offset_adjust.



# File 'lib/rubylexer.rb', line 259

def offset_adjust
  @offset_adjust
end

#original_file ⇒ Object (readonly)

Returns the value of attribute original_file.



# File 'lib/rubylexer/rulexer.rb', line 53

def original_file
  @original_file
end

#parsestack ⇒ Object (readonly)

Returns the value of attribute parsestack.



# File 'lib/rubylexer.rb', line 130

def parsestack
  @parsestack
end

#pending_here_bodies=(value) ⇒ Object (writeonly)

Sets the attribute pending_here_bodies

Parameters:

  • value

    the value to set the attribute pending_here_bodies to.



# File 'lib/rubylexer.rb', line 260

def pending_here_bodies=(value)
  @pending_here_bodies = value
end

#rubyversion ⇒ Object (readonly)

Returns the value of attribute rubyversion.



# File 'lib/rubylexer.rb', line 261

def rubyversion
  @rubyversion
end

Instance Method Details

#_keyword_funclike(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1437

def _keyword_funclike(str,offset,result)
      if @last_operative_token===/^(\.|::)$/
        result=yield MethNameToken.new(str) #should pass a methname token here
      else
        tok=KeywordToken.new(str)
        result=yield tok,tok
      end
      return result
end

#_keyword_innerbounding(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1371

def _keyword_innerbounding(str,offset,result)
      result.unshift(*abort_noparens!(str))
      return result
end

#_keyword_varlike(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1450

def _keyword_varlike(str,offset,result)
      #do nothing
      return result
end

#at_identifier(ch = nil) ⇒ Object




# File 'lib/rubylexer.rb', line 347

def at_identifier(ch=nil)
   result =  (eat_next_if(?@) or return nil)
   result << (eat_next_if(?@) or '')
   if t=identifier_as_string(?@)
     result << t
   else error= "missing @id name"
   end
   result=VarNameToken.new(result)
   result.in_def=true if inside_method_def?
   return lexerror(result,error)
end

#balanced_braces? ⇒ Boolean


Returns:

  • (Boolean)


# File 'lib/rubylexer.rb', line 328

def balanced_braces?

    #@parsestack.empty?
    @parsestack.size==1 and TopLevelContext===@parsestack.first
end

#dollar_identifier(ch = nil) ⇒ Object




# File 'lib/rubylexer.rb', line 335

def dollar_identifier(ch=nil)
   s=eat_next_if(?$) or return nil

   if t=((identifier_as_string(?$) or special_global))
     s << t
   else error= "missing $id name"
   end

   return lexerror(VarNameToken.new(s),error)
end

#each ⇒ Object




# File 'lib/rubylexer/rulexer.rb', line 99

def each
  begin yield tok = get1token
  end until tok.is_a? EoiToken
end
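
Because Enumerable is included, the usual collection methods work on top of #each. A small sketch, again assuming a String source is accepted; #each yields every token (whitespace and comments included) and stops after the EoiToken, so Enumerable methods consume the lexer in a single pass.

require 'rubylexer'

lexer = RubyLexer.new("(example)", "def foo(x) x*2 end\n")

# Keep only the non-ignored tokens and show their source text.
words = lexer.reject{|t| RubyLexer::IgnoreToken===t }.map{|t| t.to_s }
p words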

#enable_macros! ⇒ Object




# File 'lib/rubylexer.rb', line 933

def enable_macros!
  @enable_macro="macro"
  class <<self
    alias keyword_macro keyword_def
  end
end

#endoffile_detected(s = '') ⇒ Object Also known as: rulexer_endoffile_detected




# File 'lib/rubylexer.rb', line 2848

def endoffile_detected(s='')
  @moretokens.push( *(abort_noparens!.push rulexer_endoffile_detected(s)))
  if @progress_thread
    @progress_thread.kill
    @progress_thread=nil
  end
  result= @moretokens.shift
  balanced_braces? or (lexerror result,"unbalanced braces at eof. parsestack=#{@parsestack.inspect}")
  result
end

#eof? ⇒ Boolean Also known as: rulexer_eof?


Returns:

  • (Boolean)


# File 'lib/rubylexer.rb', line 313

def eof?
  rulexer_eof? or EoiToken===@last_operative_token
end

#get1token ⇒ Object Also known as: rulexer_get1token




# File 'lib/rubylexer.rb', line 269

def get1token
   result=rulexer_get1token  #most of the action's here

   if ENV['PROGRESS']
   @last_cp_pos||=0
   @start_time||=Time.now
   if result.offset-@last_cp_pos>100000
     $stderr.puts "#{result.offset} #{Time.now-@start_time}"
     @last_cp_pos=result.offset
   end
   end

   #now cleanup and housekeeping


   #check for bizarre token types
   case result
   when ImplicitParamListStartToken, ImplicitParamListEndToken
       @last_token_maybe_implicit=result
       result
   when StillIgnoreToken#,nil
       result
   when StringToken
       set_last_token result
       assert !(IgnoreToken===@last_operative_token)
       result.elems.map!{|frag|
         if String===frag
           result.translate_escapes(frag)
         else 
           frag
         end
       } if AUTO_UNESCAPE_STRINGS
       result
  
   when Token#,String
       set_last_token result
       assert !(IgnoreToken===@last_operative_token)
       result
   else
       raise "#{@filename}:#{linenum}:token is a #{result.class}, last is #{@last_operative_token}"
   end
end
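
A pull-style loop over #get1token, stopping at the EoiToken just as #each does. tok.offset and tok.to_s are used by the lexer itself elsewhere on this page; the String source is again an assumption.

require 'rubylexer'

lexer = RubyLexer.new("(example)", "puts 'hi' # comment\n")
begin
  tok = lexer.get1token
  # Print each token's byte offset, class, and source text.
  printf("%4d  %-35s %s\n", tok.offset, tok.class, tok.to_s.inspect)
end until RubyLexer::EoiToken === tok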

#input_position_raw ⇒ Object




# File 'lib/rubylexer.rb', line 323

def input_position_raw 
  @file.pos
end

#keyword___FILE__(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1418

def keyword___FILE__(str,offset,result)
  result.last.value=@filename
  return result
end

#keyword___LINE__(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1423

def keyword___LINE__(str,offset,result)
  result.last.value=@linenum
  return result
end

#keyword_alias(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1271

def keyword_alias(str,offset,result)
      safe_recurse { |a|
         set_last_token KeywordToken.new( "alias" )#hack
         result.concat ignored_tokens
         res=symbol(eat_next_if(?:),false) 
         unless res
           lexerror(result.first,"bad symbol in alias")
         else
           res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
           result<< res
           set_last_token KeywordToken.new( "alias" )#hack
           result.concat ignored_tokens
           res=symbol(eat_next_if(?:),false) 
           unless res
             lexerror(result.first,"bad symbol in alias")
           else
             res.ident[0]==?$ and res=VarNameToken.new(res.ident,res.offset)
             result<< res
           end
         end
      }
      return result
end

#keyword_begin(str, offset, result) ⇒ Object Also known as: keyword_case



# File 'lib/rubylexer.rb', line 1091

def keyword_begin(str,offset,result)   
      result.first.has_end!
      @parsestack.push WantsEndContext.new(str,@linenum)
      return result
end

#keyword_class(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1066

def keyword_class(str,offset,result)
      result.first.has_end!
      @parsestack.push ClassContext.new(str,@linenum)
      return result
end

#keyword_def(str, offset, result) ⇒ Object

macros too, if enabled



# File 'lib/rubylexer.rb', line 1140

def keyword_def(str,offset,result)         #macros too, if enabled
      result.first.has_end!
      @parsestack.push ctx=DefContext.new(@linenum)
      ctx.state=:saw_def
   old_moretokens=@moretokens
   @moretokens=[]
   aa=@moretokens
      #safe_recurse { |aa|
         set_last_token KeywordToken.new(str) #hack
         result.concat ignored_tokens

         #read an expr like a.b.c or a::b::c
         #or (expr).b.c
         if nextchar==?( #look for optional parenthesised head
           old_size=@parsestack.size
           parencount=0
           begin
             tok=get1token
             case tok
             when/^\($/.token_pat then parencount+=1
             when/^\)$/.token_pat then parencount-=1
             end
             EoiToken===tok and lexerror tok, "eof in def header"
             result << tok
           end until  parencount==0 #@parsestack.size==old_size
           @localvars_stack.push SymbolTable.new
         else #no parentheses, all tail
           set_last_token KeywordToken.new(".") #hack hack
           tokindex=result.size
           result << tok=symbol(false,false)
           name=tok.to_s
           assert !in_lvar_define_state
  
           #maybe_local really means 'maybe local or constant'
           maybe_local=case name
             when /(?!#@@LETTER_DIGIT).$/o; #do nothing
             when /^[@$]/; true
             when VARLIKE_KEYWORDS,FUNCLIKE_KEYWORDS,("__ENCODING__" if @rubyversion>=1.9); ty=KeywordToken
             when /^#@@LCLETTER/o;  localvars===name 
             when /^#@@UCLETTER/o; is_const=true  #this is the right algorithm for constants... 
           end
           result.push(  *ignored_tokens(false,false)  )
           nc=nextchar
           if !ty and maybe_local
             if nc==?: || nc==?.
               ty=VarNameToken
             end
           end  
           if ty.nil? or (ty==KeywordToken and nc!=?: and nc!=?.)
                ty=MethNameToken
                if nc != ?(
                  endofs=tok.offset+tok.to_s.length
                  newtok=ImplicitParamListStartToken.new(endofs)
                  result.insert tokindex+1, newtok
                end
           end

           assert result[tokindex].equal?(tok)
           var=assign_lvar_type! ty.new(tok.to_s,tok.offset)
           @localvars_stack.push SymbolTable.new
           var.in_def=true if inside_method_def? and var.respond_to? :in_def=
           result[tokindex]=var
           

           #if a.b.c.d is seen, a, b and c
           #should be considered maybe varname instead of methnames.
           #the last (d in the example) is always considered a methname;
           #it's what's being defined.
           #b and c should be considered varnames only if 
           #they are capitalized and preceded by :: .
           #a could even be a keyword (eg self or block_given?).
         end
         #read tail: .b.c.d etc
         result.reverse_each{|res| break set_last_token( res ) unless StillIgnoreToken===res}
         assert !(IgnoreToken===@last_operative_token)
         state=:expect_op
         @in_def_name=true
         while true

            #look for start of parameter list
            nc=(@moretokens.empty? ? nextchar.chr : @moretokens.first.to_s[0,1])
            if state==:expect_op and /^(?:#@@LETTER|[(&*])/o===nc
               ctx.state=:def_param_list
               ctx.has_parens= '('==nc
               list,listend=def_param_list
               result.concat list
               end_index=result.index(listend)
               ofs=listend.offset
               if endofs
                 result.insert end_index,ImplicitParamListEndToken.new(ofs)
               else 
                 ofs+=listend.to_s.size
               end
               result.insert end_index+1,EndHeaderToken.new(ofs)
               break
            end

            tok=get1token
            result<< tok
            case tok
            when EoiToken
               lexerror tok,'unexpected eof in def header'
            when StillIgnoreToken
            when MethNameToken ,VarNameToken # /^#@@LETTER/o.token_pat
               lexerror tok,'expected . or ::' unless state==:expect_name
               state=:expect_op
            when /^(\.|::)$/.token_pat
               lexerror tok,'expected ident' unless state==:expect_op
               if endofs
                 result.insert( -2, ImplicitParamListEndToken.new(endofs) )
                 endofs=nil
               end
               state=:expect_name
            when /^(;|end)$/.token_pat, NewlineToken #are we done with def name?
               ctx.state=:def_body
               state==:expect_op or lexerror tok,'expected identifier'
               if endofs
                 result.insert( -2,ImplicitParamListEndToken.new(tok.offset) )
               end
               result.insert( -2, EndHeaderToken.new(tok.offset) )
               break
            else
               lexerror(tok, "bizarre token in def name: " +
                        "#{tok}:#{tok.class}")
            end
         end
         @in_def_name=false
      #}
   @moretokens= old_moretokens.concat @moretokens
      return result
end

#keyword_do(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1121

def keyword_do(str,offset,result)
      result.unshift(*abort_noparens_for_do!(str))
      ctx=@parsestack.last
      if ExpectDoOrNlContext===ctx
         @parsestack.pop
         assert WantsEndContext===@parsestack.last
         result.last.as=";"
      else
         result.last.has_end!
         if BlockContext===ctx and ctx.wanting_stabby_block_body
           @parsestack[-1]= WantsEndContext.new(str,@linenum)            
         else
           @parsestack.push WantsEndContext.new(str,@linenum)            
           localvars.start_block
           block_param_list_lookahead
         end
      end
      return result
end

#keyword_elsif(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1085

def keyword_elsif(str,offset,result) 
      result.unshift(*abort_noparens!(str))
      @parsestack.push ExpectThenOrNlContext.new(str,@linenum)
      return result
end

#keyword_end(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1004

def keyword_end(str,offset,result)
      result.unshift(*abort_noparens!(str))
      @parsestack.last.see self,:semi #sorta hacky... should make an :end event instead?

=begin not needed?
      if [email protected]
         @parsestack.pop
         assert @parsestack.last.starter[/^(while|until|for)$/]
      end
=end

      WantsEndContext===@parsestack.last or lexerror result.last, 'unbalanced end'
      ctx=@parsestack.pop
      start,line=ctx.starter,ctx.linenum
      BEGINWORDS===start or lexerror result.last, "end does not match #{start or "nil"}"
      /^(do)$/===start and localvars.end_block
      /^(class|module|def)$/===start and @localvars_stack.pop
      return result
end

#keyword_END(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1392

def keyword_END(str,offset,result)
      #END could be treated, lexically, just as if it is an
      #ordinary method, except that local vars created in
      #END blocks are visible to subsequent code. (Why??) 
      #That difference forces a custom parsing.
      if @last_operative_token===/^(\.|::)$/
        result=yield MethNameToken.new(str) #should pass a methname token here
      else
        safe_recurse{
          old=result.first
          result=[
            KeywordToken.new(old.ident,old.offset),
            ImplicitParamListStartToken.new(input_position),
            ImplicitParamListEndToken.new(input_position),
            *ignored_tokens
          ]
          getchar=='{' or lexerror(result.first,"expected { after #{str}")
          result.push KeywordToken.new('{',input_position-1)
          result.last.set_infix!
          result.last.as="do"
          @parsestack.push BeginEndContext.new(str,offset)
        }
      end
      return result
end

#keyword_for(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1111

def keyword_for(str,offset,result)
      result.first.has_end!
      result.push KwParamListStartToken.new(offset+str.length)
      # corresponding EndToken emitted leaving ForContext ("in" branch, below)
      @parsestack.push WantsEndContext.new(str,@linenum)
      #expect_do_or_end_or_nl! str #handled by ForSMContext now
      @parsestack.push ForSMContext.new(@linenum)
      return result
end

#keyword_if(str, offset, result) ⇒ Object Also known as: keyword_unless

could be infix form without end



# File 'lib/rubylexer.rb', line 1073

def keyword_if(str,offset,result)  #could be infix form without end
      if after_nonid_op?{false} #prefix form
         result.first.has_end!
         @parsestack.push WantsEndContext.new(str,@linenum)
         @parsestack.push ExpectThenOrNlContext.new(str,@linenum)
      else #infix form
        result.unshift(*abort_noparens!(str))
      end
      return result
end

#keyword_in(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1364

def keyword_in(str,offset,result)
      result.unshift KwParamListEndToken.new( offset)
      result.unshift(*abort_noparens!(str))
      @parsestack.last.see self,:in
      return result
end

#keyword_module(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1024

def keyword_module(str,offset,result) 
      result.first.has_end!
      @parsestack.push WantsEndContext.new(str,@linenum)
      offset=input_position
      assert @moretokens.empty?
      tokens=[]
      if @file.scan(/\A(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)(?=[#{WHSP}]+(?:[^(])|[#;\n]|::)/o) 
        md=@file.last_match
        all,ws,name=*md
        tokens.concat divide_ws(ws,md.begin(1)) if ws
        tokens.push VarNameToken.new(name,md.begin(2))
      end
      tokens.push( *read_arbitrary_expression{|tok,extra_contexts|
        #@file.check /\A(\n|;|::|end(?!#@@LETTER_DIGIT)|(#@@UCLETTER#@@LETTER_DIGIT*)(?!(#@@WSTOKS)?::))/o
        @file.check( /\A(\n|;|end(?!#@@LETTER_DIGIT))/o ) or 
          @file.check("::") && extra_contexts.all?{|ctx| ImplicitParamListContext===ctx } &&
            @moretokens.push(*abort_noparens!)
      } ) if !name #or @file.check /#@@WSTOKS?::/o
      @moretokens[0,0]=tokens
      @localvars_stack.push SymbolTable.new
      while @file.check( /\A::/ )
            #[email protected] or 
            #[email protected] && @moretokens.last.ident=="::"
        @file.scan(/\A(#@@WSTOKS)?(::)?(#@@WSTOKS)?(#@@UCLETTER#@@LETTER_DIGIT*)/o) or break
        md=@file.last_match
        all,ws1,dc,ws2,name=*md
        if ws1
          @moretokens.concat divide_ws(ws1,md.begin(1))
          incr=ws1.size
        else
          incr=0
        end
        @moretokens.push NoWsToken.new(md.begin(2)) if dc
        @moretokens.push KeywordToken.new('::',md.begin(2)) if dc
        @moretokens.concat divide_ws(ws2,md.begin(3)) if ws2
        @moretokens.push VarNameToken.new(name,md.begin(4))
      end
      @moretokens.push EndHeaderToken.new(input_position)
      return result
end

#keyword_rescue(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1329

def keyword_rescue(str,offset,result)
      unless after_nonid_op? {false}
        result.replace []
        #rescue needs to be treated differently when in operator context... 
        #i think no RescueSMContext should be pushed on the stack...
        tok=OperatorToken.new(str,offset)
        tok.unary=false           #plus, the rescue token should be marked as infix
        if AssignmentRhsContext===@parsestack.last
          tok.as="rescue3"
          @parsestack.pop #end rhs context
          result.push AssignmentRhsListEndToken.new(offset) #end rhs token
        else
          result.concat abort_noparens_for_rescue!(str)
        end
        result.push tok
      else         
        result.push KwParamListStartToken.new(offset+str.length)
        #corresponding EndToken emitted by abort_noparens! on leaving rescue context
        @parsestack.push RescueSMContext.new(@linenum)
#           result.unshift(*abort_noparens!(str))  
      end
      return result
end

#keyword_return(str, offset, result) ⇒ Object Also known as: keyword_break, keyword_next



# File 'lib/rubylexer.rb', line 1379

def keyword_return(str,offset,result)     
      fail if KeywordToken===@last_operative_token and @last_operative_token===/\A(\.|::)\Z/
      tok=KeywordToken.new(str,offset)
      result=yield tok
      result[0]=tok
      tok.has_no_block!
      return result
end

#keyword_then(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1353

def keyword_then(str,offset,result)
      result.unshift(*abort_noparens!(str))
      @parsestack.last.see self,:then

      if ExpectThenOrNlContext===@parsestack.last
        @parsestack.pop
      else #error... does anyone care?
      end
      return result
end

#keyword_undef(str, offset, result) ⇒ Object



# File 'lib/rubylexer.rb', line 1294

def keyword_undef(str,offset,result)
      safe_recurse { |a|
         loop do
            set_last_token KeywordToken.new( "," )#hack
            result.concat ignored_tokens
            tok=symbol(eat_next_if(?:),false)
            tok or lexerror(result.first,"bad symbol in undef")
            result<< tok
            set_last_token tok
            assert !(IgnoreToken===@last_operative_token)

            sawnl=false
            result.concat ignored_tokens(true){|nl| sawnl=true}

            break if sawnl or nextchar != ?,
            tok= single_char_token(?,)
            result<< tok
         end
      }
      
      return result
end

#keyword_when(str, offset, result) ⇒ Object

defined? might have a bare symbol following it; does it need to be handled specially? It would seem not....



1321
1322
1323
1324
1325
1326
1327
# File 'lib/rubylexer.rb', line 1321

def keyword_when(str,offset,result)
      #abort_noparens! emits EndToken on leaving context
      result.unshift(*abort_noparens!(str))
      result.push KwParamListStartToken.new( offset+str.length)
      @parsestack.push WhenParamListContext.new(str,@linenum)
      return result
end

#keyword_while(str, offset, result) ⇒ Object Also known as: keyword_until

could be infix form without end



# File 'lib/rubylexer.rb', line 1098

def keyword_while(str,offset,result) #could be infix form without end
      if after_nonid_op?{false} #prefix form
        result.first.has_end!
        @parsestack.push WantsEndContext.new(str,@linenum)
        expect_do_or_end_or_nl! str

      else #infix form
        result.unshift(*abort_noparens!(str))
      end
      return result
end

#localvars ⇒ Object



# File 'lib/rubylexer.rb', line 252

def localvars;
  @localvars_stack.last
end

#no_more? ⇒ Boolean


Returns:

  • (Boolean)


# File 'lib/rubylexer/rulexer.rb', line 93

def no_more?
  @moretokens.each{|t| FileAndLineToken===t or return false }
  return true
end

#progress_printer ⇒ Object



# File 'lib/rubylexer.rb', line 241

def progress_printer
  return unless ENV['RL_PROGRESS']
  $stderr.puts 'printing progresses'
  @progress_thread=Thread.new do
    until EoiToken===@last_operative_token
      sleep 10
      $stderr.puts @file.pos
    end
  end
end

#read_leading_encoding ⇒ Object



# File 'lib/rubylexer.rb', line 213

def read_leading_encoding
  return unless @encoding==:detect
  @encoding=:ascii
  @encoding=:utf8 if @file.skip( "\xEF\xBB\xBF" )   #bom
  if @file.skip( /\A#!/ )
    loop do
      til_charset( /[\s\v]/ )
      break if @file.match( /^\n|[\s\v]([^-\s\v]|--?[\s\v])/,4 )
      if @file.skip( /.-K(.)/ )
        case $1
        when 'u'; @encoding=:utf8
        when 'e'; @encoding=:euc
        when 's'; @encoding=:sjis
        end
      end
    end
    til_charset( /[\n]/ )
  end
  if @rubyversion>=1.9 and @file.skip( 
       /\A#[\x00-\x7F]*?(?:en)?coding[\s\v]*[:=][\s\v]*([a-z0-9_-]+)[\x00-\x7F]*\n/i 
     )
    name=$1
    name.downcase!
    name=ENCODING_ALIASES[name] if ENCODING_ALIASES[name]
    @encoding=name.to_sym if ENCODINGS.include? name
  end
end
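
Inputs that exercise the three detection paths above: the UTF-8 BOM, a -K switch on the shebang line, and (in ruby 1.9 mode) a magic encoding comment normalized through ENCODING_ALIASES. This is a sketch: the String source and the instance_variable_get peek are illustration-only assumptions, since no public reader for the detected encoding is documented here.

require 'rubylexer'

utf8_bom = "\xEF\xBB\xBF" + "puts 'hi'\n"        # BOM            -> :utf8
kcode    = "#!/usr/bin/ruby -Ku\nputs 'hi'\n"    # -K shebang arg -> :utf8
magic    = "# encoding: Shift-JIS\nputs 'hi'\n"  # magic comment  -> :sjis (via ENCODING_ALIASES)

lexer = RubyLexer.new("(example)", magic, 1, 0,
                      :rubyversion=>1.9, :encoding=>:detect)
# Peeking at the ivar is for illustration only; there is no documented accessor.
p lexer.instance_variable_get(:@encoding)   #=> :sjis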

#set_last_token(tok) ⇒ Object




# File 'lib/rubylexer.rb', line 264

def set_last_token(tok)
  @last_operative_token=@last_token_maybe_implicit=tok
end