Class: Twostroke::Lexer

Inherits:
Object
  • Object
show all
Defined in:
lib/twostroke/lexer.rb,
lib/twostroke/tokens.rb

Constant Summary collapse

# JavaScript reserved words. Each word gets its own token type named after the
# upcased word (e.g. "function" => :FUNCTION).
RESERVED = %w[
  function var if instanceof in else for while do this return
  throw typeof try catch finally void null new delete switch
  case break continue default true false with
].freeze

# Ordered token definitions as [type, pattern, value_handler] triples.
#
# Order matters: the lexer takes the first definition whose pattern matches at
# the start of the input, so longer operators are listed before their prefixes
# (">>>=" before ">>=", "===" before "==", ...). Every pattern is re-compiled
# at the bottom anchored with \A and with Regexp::MULTILINE, so "." also
# matches newlines. The optional handler receives the MatchData and returns
# the token's value; definitions without a handler produce a nil value.
TOKENS = [
  [:MULTI_COMMENT, %r{/\*.*?\*/}],
  [:SINGLE_COMMENT, /\/\/.*?($|\r|\u2029|\u2028)/],

  [:LINE_TERMINATOR, /[\n\r\u2028\u2029]/],
  [:WHITESPACE, /[ \t\r\v\f]+/],

  # The name of the capture group that matched doubles as the String method
  # used to convert the literal ("017".oct, "0x1f".hex, "1.5".to_f).
  [:NUMBER, /((?<oct>0[0-7]+)|(?<hex>0[xX][A-Fa-f0-9]+)|(?<to_f>(\d+(\.?\d*([eE][+-]?\d+)?)?|\.\d+([eE][+-]?\d+)?)))/, ->(m) do
    conversion, literal = m.names.zip(m.captures).find { |_name, text| text }
    value = literal.send(conversion)
    # Whole-valued results are normalised to Integer.
    (value % 1).zero? ? value.to_i : value
  end],

  # A keyword only counts when it is not the prefix of a longer identifier.
  # The negative lookahead (rather than a lookahead for "some other char")
  # also lets a keyword match at the very end of the input, and "$" is
  # excluded because it is a legal identifier character.
  *RESERVED.map { |word| [word.upcase.intern, /#{word}(?![\$a-zA-Z_0-9])/] },
  [:BAREWORD, /[a-zA-Z_\$][\$a-zA-Z_0-9]*/, ->(m) { m[0] }],

  # Group 1 is the opening quote, group 2 the raw body. A backreference cannot
  # be used inside a character class (there "\1" means U+0001), so the closing
  # quote is excluded with a lookahead instead. Backslashes are only consumed
  # as part of an escape pair, which keeps an escaped quote from ending the
  # string.
  [:STRING, /(["'])((?:\\\n|\\.|(?!\1)[^\\\n\r\u2028\u2029])*)\1/, ->(m) do
    escape = /\\(?:u[dD][89abAB]\h{2}\\u[dD][c-fC-F]\h{2}|[0-3][0-7]{0,2}|[4-7][0-7]?|u\h{4}|x\h{2}|\n|.)/
    m[2].gsub(escape) do |esc|
      case esc
      when /\A\\u[dD][89abAB]\h{2}\\u/
        # UTF-16 surrogate pair: combine both halves into one code point.
        high = esc[2, 4].hex
        low = esc[8, 4].hex
        (0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)).chr("utf-8")
      when /\A\\[0-7]/
        # Octal escape, at most \377.
        esc[1..-1].to_i(8).chr("utf-8")
      when /\A\\[ux]\h/
        esc[2..-1].hex.chr("utf-8")
      else
        case esc[1]
        when "b" then "\b"
        when "n" then "\n"
        when "f" then "\f"
        when "v" then "\v"
        when "r" then "\r"
        when "t" then "\t"
        when "\n" then "" # backslash-newline is a line continuation
        else esc[1]
        end
      end
    end
  end],

  # A regexp body is made of escape pairs, bracketed character classes (which
  # may contain an unescaped "/") and ordinary characters; it may not contain
  # a line terminator.
  [:REGEXP, %r{/(?<src>(?:\\[^\n\r\u2028\u2029]|\[(?:\\[^\n\r\u2028\u2029]|[^\]\\\n\r\u2028\u2029])*\]|[^/\\\[\n\r\u2028\u2029])+)/(?<opts>[gim]+)?}, ->(m) { [m[:src], m[:opts]] }],

  [:OPEN_PAREN, /\(/],
  [:CLOSE_PAREN, /\)/],
  [:OPEN_BRACKET, /\[/],
  [:CLOSE_BRACKET, /\]/],
  [:OPEN_BRACE, /\{/],
  [:CLOSE_BRACE, /\}/],

  [:MEMBER_ACCESS, /\./],

  [:ADD_EQUALS, /\+=/],
  [:MINUS_EQUALS, /-=/],
  [:TIMES_EQUALS, /\*=/], # TextMate's syntax highlighting chokes on this one
  [:DIVIDE_EQUALS, /\/=/],
  [:MOD_EQUALS, /%=/],
  [:LEFT_SHIFT_EQUALS, /<<=/],
  [:RIGHT_TRIPLE_SHIFT_EQUALS, />>>=/],
  [:RIGHT_SHIFT_EQUALS, />>=/],
  [:BITWISE_AND_EQUALS, /&=/],
  [:BITWISE_XOR_EQUALS, /\^=/],
  [:BITWISE_OR_EQUALS, /\|=/],

  [:INCREMENT, /\+\+/],
  [:DECREMENT, /--/],
  [:PLUS, /\+/],
  [:MINUS, /-/],
  [:ASTERISK, /\*/],
  [:SLASH, /\//],
  [:MOD, /%/],
  [:QUESTION, /\?/],
  [:COMMA, /,/],
  [:SEMICOLON, /;/],
  [:COLON, /:/],

  [:AND, /&&/],
  [:AMPERSAND, /&/],
  [:OR, /\|\|/],
  [:PIPE, /\|/],
  [:TRIPLE_EQUALS, /===/],
  [:DOUBLE_EQUALS, /==/],
  [:EQUALS, /=/],
  [:NOT_DOUBLE_EQUALS, /!==/],
  [:NOT_EQUALS, /!=/],
  [:NOT, /!/],
  [:TILDE, /~/],
  [:CARET, /\^/],

  [:LEFT_SHIFT, /<</],
  [:RIGHT_TRIPLE_SHIFT, />>>/],
  [:RIGHT_SHIFT, />>/],
  [:LTE, /<=/],
  [:GTE, />=/],
  [:LT, /</],
  [:GT, />/]
].map { |type, pattern, handler|
  [type, Regexp.new("\\A#{pattern.source}", Regexp::MULTILINE), handler]
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str) ⇒ Lexer

Returns a new instance of Lexer.



26
27
28
29
30
31
32
# File 'lib/twostroke/lexer.rb', line 26

# Sets up a lexer over the given source text.
#
# Position tracking starts at line 1, column 1, and both the
# line-terminator flag and restricted mode start out switched off.
#
# @param str [String] the source text to tokenize
def initialize(str)
  @str = str
  @col, @line = 1, 1
  @line_terminator, @restricted = false, false
end

Instance Attribute Details

#col ⇒ Object

Returns the value of attribute col.



15
16
17
# File 'lib/twostroke/lexer.rb', line 15

# Column counter of the lexer; starts at 1 and is updated by #read_token as
# input is consumed.
#
# @return [Integer] the current value of @col
def col
  @col
end

#line ⇒ Object

Returns the value of attribute line.



15
16
17
# File 'lib/twostroke/lexer.rb', line 15

# Line counter of the lexer; starts at 1 and is incremented by #read_token for
# every "\n" it consumes.
#
# @return [Integer] the current value of @line
def line
  @line
end

#restricted ⇒ Object

Returns the value of attribute restricted.



15
16
17
# File 'lib/twostroke/lexer.rb', line 15

# Whether the lexer is in restricted mode. The flag starts out false and is
# switched on by #restrict for the duration of its block; while it is set,
# #read_token returns :LINE_TERMINATOR tokens instead of skipping them.
#
# @return [Boolean] the current value of @restricted
def restricted
  @restricted
end

#str ⇒ Object

Returns the value of attribute str.



15
16
17
# File 'lib/twostroke/lexer.rb', line 15

# The input that has not been consumed yet: it starts as the string given to
# the constructor and is replaced by the text after each match in #read_token.
#
# @return [String] the current value of @str
def str
  @str
end

Instance Method Details

#read_token(allow_regexp = true) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/twostroke/lexer.rb', line 41

# Reads the next significant token from the front of the remaining input.
#
# The definitions in TOKENS are tried in order and the first pattern that
# matches at the start of @str wins. Whitespace and comments are always
# skipped; line terminators are skipped unless the lexer is in restricted
# mode (see #restrict).
#
# @param allow_regexp [Boolean] when false the :REGEXP definition is ignored,
#   so a leading "/" is lexed as an operator instead of a regexp literal
# @return [Token, nil] the token, stamped with the line and column at which
#   it started, or nil once the input is exhausted
# @raise [LexError] if no token definition matches the remaining input
def read_token(allow_regexp = true)
  # Skipped tokens are handled by looping rather than recursing, so long runs
  # of blank or comment lines cannot exhaust the call stack.
  loop do
    m = nil
    entry = TOKENS.find do |definition|
      (allow_regexp || definition[0] != :REGEXP) && (m = definition[1].match(@str))
    end

    unless entry
      return nil if @str.empty?
      raise LexError, "Illegal character '#{@str[0]}' at line #{@line}, col #{@col}."
    end

    type, _pattern, handler = entry
    skip = [:WHITESPACE, :MULTI_COMMENT, :SINGLE_COMMENT].include?(type) ||
           (type == :LINE_TERMINATOR && !restricted)

    # Build the token before advancing so that it records where it started and
    # so that a failing value handler leaves the lexer position untouched.
    tok = nil
    unless skip
      tok = Token.new(:type => type, :val => handler ? handler.call(m) : nil, :line => @line, :col => @col)
    end

    text = m[0]
    @str = m.post_match
    newlines = text.count("\n")
    if newlines.zero?
      @col += text.length
    else
      @line += newlines
      # The column restarts after the last newline: the character directly
      # following it is column 1.
      @col = text.length - text.rindex("\n")
    end

    return tok unless skip
  end
end

#restrict ⇒ Object



34
35
36
37
38
39
# File 'lib/twostroke/lexer.rb', line 34

# Runs the given block with the lexer in restricted mode, in which
# #read_token returns :LINE_TERMINATOR tokens instead of skipping them.
#
# The previous mode is restored when the block finishes, including when it
# raises, so a failed read cannot leave the lexer stuck in restricted mode and
# nested calls keep the outer block restricted.
#
# @yield the code to run while restricted
# @return [Object] whatever the block returns
def restrict
  previous = @restricted
  @restricted = true
  yield
ensure
  @restricted = previous
end

#state ⇒ Object



17
18
19
# File 'lib/twostroke/lexer.rb', line 17

# Snapshots the lexer position: the remaining input together with the current
# column and line, read through the attribute readers of the same names.
#
# @return [Hash] a hash with the keys :str, :col and :line, in that order
def state
  %w[str col line].each_with_object({}) do |field, snapshot|
    snapshot[field.to_sym] = send(field)
  end
end

#state=(state) ⇒ Object



20
21
22
23
24
# File 'lib/twostroke/lexer.rb', line 20

# Restores a position from a snapshot by copying its :str, :col and :line
# entries into the remaining input, column and line of this lexer.
#
# @param state [Hash] a snapshot with the keys :str, :col and :line
def state=(state)
  @str, @col, @line = state[:str], state[:col], state[:line]
end