Class: Twostroke::Lexer
- Inherits:
-
Object
- Object
- Twostroke::Lexer
- Defined in:
- lib/twostroke/lexer.rb,
lib/twostroke/tokens.rb
Constant Summary collapse
# JavaScript reserved words recognised by the lexer. Each word gets its own
# rule in TOKENS whose type is the upcased word as a Symbol (e.g. :FUNCTION).
# Frozen so the shared constant cannot be mutated by accident.
RESERVED = %w(
  function var if instanceof in else for while do this return throw typeof
  try catch finally void null new delete switch case break continue default
  true false with
).freeze

# Ordered lexer rules. Each entry is [type, pattern, converter]:
#
# * type      - Symbol naming the token.
# * pattern   - Regexp for the token; the trailing +map+ rebuilds every
#               pattern anchored with \A and compiled with Regexp::MULTILINE.
# * converter - optional lambda taking the MatchData and returning the token
#               value; nil for tokens that carry no value.
#
# Order is significant wherever one rule's text is a prefix of another's:
# longer operators are listed before shorter ones (">>>=" before ">>=",
# "===" before "==" before "="), and reserved words before :BAREWORD.
#
# NOTE(review): "\1" inside a character class ([^\1]) in the :STRING and
# :REGEXP patterns is presumably not a back-reference there (it looks like it
# is read as an octal escape), so those classes do not actually exclude the
# opening quote. The patterns are kept as they were; confirm before relying
# on that behaviour.
TOKENS = [
  [ :MULTI_COMMENT, %r{/\*.*?\*/} ],
  [ :SINGLE_COMMENT, /\/\/.*?($|\r|\u2029|\u2028)/ ],
  [ :LINE_TERMINATOR, /[\n\r\u2028\u2029]/ ],
  [ :WHITESPACE, /[ \t\r\v\f]+/ ],
  [ :NUMBER, /((?<oct>0[0-7]+)|(?<hex>0x[A-Fa-f0-9]+)|(?<to_f>(\d+(\.?\d*([eE][+-]?\d+)?)?|\.\d+([eE][+-]?\d+)?)))/, ->(m) do
    # The name of the capture group that matched doubles as the String
    # method that converts the literal: "oct", "hex" or "to_f".
    method, number = m.names.zip(m.captures).find { |_name, value| value }
    n = number.public_send(method)
    # Whole-valued results are returned as Integers, everything else as-is.
    if (n % 1).zero?
      n.to_i
    else
      n
    end
  end ],
  # A reserved word only counts when it is not immediately followed by
  # another identifier character. The negative lookahead (rather than a
  # positive "followed by a non-identifier character") also accepts a keyword
  # that is the very last thing in the input, and "$" is treated as an
  # identifier character so that e.g. "in$x" stays a single :BAREWORD.
  *RESERVED.map { |w| [ w.upcase.intern, /#{w}(?![a-zA-Z_0-9\$])/ ] },
  [ :BAREWORD, /[a-zA-Z_\$][\$a-zA-Z_0-9]*/, ->(m) { m[0] } ],
  [ :STRING, /(["'])((\\\n|\\.|[^\n\r\u2028\u2029\1])*?[^\1\\]?)\1/, ->(m) do
    # Decode escape sequences in the string body (capture group 2).
    m[2].gsub(/\\(([0-7]{1,3})|u([a-f0-9]{4})|x([a-f0-9]{2})|\n|.)/i) do |esc|
      case esc
      when /\\([0-7]{1,3})/ then esc[1..-1].to_i(8).chr("utf-8")     # octal escape (digits 0-7)
      when /\\u([a-f0-9]{4})/i then esc[2..-1].to_i(16).chr("utf-8") # \uXXXX
      when /\\x([a-f0-9]{2})/i then esc[2..-1].to_i(16).chr("utf-8") # \xXX
      else
        case esc[1]
        when "b" then "\b"
        when "n" then "\n"
        when "f" then "\f"
        when "v" then "\v"
        when "r" then "\r"
        when "t" then "\t"
        when "\n" then "" # backslash-newline is a line continuation
        else esc[1]       # any other escaped character stands for itself
        end
      end
    end
  end ],
  [ :REGEXP, %r{/(?<src>(\\.|[^\1])*?[^\1\\]?)/(?<opts>[gim]+)?}, ->(m) { [m[:src], m[:opts]] } ],
  [ :OPEN_PAREN, /\(/ ],
  [ :CLOSE_PAREN, /\)/ ],
  [ :OPEN_BRACKET, /\[/ ],
  [ :CLOSE_BRACKET, /\]/ ],
  [ :OPEN_BRACE, /\{/ ],
  [ :CLOSE_BRACE, /\}/ ],
  [ :MEMBER_ACCESS, /\./ ],
  [ :ADD_EQUALS, /\+=/ ],
  [ :MINUS_EQUALS, /-=/ ],
  [ :TIMES_EQUALS, /\*=/ ], # textmate barfs its syntax highlighting on this one lol
  [ :DIVIDE_EQUALS, /\/=/ ],
  [ :MOD_EQUALS, /%=/ ],
  [ :LEFT_SHIFT_EQUALS, /<<=/ ],
  [ :RIGHT_TRIPLE_SHIFT_EQUALS, />>>=/ ],
  [ :RIGHT_SHIFT_EQUALS, />>=/ ],
  [ :BITWISE_AND_EQUALS, /&=/ ],
  [ :BITWISE_XOR_EQUALS, /\^=/ ],
  [ :BITWISE_OR_EQUALS, /\|=/ ],
  [ :INCREMENT, /\+\+/ ],
  [ :DECREMENT, /--/ ],
  [ :PLUS, /\+/ ],
  [ :MINUS, /-/ ],
  [ :ASTERISK, /\*/ ],
  [ :SLASH, /\// ],
  [ :MOD, /%/ ],
  [ :QUESTION, /\?/ ],
  [ :COMMA, /,/ ],
  [ :SEMICOLON, /;/ ],
  [ :COLON, /:/ ],
  [ :AND, /&&/ ],
  [ :AMPERSAND, /&/ ],
  [ :OR, /\|\|/ ],
  [ :PIPE, /\|/ ],
  [ :TRIPLE_EQUALS, /===/ ],
  [ :DOUBLE_EQUALS, /==/ ],
  [ :EQUALS, /=/ ],
  [ :NOT_DOUBLE_EQUALS, /!==/ ],
  [ :NOT_EQUALS, /!=/ ],
  [ :NOT, /!/ ],
  [ :TILDE, /~/ ],
  [ :CARET, /\^/ ],
  [ :LEFT_SHIFT, /<</ ],
  [ :RIGHT_TRIPLE_SHIFT, />>>/ ],
  [ :RIGHT_SHIFT, />>/ ],
  [ :LTE, /<=/ ],
  [ :GTE, />=/ ],
  [ :LT, /</ ],
  [ :GT, />/ ],
].map { |type, pattern, converter|
  # Anchor every rule at the start of the remaining input.
  [type, Regexp.new("\\A#{pattern.source}", Regexp::MULTILINE), converter]
}.freeze
Instance Attribute Summary collapse
-
#col ⇒ Object
Returns the value of attribute col.
-
#line ⇒ Object
Returns the value of attribute line.
-
#restricted ⇒ Object
Returns the value of attribute restricted.
-
#str ⇒ Object
Returns the value of attribute str.
Instance Method Summary collapse
-
#initialize(str) ⇒ Lexer
constructor
A new instance of Lexer.
- #read_token(allow_regexp = true) ⇒ Object
- #restrict ⇒ Object
- #state ⇒ Object
- #state=(state) ⇒ Object
Constructor Details
#initialize(str) ⇒ Lexer
Returns a new instance of Lexer.
26 27 28 29 30 31 32 |
# File 'lib/twostroke/lexer.rb', line 26
#
# Sets up a lexer over +str+, positioned at line 1, column 1, with
# restricted mode switched off.
#
# @param str the source text to tokenize (read_token matches regexps
#   against it, so presumably a String)
def initialize(str)
  @str, @col, @line = str, 1, 1
  @line_terminator, @restricted = false, false
end
Instance Attribute Details
#col ⇒ Object
Returns the value of attribute col.
15 16 17 |
# File 'lib/twostroke/lexer.rb', line 15
#
# Reader for @col, the column counter maintained by the lexer
# (initialized to 1).
def col
  @col
end
#line ⇒ Object
Returns the value of attribute line.
15 16 17 |
# File 'lib/twostroke/lexer.rb', line 15
#
# Reader for @line, the line counter maintained by the lexer
# (initialized to 1).
def line
  @line
end
#restricted ⇒ Object
Returns the value of attribute restricted.
15 16 17 |
# File 'lib/twostroke/lexer.rb', line 15
#
# Reader for @restricted, the flag that #restrict turns on for the
# duration of its block.
def restricted
  @restricted
end
#str ⇒ Object
Returns the value of attribute str.
15 16 17 |
# File 'lib/twostroke/lexer.rb', line 15
#
# Reader for @str, the input that has not been consumed yet.
def str
  @str
end
Instance Method Details
#read_token(allow_regexp = true) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/twostroke/lexer.rb', line 41
#
# Consumes and returns the next significant token from the front of the
# remaining input.
#
# Rules from TOKENS are tried in order and the first match wins. Whitespace
# and comments are always skipped; line terminators are skipped unless the
# lexer is in restricted mode (see #restrict), in which case they are
# returned as :LINE_TERMINATOR tokens. The returned token carries the
# line/column at which it started.
#
# Skipped tokens are consumed in a loop rather than by recursion, so long
# runs of comments or blank lines cannot exhaust the stack.
#
# @param allow_regexp [Boolean] when false, the :REGEXP rule is ignored so a
#   leading "/" is lexed by the rules that follow it (e.g. :SLASH)
# @return [Token, nil] the next token, or nil once the input is exhausted
# @raise [LexError] if the remaining input is non-empty and no rule matches
def read_token(allow_regexp = true)
  candidates = allow_regexp ? TOKENS : TOKENS.reject { |t| t[0] == :REGEXP }

  matched = nil
  while (rule = candidates.find { |t| matched = t[1].match(@str) })
    type, _pattern, converter = rule
    # Build the token first: it records the position where the match starts.
    tok = Token.new(:type => type, :val => converter ? converter.call(matched) : nil, :line => @line, :col => @col)

    text = matched[0]
    @str = matched.post_match

    # Only "\n" is counted as a line break for position tracking.
    last_newline = text.rindex("\n")
    if last_newline
      @line += text.count("\n")
      # Columns are 1-based: one past the number of characters that follow
      # the last newline in the consumed text.
      @col = text.length - last_newline
    else
      @col += text.length
    end

    skipped = [:WHITESPACE, :MULTI_COMMENT, :SINGLE_COMMENT].include?(type) ||
              (!restricted && type == :LINE_TERMINATOR)
    return tok unless skipped
  end

  return nil if @str.empty?

  raise LexError, "Illegal character '#{@str[0]}' at line #{@line}, col #{@col}."
end
#restrict ⇒ Object
34 35 36 37 38 39 |
# File 'lib/twostroke/lexer.rb', line 34
#
# Runs the given block with restricted mode switched on, then switches it
# off again. While restricted, read_token returns line terminators instead
# of skipping them.
#
# The flag is cleared in an +ensure+ clause so that an exception raised
# inside the block cannot leave the lexer stuck in restricted mode.
#
# @yield the work to perform while restricted mode is on
# @return whatever the block returns
def restrict
  @restricted = true
  yield
ensure
  @restricted = false
end
#state ⇒ Object
17 18 19 |
# File 'lib/twostroke/lexer.rb', line 17
#
# Snapshots the lexer position as a Hash with the keys :str, :col and
# :line, in the shape accepted by #state=.
#
# @return [Hash] the current values of #str, #col and #line
def state
  snapshot = {}
  snapshot[:str] = str
  snapshot[:col] = col
  snapshot[:line] = line
  snapshot
end
#state=(state) ⇒ Object
20 21 22 23 24 |
# File 'lib/twostroke/lexer.rb', line 20
#
# Restores a position previously captured with #state.
#
# @param state an object indexable by :str, :col and :line (such as the
#   Hash returned by #state)
def state=(state)
  text, column, line_number = state[:str], state[:col], state[:line]
  @str = text
  @col = column
  @line = line_number
end