Class: RKelly::Tokenizer
- Inherits: Object
- Class hierarchy: Object → RKelly::Tokenizer
- Defined in:
- lib/rkelly/tokenizer.rb
Constant Summary collapse
# JavaScript (ECMAScript 3) keywords, matched case-sensitively against
# IDENT tokens; each keyword is re-tagged as its own token symbol
# (e.g. "if" => :IF).  Frozen to prevent accidental mutation of a
# shared constant.
KEYWORDS = %w{
  break case catch continue default delete do else finally for
  function if in instanceof new return switch this throw try typeof
  var void while with const true false null debugger
}.freeze
# Words reserved for future use by ECMAScript 3; identifiers matching
# one of these are emitted as a generic :RESERVED token.  Frozen to
# prevent accidental mutation of a shared constant.
RESERVED = %w{
  abstract boolean byte char class double enum export extends final
  float goto implements import int interface long native package
  private protected public short static super synchronized throws
  transient volatile
}.freeze
# Multi-character punctuators mapped to the token symbols the parser
# expects.  The tokenizer sorts these keys longest-first when building
# its matching regexp so that e.g. '>>>=' wins over '>>>' and '>>'.
# Frozen to prevent accidental mutation of a shared constant.
LITERALS = {
  # Punctuators
  '=='    => :EQEQ,
  '!='    => :NE,
  '==='   => :STREQ,
  '!=='   => :STRNEQ,
  '<='    => :LE,
  '>='    => :GE,
  '||'    => :OR,
  '&&'    => :AND,
  '++'    => :PLUSPLUS,
  '--'    => :MINUSMINUS,
  '<<'    => :LSHIFT,
  '<<='   => :LSHIFTEQUAL,
  '>>'    => :RSHIFT,
  '>>='   => :RSHIFTEQUAL,
  '>>>'   => :URSHIFT,
  '>>>='  => :URSHIFTEQUAL,
  '&='    => :ANDEQUAL,
  '%='    => :MODEQUAL,
  '^='    => :XOREQUAL,
  '|='    => :OREQUAL,
  '+='    => :PLUSEQUAL,
  '-='    => :MINUSEQUAL,
  '*='    => :MULTEQUAL,
  '/='    => :DIVEQUAL,
}.freeze
Instance Method Summary collapse
-
#initialize(&block) ⇒ Tokenizer
constructor
A new instance of Tokenizer.
- #raw_tokens(string) ⇒ Object
- #tokenize(string) ⇒ Object
Constructor Details
#initialize(&block) ⇒ Tokenizer
Returns a new instance of Tokenizer.
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/rkelly/tokenizer.rb', line 48 def initialize(&block) @lexemes = [] token(:COMMENT, /\A\/(?:\*(?:.)*?\*\/|\/[^\n]*)/m) token(:STRING, /\A"(?:[^"\\]*(?:\\.[^"\\]*)*)"|\A'(?:[^'\\]*(?:\\.[^'\\]*)*)'/m) # A regexp to match floating point literals (but not integer literals). token(:NUMBER, /\A\d+\.\d*(?:[eE][-+]?\d+)?|\A\d+(?:\.\d*)?[eE][-+]?\d+|\A\.\d+(?:[eE][-+]?\d+)?/m) do |type, value| value.gsub!(/\.(\D)/, '.0\1') if value =~ /\.\w/ value.gsub!(/\.$/, '.0') if value =~ /\.$/ value.gsub!(/^\./, '0.') if value =~ /^\./ [type, eval(value)] end token(:NUMBER, /\A0[xX][\da-fA-F]+|\A0[0-7]*|\A\d+/) do |type, value| [type, eval(value)] end token(:LITERALS, Regexp.new(LITERALS.keys.sort_by { |x| x.length }.reverse.map { |x| "\\A#{x.gsub(/([|+*^])/, '\\\\\1')}" }.join('|') )) do |type, value| [LITERALS[value], value] end token(:IDENT, /\A(\w|\$)+/) do |type,value| if KEYWORDS.include?(value) [value.upcase.to_sym, value] elsif RESERVED.include?(value) [:RESERVED, value] else [type, value] end end token(:REGEXP, /\A\/(?:[^\/\r\n\\]*(?:\\[^\r\n][^\/\r\n\\]*)*)\/[gi]*/) token(:S, /\A[\s\r\n]*/m) token(:SINGLE_CHAR, /\A./) do |type, value| [value, value] end end |
Instance Method Details
#raw_tokens(string) ⇒ Object
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# File 'lib/rkelly/tokenizer.rb', line 95

# Consumes +string+ from the front, repeatedly taking the registered
# lexeme with the longest match at the current position (first
# registration wins ties) and stamping each resulting token with its
# 1-based source line number.  Returns the array of raw tokens.
def raw_tokens(string)
  tokens = []
  line_number = 1
  until string.empty?
    best = nil
    @lexemes.each do |lexeme|
      candidate = lexeme.match(string)
      next unless candidate
      # Keep the first lexeme seen for any given length; only a strictly
      # longer match replaces the current best.
      best = candidate if best.nil? || candidate.value.length > best.value.length
    end
    best.line = line_number
    line_number += best.value.count("\n")
    string = string[best.value.length..-1]
    tokens << best
  end
  tokens
end
#tokenize(string) ⇒ Object
91 92 93 |
# File 'lib/rkelly/tokenizer.rb', line 91

# Tokenizes +string+ and converts every raw token into the
# [type, value] pair form that the racc-generated parser consumes.
def tokenize(string)
  raw_tokens(string).map(&:to_racc_token)
end