Class: RKelly::Tokenizer
- Inherits:
-
Object
- Object
- RKelly::Tokenizer
- Defined in:
- lib/rkelly/tokenizer.rb
Constant Summary
- KEYWORDS =
%w{ break case catch continue default delete do else finally for function if in instanceof new return switch this throw try typeof var void while with const true false null debugger }
- RESERVED =
%w{ abstract boolean byte char class double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized throws transient volatile }
- LITERALS =
{
  # Punctuators
  '=='   => :EQEQ,
  '!='   => :NE,
  '==='  => :STREQ,
  '!=='  => :STRNEQ,
  '<='   => :LE,
  '>='   => :GE,
  '||'   => :OR,
  '&&'   => :AND,
  '++'   => :PLUSPLUS,
  '--'   => :MINUSMINUS,
  '<<'   => :LSHIFT,
  '<<='  => :LSHIFTEQUAL,
  '>>'   => :RSHIFT,
  '>>='  => :RSHIFTEQUAL,
  '>>>'  => :URSHIFT,
  '>>>=' => :URSHIFTEQUAL,
  '&='   => :ANDEQUAL,
  '%='   => :MODEQUAL,
  '^='   => :XOREQUAL,
  '|='   => :OREQUAL,
  '+='   => :PLUSEQUAL,
  '-='   => :MINUSEQUAL,
  '*='   => :MULTEQUAL,
  '/='   => :DIVEQUAL,
}
- TOKENS_THAT_IMPLY_DIVISION =
[:IDENT, :NUMBER, ')', ']', '}']
Instance Method Summary
-
#initialize(&block) ⇒ Tokenizer
constructor
A new instance of Tokenizer.
- #raw_tokens(string) ⇒ Object
- #tokenize(string) ⇒ Object
Constructor Details
#initialize(&block) ⇒ Tokenizer
Returns a new instance of Tokenizer.
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/rkelly/tokenizer.rb', line 50

# Sets up the tokenizer by registering one pattern per token class.
#
# Every pattern is anchored with \A, so it can only match at the very start
# of the text it is applied to. Blocks handed to `token` take (type, value)
# and return a two-element [type, value] array.
#
# NOTE(review): `token` is defined elsewhere in this file; presumably it
# records each pattern (and optional block) in @lexemes -- confirm.
#
# block - accepted as part of the constructor signature; this method does
#         not use it.
def initialize(&block)
  @lexemes = []

  token(:COMMENT, /\A\/(?:\*(?:.)*?\*\/|\/[^\n]*)/m)
  token(:STRING, /\A"(?:[^"\\]*(?:\\.[^"\\]*)*)"|\A'(?:[^'\\]*(?:\\.[^'\\]*)*)'/m)

  # A regexp to match floating point literals (but not integer literals).
  token(:NUMBER, /\A\d+\.\d*(?:[eE][-+]?\d+)?|\A\d+(?:\.\d*)?[eE][-+]?\d+|\A\.\d+(?:[eE][-+]?\d+)?/m) do |type, value|
    # Normalise JavaScript-only spellings ("1.e5", "1.", ".5") in place so
    # the text becomes a float Ruby can parse ("1.0e5", "1.0", "0.5").
    value.gsub!(/\.(\D)/, '.0\1') if value =~ /\.\w/
    value.gsub!(/\.$/, '.0') if value =~ /\.$/
    value.gsub!(/^\./, '0.') if value =~ /^\./
    # Float() instead of eval: same result for valid literals, but source
    # text is never executed and a leading zero ("09.5") no longer raises
    # SyntaxError.
    [type, Float(value)]
  end

  # Hex, leading-zero octal and decimal integers. Integer() honours the
  # "0x"/"0X" and leading-"0" prefixes, so no eval is needed.
  token(:NUMBER, /\A0[xX][\da-fA-F]+|\A0[0-7]*|\A\d+/) do |type, value|
    [type, Integer(value)]
  end

  # Multi-character punctuators: longest literals come first in the
  # alternation so that e.g. '>>>=' is tried before '>>'.
  token(:LITERALS,
    Regexp.new(LITERALS.keys.sort_by { |x|
      x.length
    }.reverse.map { |x| "\\A#{x.gsub(/([|+*^])/, '\\\\\1')}" }.join('|')
  )) do |type, value|
    [LITERALS[value], value]
  end

  # Identifiers; keywords become their own upcased symbol type and reserved
  # words are tagged :RESERVED.
  token(:IDENT, /\A([_\$A-Za-z][_\$0-9A-Za-z]*)/) do |type, value|
    if KEYWORDS.include?(value)
      [value.upcase.to_sym, value]
    elsif RESERVED.include?(value)
      [:RESERVED, value]
    else
      [type, value]
    end
  end

  token(:REGEXP, /\A\/(?:[^\/\r\n\\]*(?:\\[^\r\n][^\/\r\n\\]*)*)\/[gi]*/)
  token(:S, /\A[\s\r\n]*/m)

  # Fallback: any other single character is its own token type.
  token(:SINGLE_CHAR, /\A./) do |type, value|
    [value, value]
  end
end
Instance Method Details
#raw_tokens(string) ⇒ Object
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
# File 'lib/rkelly/tokenizer.rb', line 97

# Splits +string+ into a list of token objects.
#
# At each position every lexeme in @lexemes is tried against the remaining
# input and the longest match is kept; on a tie the lexeme that was tried
# first wins. The :REGEXP lexeme is skipped whenever the previous token did
# not allow a regular expression to follow (as decided by
# followable_by_regex, which is defined elsewhere in this file).
#
# string - the source text to scan.
#
# Returns an Array of the matched tokens, each with its +line+ set to the
# 1-based line number on which the token starts.
def raw_tokens(string)
  collected = []
  current_line = 1
  regexp_allowed = true

  until string.empty?
    best = nil

    @lexemes.each do |lexeme|
      next unless regexp_allowed || lexeme.name != :REGEXP

      candidate = lexeme.match(string)
      next if candidate.nil?

      # Only a strictly longer match replaces the current best one.
      best = candidate if best.nil? || candidate.value.length > best.value.length
    end

    regexp_allowed = followable_by_regex(best)

    best.line = current_line
    current_line += best.value.scan(/\n/).size

    # Drop the consumed text and continue with the rest of the input.
    string = string[best.value.length..-1]
    collected << best
  end

  collected
end
#tokenize(string) ⇒ Object
93 94 95 |
# File 'lib/rkelly/tokenizer.rb', line 93

# Tokenizes +string+ and converts every raw token with +to_racc_token+.
#
# string - the source text to scan.
#
# Returns an Array holding the +to_racc_token+ result of each raw token,
# in input order.
def tokenize(string)
  raw_tokens(string).map(&:to_racc_token)
end