Module: CTokenizer
- Included in:
- Cache, Lexer, LexerBase, Preprocessor::Parser, Preprocessor::Tokens
- Defined in:
- lib/dbc/ctokenizer.rb
Overview
Copyright © 2004 Charles M Mills This document is licenced under The MIT Licence. THIS SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND. See included LICENCE file.
Defined Under Namespace
Modules: Scoped Classes: CLexer, CPLexer, Cache, Error, Lexer, LexerBase, SkipMacros, Splitter
Class Method Summary collapse
- .check_string(str) ⇒ Object
- .check_token(t) ⇒ Object
- .create_newlines(start, finish) ⇒ Object
- .error(file, line, msg) ⇒ Object
- .join(tokens) ⇒ Object
- .line_count(str) ⇒ Object
- .split(str) ⇒ Object
-
.split_token(str) ⇒ Object
tokens are immutable.
- .whitespace?(t) ⇒ Boolean
Instance Method Summary collapse
- #collect ⇒ Object
- #each ⇒ Object
- #error(msg) ⇒ Object
- #parse_error(token) ⇒ Object
- #to_a ⇒ Object
- #token_error(token) ⇒ Object
- #warning(msg) ⇒ Object
Class Method Details
.check_string(str) ⇒ Object
21 22 23 |
# File 'lib/dbc/ctokenizer.rb', line 21

module CTokenizer
  # Validate that +str+ is a String (subclasses are accepted).
  # Raises a RuntimeError naming the actual class otherwise.
  # (Idiom fix: +is_a?+ replaces the unidiomatic +str.class <= String+;
  # both accept String and its subclasses.)
  def self.check_string(str)
    raise "expecting a String: #{str.class}" unless str.is_a?(String)
  end
end
.check_token(t) ⇒ Object
24 25 26 27 |
# File 'lib/dbc/ctokenizer.rb', line 24

module CTokenizer
  # Validate that +t+ is a two-element Array token pair [type, text].
  # Raises a RuntimeError showing the offending value otherwise.
  # (Idiom fix: +&&+ replaces low-precedence +and+, and +is_a?+ replaces
  # +t.class <= Array+; behavior is unchanged.)
  def self.check_token(t)
    unless t.is_a?(Array) && t.length == 2
      raise "expecting a Array[2]: #{t.inspect}"
    end
  end
end
.create_newlines(start, finish) ⇒ Object
29 30 31 32 33 |
# File 'lib/dbc/ctokenizer.rb', line 29

module CTokenizer
  # Build a frozen :NEWLINE token containing (finish - start) newline
  # characters — used to keep line numbering consistent when lines are
  # skipped or collapsed.
  # (Idiom fix: string repetition replaces the manual +times+/+<<+ loop;
  # the [.., 0].max guard preserves the original Integer#times no-op
  # behavior when finish < start.)
  def self.create_newlines(start, finish)
    [:NEWLINE, ("\n" * [finish - start, 0].max).freeze].freeze
  end
end
.error(file, line, msg) ⇒ Object
17 18 19 |
# File 'lib/dbc/ctokenizer.rb', line 17

module CTokenizer
  # Raise a CTokenizer::Error carrying the given source position.
  # The message is attached via the two-argument form of +raise+.
  def self.error(file, line, msg)
    raise CTokenizer::Error.new(file, line), msg
  end
end
.join(tokens) ⇒ Object
111 112 113 114 115 116 117 |
# File 'lib/dbc/ctokenizer.rb', line 111

module CTokenizer
  # Concatenate the text (second element) of every token pair into a
  # single String. Raises TypeError if a token carries non-String text
  # (e.g. the EOF token), same as the original accumulator loop.
  def self.join(tokens)
    tokens.inject('') { |text, tok| text << tok[1] }
  end
end
.line_count(str) ⇒ Object
35 36 37 38 39 |
# File 'lib/dbc/ctokenizer.rb', line 35

module CTokenizer
  # Count the line breaks in +str+; each of \r\n, \n\r, \n and \r
  # counts as one break. Non-String input counts as zero lines.
  # (Fix: +is_a?(String)+ replaces +str.class == String+ so String
  # subclasses are counted too — consistent with check_string, which
  # already accepts subclasses. Scan-with-counter replaced by
  # scan(...).length.)
  def self.line_count(str)
    return 0 unless str.is_a?(String)
    str.scan(/\r\n|\n\r|\n|\r/).length
  end
end
.split(str) ⇒ Object
102 103 104 105 106 107 108 109 |
# File 'lib/dbc/ctokenizer.rb', line 102

module CTokenizer
  # Tokenize the whole of +str+ by repeatedly splitting one token off
  # the front; returns the Array of token pairs.
  def self.split(str)
    tokens = []
    remainder = str
    until remainder.empty?
      token, remainder = CTokenizer.split_token(remainder)
      tokens << token
    end
    tokens
  end
end
.split_token(str) ⇒ Object
tokens are immutable
# File 'lib/dbc/ctokenizer.rb', line 42

module CTokenizer
  # Split one token off the front of +str+.
  #
  # Returns [token, rest] where token is a frozen [type, text] pair
  # (tokens are immutable) and rest is the unconsumed remainder of the
  # string. At end of input the token is [false, false].
  #
  # FIX: the first FLOAT pattern's second alternative was unanchored
  # ("(?:[0-9]+\.)[eE]..." had no \A), so e.g. "7 x 1.e5" matched
  # "1.e5" mid-string and silently discarded the text before it.
  # Both fractional forms are now anchored in one group, the exponent
  # is optional for each (so "1.5e3" and "1." lex as a single FLOAT),
  # and the lazy "[0-9]+?" exponent quantifier is greedy again.
  def self.split_token(str)
    check_string(str)
    # would be easier if '\n' was the only kind of newline....
    token = case str
      when /\A[\t ]+/o
        [:SPACE, $&]
      when /\A(?:\r\n|\n\r|\r|\n)/o
        [:NEWLINE, $&]
      when /\A\\[\t ]*(?:\r\n|\n\r|\r|\n)/o
        # a backslash-continued newline is plain spacing
        [:SPACE, $&]
      when /\A\/\*.*?\*\//m
        [:COMMENT, $&]
      when /\A\/\/(?:\\[ \t]*(?:\r\n|\n\r|\r|\n)|[^\r\n])+/o
        # scary comment - bad style - beware of line \ at end of line...
        [:COMMENT, $&]
      when /\A(?:\+=|\-=|\*=|\/=|%=|\&=|\^=|\|=|<<=|>>=|##|\.\.\.)/
        [:SYMBOL, $&]
      when /\A(?:==|!=|<=|>=|->|\&\&|\|\||<<|>>|\+\+|\-\-)/o
        [:SYMBOL, $&]
      when /\A(?:<:|:>|<%|%>)/o
        # digraphs
        [:SYMBOL, $&]
      when /\A[\(\)\[\]\{\}\|\&\+\-\/\*%<>\.,=!:;\?\^~#]/o
        [:SYMBOL, $&]
      when /\AL?'(?:[^']|\\.)*'/o
        [:CHARACTER, $&]
      when /\AL?"(?:[^"]|\\.)*"/o
        [:STRING, $&]
      when /\A[a-zA-Z_]\w*/o
        [:IDENTIFIER, $&]
      # FLOAT should come before INTEGER
      # NOTE(review): a leading-dot float like ".5" is still caught by
      # the single-char SYMBOL rule above — preserved original ordering.
      when /\A(?:[0-9]*\.[0-9]+|[0-9]+\.)(?:[eE][-+]?[0-9]+)?[fFlL]?/o
        [:FLOAT, $&]
      when /\A[0-9]+[eE][-+]?[0-9]+[fFlL]?/o
        [:FLOAT, $&]
      when /\A0[xX][0-9a-fA-F]+(?:(?:[uU][lL]?)|(?:[lL][uU]?)?)/o
        [:INTEGER, $&]
      when /\A0[0-7]+(?:(?:[uU][lL]?)|(?:[lL][uU]?)?)/o
        [:INTEGER, $&]
      when /\A\d+(?:(?:[uU][lL]?)|(?:[lL][uU]?)?)/o
        [:INTEGER, $&]
      when /\A\Z/o
        [false, false] # end of file
      when /\A./m
        [:UNKNOWN, $&]
      else
        raise "shouldn't get here!"
    end # case
    token[1].freeze
    [token.freeze, $']
  end
end
.whitespace?(t) ⇒ Boolean
93 94 95 96 97 98 99 100 |
# File 'lib/dbc/ctokenizer.rb', line 93

module CTokenizer
  # True when the token pair +t+ is spacing, a newline or a comment;
  # false for every other token type (including the EOF token).
  def self.whitespace?(t)
    [:SPACE, :NEWLINE, :COMMENT].include?(t[0])
  end
end
Instance Method Details
#collect ⇒ Object
150 151 152 153 154 155 156 |
# File 'lib/dbc/ctokenizer.rb', line 150

# Consume every remaining token (via #shift) until #empty?, yielding
# each to the block and returning the Array of block results.
def collect
  results = []
  results << yield(self.shift) until self.empty?
  results
end
#each ⇒ Object
143 144 145 146 147 148 |
# File 'lib/dbc/ctokenizer.rb', line 143

# Consume every remaining token (via #shift) until #empty?, yielding
# each one to the block; returns self.
def each
  yield(self.shift) until self.empty?
  self
end
#error(msg) ⇒ Object
119 120 121 |
# File 'lib/dbc/ctokenizer.rb', line 119 def error(msg) CTokenizer.error(file, line, msg) end |
#parse_error(token) ⇒ Object
127 128 129 |
# File 'lib/dbc/ctokenizer.rb', line 127

# Report a parse failure on +token+ through #error (which raises).
def parse_error(token)
  error("parse error on token: #{token}")
end
#to_a ⇒ Object
135 136 137 138 139 140 141 |
# File 'lib/dbc/ctokenizer.rb', line 135

# Drain every remaining token (via #shift) into a new Array.
def to_a
  drained = []
  drained << self.shift until self.empty?
  drained
end
#token_error(token) ⇒ Object
123 124 125 |
# File 'lib/dbc/ctokenizer.rb', line 123

# Report an unrecognized token through #error (which raises).
def token_error(token)
  error("unrecognized token: #{token}")
end
#warning(msg) ⇒ Object
131 132 133 |
# File 'lib/dbc/ctokenizer.rb', line 131

# Emit a non-fatal diagnostic on stderr, prefixed "file:line: " when
# a file is known, otherwise just "line: ".
def warning(msg)
  location = file ? "#{file}:#{line}" : line.to_s
  warn "#{location}: #{msg}"
end