Module: CTokenizer

Included in:
Cache, Lexer, LexerBase, Preprocessor::Parser, Preprocessor::Tokens
Defined in:
lib/dbc/ctokenizer.rb

Overview

Copyright © 2004 Charles M Mills. This document is licensed under the MIT Licence. THIS SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND. See the included LICENCE file.

Defined Under Namespace

Modules: Scoped
Classes: CLexer, CPLexer, Cache, Error, Lexer, LexerBase, SkipMacros, Splitter

Class Method Summary

Instance Method Summary

Class Method Details

.check_string(str) ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 21

def CTokenizer.check_string(str)
	raise "expecting a String: #{str.class}" unless str.class <= String
end
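
For illustration, a quick check of how the guard behaves (the argument values here are made up):

CTokenizer.check_string("int x;")   # passes silently
CTokenizer.check_string(:symbol)    # raises RuntimeError: expecting a String: Symbol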

.check_token(t) ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 24

def CTokenizer.check_token(t)
	raise "expecting a Array[2]: #{t.inspect}" \
		unless t.class <= Array and t.length == 2
end
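
Tokens throughout this module are two-element arrays of [type_symbol, text]. A small sketch of the check (values made up):

CTokenizer.check_token([:IDENTIFIER, "x"])  # passes silently
CTokenizer.check_token("x")                 # raises RuntimeError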

.create_newlines(start, finish) ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 29

def CTokenizer.create_newlines(start, finish)
	newlines = ''
	(finish - start).times { newlines << "\n" }
	[:NEWLINE, newlines.freeze].freeze
end
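
Given a starting and finishing line number, this builds a single frozen :NEWLINE token spanning the gap:

CTokenizer.create_newlines(3, 6)  # => [:NEWLINE, "\n\n\n"]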

.error(file, line, msg) ⇒ Object

Raises:

  • (CTokenizer::Error)

# File 'lib/dbc/ctokenizer.rb', line 17

def CTokenizer.error(file, line, msg)
	raise CTokenizer::Error.new(file, line), msg
end
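
A sketch of catching the raised error; the file name and message here are made up, and the exact message format depends on how CTokenizer::Error composes it:

begin
	CTokenizer.error('foo.c', 12, 'unterminated comment')
rescue CTokenizer::Error => e
	warn e.message
end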

.join(tokens) ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 111

def CTokenizer.join(tokens)
	str = ''
	tokens.each do |t|
		str << t[1]
	end
	str
end
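
Since each token stores its source text verbatim, join is the inverse of split:

tokens = CTokenizer.split("int x;")
CTokenizer.join(tokens)  # => "int x;"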

.line_count(str) ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 35

def CTokenizer.line_count(str)
	count = 0
	str.scan(/\r\n|\n\r|\n|\r/) { count += 1 } if str.class == String
	count
end
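
All four newline conventions are counted, and non-String inputs count as zero lines:

CTokenizer.line_count("a\nb\r\nc\r")  # => 3
CTokenizer.line_count(nil)            # => 0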

.split(str) ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 102

def CTokenizer.split(str)
	tokens = []
	until str.empty?
		t, str = CTokenizer.split_token(str)
		tokens << t
	end # until
	tokens
end
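
For example, splitting a small C fragment by the rules in split_token below:

CTokenizer.split("x += 1;")
# => [[:IDENTIFIER, "x"], [:SPACE, " "], [:SYMBOL, "+="],
#     [:SPACE, " "], [:INTEGER, "1"], [:SYMBOL, ";"]]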

.split_token(str) ⇒ Object

Tokens are immutable: both the returned token array and its text are frozen.



# File 'lib/dbc/ctokenizer.rb', line 42

def CTokenizer.split_token(str)
	check_string(str)
	# would be easier if '\n' were the only kind of newline...
	token = case str
		when /\A[\t ]+/o
			[:SPACE, $&]
		when /\A(?:\r\n|\n\r|\r|\n)/o
			[:NEWLINE, $&]
		when /\A\\[\t ]*(?:\r\n|\n\r|\r|\n)/o
			[:SPACE, $&]
		when /\A\/\*.*?\*\//m
			[:COMMENT, $&]
		when /\A\/\/(?:\\[ \t]*(?:\r\n|\n\r|\r|\n)|[^\r\n])+/o
			# scary comment - bad style - beware of \ at the end of a line...
			[:COMMENT, $&]
		when /\A(?:\+=|\-=|\*=|\/=|%=|\&=|\^=|\|=|<<=|>>=|##|\.\.\.)/
			[:SYMBOL, $&]
		when /\A(?:==|!=|<=|>=|->|\&\&|\|\||<<|>>|\+\+|\-\-)/o
			[:SYMBOL, $&]
		when /\A(?:<:|:>|<%|%>)/o
			[:SYMBOL, $&]
		when /\A[\(\)\[\]\{\}\|\&\+\-\/\*%<>\.,=!:;\?\^~#]/o
			[:SYMBOL, $&]
		when /\AL?'(?:[^']|\\.)*'/o
			[:CHARACTER, $&]
		when /\AL?"(?:[^"]|\\.)*"/o
			[:STRING, $&]
		when /\A[a-zA-Z_]\w*/o
			[:IDENTIFIER, $&]
		# FLOAT should come before INTEGER
		when /\A(?:[0-9]*\.[0-9]+|[0-9]+\.)(?:[eE][-+]?[0-9]+)?[fFlL]?/o
			[:FLOAT, $&]
		when /\A[0-9]+[eE][-+]?[0-9]+[fFlL]?/o
			[:FLOAT, $&]
		when /\A0[xX][0-9a-fA-F]+(?:(?:[uU][lL]?)|(?:[lL][uU]?)?)/o
			[:INTEGER, $&]
		when /\A0[0-7]+(?:(?:[uU][lL]?)|(?:[lL][uU]?)?)/o
			[:INTEGER, $&]
		when /\A\d+(?:(?:[uU][lL]?)|(?:[lL][uU]?)?)/o
			[:INTEGER, $&]
		when /\A\Z/o
			[false, false] # end of file
		when /\A./m
			[:UNKNOWN, $&]
		else
			raise "shouldn't get here!"
	end # case
	token[1].freeze
	[token.freeze, $']
end
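
The return value pairs the frozen token with the unconsumed remainder of the string:

token, rest = CTokenizer.split_token("x = 1;")
token  # => [:IDENTIFIER, "x"]
rest   # => " = 1;"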

.whitespace?(t) ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/dbc/ctokenizer.rb', line 93

def CTokenizer.whitespace?(t)
	case t[0]
		when :SPACE, :NEWLINE, :COMMENT
			true
		else
			false
	end
end
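
Spaces, newlines, and comments all count as whitespace:

CTokenizer.whitespace?([:COMMENT, "/* note */"])  # => true
CTokenizer.whitespace?([:IDENTIFIER, "x"])        # => false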

Instance Method Details

#collect ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 150

def collect
	ary = []
	until self.empty?
		ary << yield(self.shift)
	end
	ary
end

#each ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 143

def each
	until self.empty?
		yield(self.shift)
	end
	self
end
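
The instance methods assume the including class supplies empty? and shift, as the lexer classes in this file do. A minimal sketch with a hypothetical including class (ArrayLexer is not part of the library):

class ArrayLexer
	include CTokenizer
	def initialize(tokens)
		@tokens = tokens
	end
	def empty?
		@tokens.empty?
	end
	def shift
		@tokens.shift
	end
end

ArrayLexer.new(CTokenizer.split("a+b")).each { |t| p t }
# [:IDENTIFIER, "a"]
# [:SYMBOL, "+"]
# [:IDENTIFIER, "b"]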

#error(msg) ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 119

def error(msg)
	CTokenizer.error(file, line, msg)
end

#parse_error(token) ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 127

def parse_error(token)
	self.error("parse error on token: #{token}")
end

#to_a ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 135

def to_a
	ary = []
	until self.empty?
		ary << self.shift
	end
	ary
end

#token_error(token) ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 123

def token_error(token)
	self.error("unrecognized token: #{token}")
end

#warning(msg) ⇒ Object



# File 'lib/dbc/ctokenizer.rb', line 131

def warning(msg)
	warn "#{file + ':' if file}#{line}: #{msg}"
end