Class: BibTeX::Lexer

Inherits:
Object
  • Object
show all
Defined in:
lib/bibtex/lexer.rb

Overview

The BibTeX::Lexer handles the lexical analysis of BibTeX bibliographies.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Lexer

Creates a new instance. Possible options and their respective default values are:

  • :include => [:errors] A list that may contain :meta_comments, and

:errors; depending on whether or not these are present, the respective tokens are included in the parse tree.

  • :strict => true In strict mode objects can start anywhere; therefore

the ‘@’ symbol is not possible except inside literals or @comment objects; for a more lenient lexer set to false and objects are expected to start after a new line (leading white space is permitted).



43
44
45
46
47
48
# File 'lib/bibtex/lexer.rb', line 43

# Creates a new lexer instance.
#
# Recognised option keys:
# * +:include+ - falls back to +[:errors]+ when the key is missing or nil.
# * +:strict+  - falls back to +true+ when the key is absent (an explicit
#   +false+ or +nil+ is kept as given).
#
# The options hash is copied before the defaults are filled in, so the hash
# handed in by the caller is never modified (and may even be frozen).
#
# @param options [Hash] lexer settings
def initialize(options={})
	@options = options.dup
	@options[:include] ||= [:errors]
	@options[:strict] = true unless @options.key?(:strict)
	@src = nil
end

Instance Attribute Details

#optionsObject (readonly)

Returns the value of attribute options.



29
30
31
# File 'lib/bibtex/lexer.rb', line 29

# Reader for the lexer's option hash (@options). Other methods of this
# class look up the :include and :strict keys in it.
def options
  @options
end

#srcObject

Returns the value of attribute src.



29
30
31
# File 'lib/bibtex/lexer.rb', line 29

# Reader for the current source (@src). The rest of the class sends it
# scanner messages such as scan, scan_until, matched, rest, pos and eos?.
# NOTE(review): presumably a StringScanner; the writer is not shown here,
# so confirm against src=.
def src
  @src
end

#stackObject (readonly)

Returns the value of attribute stack.



29
30
31
# File 'lib/bibtex/lexer.rb', line 29

# Reader for the token stack (@stack): push appends tokens to it and
# next_token removes them from the front.
def stack
  @stack
end

Instance Method Details

#analyse(src = nil) ⇒ Object

Start the lexical analysis.

Raises:

  • (ArgumentError)


135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/bibtex/lexer.rb', line 135

# Runs the lexical analysis over a source.
#
# When +src+ is given it becomes the new source; otherwise the string of the
# previously assigned source is analysed again. The source is rewound and
# then consumed piece by piece, each step being handled by the parse method
# that belongs to the lexer's current mode. Once the source is exhausted an
# end-of-input token (+[false, '$end']+) is pushed.
#
# @param src the source to analyse, or nil to re-use the current one
# @raise [ArgumentError] if no source was given and none was set before
# @return whatever the final call to push returns
def analyse(src=nil)
	if src.nil? && @src.nil?
		raise ArgumentError, 'Lexer: failed to start analysis: no source given!'
	end

	Log.debug('Lexer: starting lexical analysis...')

	self.src = src || @src.string
	self.src.reset

	until self.src.eos?
		# Exactly one handler runs per iteration, chosen by the current mode.
		if bibtex_mode?
			parse_bibtex
		elsif meta_mode?
			parse_meta
		elsif content_mode?
			parse_content
		elsif literal_mode?
			parse_literal
		end
	end

	Log.debug('Lexer: finished lexical analysis.')
	push([false, '$end'])
end

#backtrace(error) ⇒ Object



319
320
321
322
323
324
# File 'lib/bibtex/lexer.rb', line 319

# Unwinds the token stack into an error token.
#
# Tokens are popped off the stack (newest first) and collected in their
# original order until the stack is empty or the most recently popped token
# is an :AT or :META_COMMENT token. The given error is appended to the
# collected tokens and the lot is pushed as a single :ERROR token.
#
# @param error the error descriptor to append to the trace
# @return whatever push returns
def backtrace(error)
	boundary_types = [:AT, :META_COMMENT]
	unwound = []

	until @stack.empty?
		# Stop once the token that opened the faulty region has been collected.
		break if !unwound.empty? && boundary_types.include?(unwound[0][0])
		unwound.unshift(@stack.pop)
	end

	unwound.push(error)
	push([:ERROR, unwound])
end

#bibtex_mode?Boolean

Returns true if the lexer is currently parsing a BibTeX object.

Returns:

  • (Boolean)


89
90
91
# File 'lib/bibtex/lexer.rb', line 89

# Tells whether the current mode is one of the modes used inside a BibTeX
# object: :bibtex, :comment, :string, :preamble or :entry.
#
# @return [Boolean]
def bibtex_mode?
	case mode
	when :bibtex, :comment, :string, :preamble, :entry then true
	else false
	end
end

#content_mode?Boolean

Returns true if the lexer is currently parsing a braced-out expression.

Returns:

  • (Boolean)


99
100
101
# File 'lib/bibtex/lexer.rb', line 99

# Tells whether the lexer's current mode is :content.
#
# @return [Boolean]
def content_mode?
	mode == :content
end

#enter_objectObject

Called when the lexer encounters a new BibTeX object.



267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# File 'lib/bibtex/lexer.rb', line 267

# Called when the lexer encounters a new BibTeX object, i.e. right after an
# '@' has been consumed.
#
# Resets the brace level, switches to :bibtex mode and pushes an :AT token.
# The identifier following the '@' is then scanned as a whole and classified
# case-insensitively:
#
# * "string"   -> :string mode,   :STRING token
# * "preamble" -> :preamble mode, :PREAMBLE token
# * "comment"  -> :comment mode,  :COMMENT token
# * any other  -> :entry mode,    :NAME token
#
# Scanning the complete identifier first means that a type which merely
# starts with a keyword (e.g. "@stringent{") is treated as a regular entry
# instead of being split into a keyword plus a stray name.
#
# If no identifier follows, the lexer stays in :bibtex mode and nil is
# returned.
def enter_object
	@brace_level = 0
	self.mode = :bibtex
	push [:AT,'@']

	name = self.src.scan(/[a-z\d:_!\.$%&*-]+/io)
	return if name.nil?

	case name
	when /\Astring\z/i
		self.mode = :string
		push [:STRING, name]
	when /\Apreamble\z/i
		self.mode = :preamble
		push [:PREAMBLE, name]
	when /\Acomment\z/i
		self.mode = :comment
		push [:COMMENT, name]
	else
		self.mode = :entry
		push [:NAME, name]
	end
end

#error_unbalanced_bracesObject



295
296
297
298
299
# File 'lib/bibtex/lexer.rb', line 295

# Reports unbalanced braces at the current scanner position: logs a warning
# and unwinds the token stack into an error token tagged
# :E_UNBALANCED_BRACES that carries the last matched text and line number.
#
# @return whatever backtrace returns
def error_unbalanced_braces
	scanner = self.src
	line = line_number_at(scanner.pos)
	Log.warn("Lexer: unbalanced braces on line #{line}; brace level #{@brace_level}; mode #{@mode.inspect}.")
	backtrace([:E_UNBALANCED_BRACES, [scanner.matched, line]])
end

#error_unexpected_tokenObject



313
314
315
316
317
# File 'lib/bibtex/lexer.rb', line 313

# Reports an unexpected token at the current scanner position: logs a
# warning naming the offending text and unwinds the token stack into an
# error token tagged :E_UNEXPECTED_TOKEN that carries the matched text and
# line number.
#
# @return whatever backtrace returns
def error_unexpected_token
	scanner = self.src
	line = line_number_at(scanner.pos)
	Log.warn("Lexer: unexpected token `#{scanner.matched}' on line #{line}; brace level #{@brace_level}; mode #{@mode.inspect}.")
	backtrace([:E_UNEXPECTED_TOKEN, [scanner.matched, line]])
end

#error_unterminated_contentObject



307
308
309
310
311
# File 'lib/bibtex/lexer.rb', line 307

# Reports unterminated content at the current scanner position: logs a
# warning and unwinds the token stack into an error token tagged
# :E_UNTERMINATED_CONTENT that carries the last matched text (possibly nil)
# and the line number.
#
# @return whatever backtrace returns
def error_unterminated_content
	scanner = self.src
	line = line_number_at(scanner.pos)
	Log.warn("Lexer: unterminated content on line #{line}; brace level #{@brace_level}; mode #{@mode.inspect}.")
	backtrace([:E_UNTERMINATED_CONTENT, [scanner.matched, line]])
end

#error_unterminated_stringObject



301
302
303
304
305
# File 'lib/bibtex/lexer.rb', line 301

# Reports an unterminated string literal at the current scanner position:
# logs a warning and unwinds the token stack into an error token tagged
# :E_UNTERMINATED_STRING that carries the last matched text and line number.
#
# @return whatever backtrace returns
def error_unterminated_string
	scanner = self.src
	line = line_number_at(scanner.pos)
	Log.warn("Lexer: unterminated string on line #{line}; brace level #{@brace_level}; mode #{@mode.inspect}.")
	backtrace([:E_UNTERMINATED_STRING, [scanner.matched, line]])
end

#is_active?(object) ⇒ Boolean

Returns true if the lexer is currently parsing the given object type.

Returns:

  • (Boolean)


109
110
111
# File 'lib/bibtex/lexer.rb', line 109

# Tells whether the given object type equals the currently active object
# (@active_object). The active object is maintained by mode=: it is set on
# entering :comment, :string, :preamble or :entry mode and cleared on :meta.
#
# @param object the object type to compare against, e.g. :entry
# @return [Boolean]
def is_active?(object)
	@active_object == object
end

#leave_objectObject

Called when parser leaves a BibTeX object.



289
290
291
292
# File 'lib/bibtex/lexer.rb', line 289

# Called when the lexer leaves a BibTeX object: switches back to :meta mode
# (which also clears the active object, see mode=) and resets the brace
# level to zero.
def leave_object
	self.mode = :meta
	@brace_level = 0
end

#line_number_at(index) ⇒ Object

Returns the line number at a given position in the source.



63
64
65
# File 'lib/bibtex/lexer.rb', line 63

# Returns the 1-based line number for a position in the source.
#
# The line is determined by the first entry of @line_breaks that is greater
# than or equal to +index+; if no such entry exists, 1 is returned.
#
# @param index [Integer] a position in the source
# @return [Integer] the line number
def line_number_at(index)
	break_idx = @line_breaks.index { |offset| offset >= index }
	break_idx.nil? ? 1 : break_idx + 1
end

#literal_mode?Boolean

Returns true if the lexer is currently parsing a string literal.

Returns:

  • (Boolean)


104
105
106
# File 'lib/bibtex/lexer.rb', line 104

# Tells whether the lexer's current mode is :literal.
#
# @return [Boolean]
def literal_mode?
	mode == :literal
end

#meta_mode?Boolean

Returns true if the lexer is currently parsing meta comments.

Returns:

  • (Boolean)


94
95
96
# File 'lib/bibtex/lexer.rb', line 94

# Tells whether the lexer's current mode is :meta.
#
# @return [Boolean]
def meta_mode?
	mode == :meta
end

#modeObject



84
85
86
# File 'lib/bibtex/lexer.rb', line 84

# Reader for the lexer's current mode (@mode), as last assigned via mode=.
def mode
	@mode
end

#mode=(mode) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
# File 'lib/bibtex/lexer.rb', line 72

# Switches the lexer to the given mode and keeps track of the active object.
#
# The active object becomes +mode+ when entering :comment, :string,
# :preamble or :entry, is cleared when entering :meta, and is left as it is
# for every other mode. The switch is logged at debug level.
#
# @param mode [Symbol] the mode to switch to
def mode=(mode)
	Log.debug("Lexer: switching to #{mode} mode...")

	object_modes = [:comment, :string, :preamble, :entry]
	@active_object =
		if object_modes.include?(mode)
			mode
		elsif mode == :meta
			nil
		else
			@active_object
		end

	@mode = mode
end

#next_tokenObject

Returns the next token from the parse stack.



68
69
70
# File 'lib/bibtex/lexer.rb', line 68

# Returns the next token from the parse stack by removing the first
# (oldest) element of @stack; nil once the stack is empty.
def next_token
	@stack.shift
end

#parse_bibtexObject



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/bibtex/lexer.rb', line 159

# Consumes one lexical element while inside a BibTeX object.
#
# Whitespace is skipped. Braces adjust the brace level: an opening brace
# switches to :content mode at level 1 of a comment or level 2 of an entry,
# a closing brace that brings the level back to 0 leaves the object, and a
# closing brace at a level below 1 is reported as unbalanced. The characters
# '=', ',' and '#' as well as numbers and names are pushed as tokens. A
# double quote switches to :literal mode. Anything else is reported as an
# unexpected token, after which a new object is entered.
def parse_bibtex
	scanner = self.src

	if scanner.scan(/[\t\r\n\s]+/o)
		# Whitespace carries no meaning here; nothing is pushed.
	elsif scanner.scan(/\{/o)
		@brace_level += 1
		push [:LBRACE,'{']
		self.mode = :content if (@brace_level == 1 && is_active?(:comment)) || (@brace_level == 2 && is_active?(:entry))
	elsif scanner.scan(/\}/o)
		return error_unbalanced_braces if @brace_level < 1
		@brace_level -= 1
		push [:RBRACE,'}']
		leave_object if @brace_level == 0
	elsif scanner.scan( /=/o)
		push [:EQ,'=']
	elsif scanner.scan(/,/o)
		push [:COMMA,',']
	elsif scanner.scan(/#/o)
		push [:SHARP,'#']
	elsif scanner.scan(/\d+/o)
		# Pure digit runs are tried before names so that they become numbers.
		push [:NUMBER, scanner.matched]
	elsif scanner.scan(/[a-z\d:_!$\.%&*-]+/io)
		push [:NAME, scanner.matched]
	elsif scanner.scan(/"/o)
		self.mode = :literal
	elsif scanner.scan(/@/o) || scanner.scan(/./o)
		# NOTE(review): a new object is entered after *any* unexpected
		# character, not only after '@' -- confirm this is intended.
		error_unexpected_token
		enter_object
	end
end

#parse_contentObject



205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/bibtex/lexer.rb', line 205

# Consumes braced-out content up to and including the next brace.
#
# * No further brace: the remaining source is pushed as content, the source
#   is terminated and unterminated content is reported.
# * Opening brace: the brace level rises; the text including the brace is
#   pushed as content.
# * Closing brace: the brace level drops. Below zero the text (without the
#   brace) is pushed and unbalanced braces are reported. At zero the text and
#   an :RBRACE token are pushed and the object is left. At level one inside
#   an entry the text and an :RBRACE token are pushed and the lexer returns
#   to :bibtex mode. Otherwise the brace is part of the content.
def parse_content
	scanner = self.src
	text = scanner.scan_until(/\{|\}/o)
	delimiter = scanner.matched

	if delimiter.nil?
		push [:CONTENT, scanner.rest]
		scanner.terminate
		return error_unterminated_content
	end

	if delimiter == '{'
		@brace_level += 1
		return push([:CONTENT, text])
	end

	# From here on the delimiter is a closing brace.
	@brace_level -= 1
	if @brace_level < 0
		push [:CONTENT, text.chop]
		error_unbalanced_braces
	elsif @brace_level == 0
		push [:CONTENT, text.chop]
		push [:RBRACE,'}']
		leave_object
	elsif @brace_level == 1 && is_active?(:entry)
		push [:CONTENT, text.chop]
		push [:RBRACE,'}']
		self.mode = :bibtex
	else
		push [:CONTENT, text]
	end
end

#parse_literalObject



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# File 'lib/bibtex/lexer.rb', line 235

# Consumes part of a double-quoted string literal, up to and including the
# next brace, double quote or newline.
#
# * Opening brace: the brace level rises; the text is pushed as is.
# * Closing brace: the brace level drops; if it falls below 1 the text
#   (without the brace) is pushed and unbalanced braces are reported,
#   otherwise the text is pushed as is.
# * Double quote: at brace level 1 it ends the literal (text without the
#   quote is pushed and the lexer returns to :bibtex mode); at any other
#   level it is part of the literal.
# * Newline: the text without the newline is pushed and an unterminated
#   string is reported.
# * None of these left: the remaining source is pushed, the source is
#   terminated and an unterminated string is reported.
def parse_literal
	scanner = self.src
	text = scanner.scan_until(/[\{\}"\n]/o)
	delimiter = scanner.matched

	if delimiter == '{'
		@brace_level += 1
		push [:STRING_LITERAL, text]
	elsif delimiter == '}'
		@brace_level -= 1
		if @brace_level < 1
			push [:STRING_LITERAL, text.chop]
			error_unbalanced_braces
		else
			push [:STRING_LITERAL, text]
		end
	elsif delimiter == '"'
		if @brace_level == 1
			push [:STRING_LITERAL, text.chop]
			self.mode = :bibtex
		else
			push [:STRING_LITERAL, text]
		end
	elsif delimiter == "\n"
		push [:STRING_LITERAL, text.chop]
		error_unterminated_string
	else
		push [:STRING_LITERAL, scanner.rest]
		scanner.terminate
		error_unterminated_string
	end
end

#parse_metaObject



194
195
196
197
198
199
200
201
202
203
# File 'lib/bibtex/lexer.rb', line 194

# Consumes meta content (text outside of BibTeX objects) up to the start of
# the next object.
#
# In strict mode an object starts at any '@'; otherwise only at an '@' that
# is the first non-blank character of a line. Blanks following the '@' are
# consumed as well so that the object's type can be scanned right after.
#
# When an object start is found, everything in front of its '@' is pushed as
# a :META_COMMENT token and the object is entered. The '@' and any blanks
# after it are dropped from the comment; cutting at the '@' (rather than
# chopping a single character) keeps them out even when blanks follow the
# '@'. When no further object start exists, the remaining source is pushed
# as a :META_COMMENT token and the source is terminated.
def parse_meta
	pattern = @options[:strict] ? /@[\t ]*/o : /(^|\n)[\t ]*@[\t ]*/o
	match = self.src.scan_until(pattern)

	if self.src.matched.nil?
		push [:META_COMMENT, self.src.rest]
		self.src.terminate
	else
		# Only blanks can follow the object's '@' inside the match, so the
		# last '@' is always the one that starts the object.
		push [:META_COMMENT, match[0, match.rindex('@')]]
		enter_object
	end
end

#push(value) ⇒ Object

Pushes a value onto the parse stack.



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/bibtex/lexer.rb', line 114

# Pushes a token onto the parse stack.
#
# * A :CONTENT or :STRING_LITERAL token that follows a token of the same
#   type is merged into it: its text is appended and the line number updated.
# * An :ERROR token is stored only if the :include option lists :errors;
#   either way the current object is left.
# * A :META_COMMENT token is stored only if the :include option lists
#   :meta_comments.
# * Every other token is stored.
#
# Tokens stored by the last two rules get their value replaced by a
# [value, line number] pair; :ERROR tokens are stored unchanged.
#
# @param value [Array] a [type, value] token
# @return [self]
def push(value)
	type = value[0]

	if [:CONTENT, :STRING_LITERAL].include?(type) && type == @stack.last[0]
		merged = @stack.last[1]
		merged[0] << value[1]
		merged[1] = line_number_at(@src.pos)
	elsif type == :ERROR
		@stack.push(value) if @options[:include].include?(:errors)
		leave_object
	elsif type != :META_COMMENT || @options[:include].include?(:meta_comments)
		value[1] = [value[1], line_number_at(@src.pos)]
		@stack.push(value)
	end

	self
end