Class: Puppet::Parser::Lexer
Defined Under Namespace
Classes: Token, TokenList
Constant Summary collapse
- TOKENS = TokenList.new
- DQ_initial_token_types = { '$' => :DQPRE, '"' => :STRING }
- DQ_continuation_token_types = { '$' => :DQMID, '"' => :DQPOST }
- KEYWORDS = TokenList.new
- @@pairs = { "{" => "}", "(" => ")", "[" => "]", "<|" => "|>", "<<|" => "|>>" }
Instance Attribute Summary collapse
- #file ⇒ Object: Returns the value of attribute file.
- #indefine ⇒ Object: Returns the value of attribute indefine.
- #last ⇒ Object (readonly): Returns the value of attribute last.
- #lexing_context ⇒ Object (readonly): Returns the value of attribute lexing_context.
- #line ⇒ Object: Returns the value of attribute line.
- #token_queue ⇒ Object (readonly): Returns the value of attribute token_queue.
Class Method Summary collapse
- .acceptable?(context = {}) ⇒ Boolean: Whether a NUMBER token is acceptable in the given lexing context (Issue #4161).
Instance Method Summary collapse
- #clear ⇒ Object
- #commentpop ⇒ Object: Returns the content of the currently accumulated content cache.
- #commentpush ⇒ Object
- #expected ⇒ Object
- #find_regex_token ⇒ Object: Find the next token that matches a regex.
- #find_string_token ⇒ Object
- #find_token ⇒ Object: Find the next token, returning the string and the token.
- #fullscan ⇒ Object: Scan the whole file; basically just used for testing.
- #getcomment(line = nil) ⇒ Object
- #indefine? ⇒ Boolean
- #initialize ⇒ Lexer (constructor): A new instance of Lexer.
- #initvars ⇒ Object
- #lex_error(msg) ⇒ Object
- #munge_token(token, value) ⇒ Object: Make any necessary changes to the token and/or value.
- #namepop ⇒ Object: Go up one level in the namespace.
- #namespace ⇒ Object: Collect the current namespace.
- #namestack(value) ⇒ Object: This value might have "::" in it, but we don't care; it'll be handled normally when joining, and when popping we want to pop this full value, however long the namespace is.
- #rest ⇒ Object
- #scan {|[false,false]| ... } ⇒ Object: This is the heart of the lexer.
- #scan_until(regex) ⇒ Object: Provide some limited access to the scanner, for those tokens that need it.
- #shift_token ⇒ Object
- #skip ⇒ Object: Skip any skipchars in our remaining string.
- #slurpstring(terminators, escapes = %w{ \\ $ ' " n t s }+["\n"], ignore_invalid_escapes = false) ⇒ Object: We've encountered the start of a string…
- #string=(string) ⇒ Object: Just parse a string, not a whole file.
- #tokenize_interpolated_string(token_type, preamble = '') ⇒ Object
Constructor Details
#initialize ⇒ Lexer
Returns a new instance of Lexer.
# File 'lib/puppet/parser/lexer.rb', line 369

def initialize
  @find = 0
  @regex = 0
  initvars
end
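For illustration, a lexer is constructed empty and then fed source text via #string= before scanning. A minimal sketch, assuming a Puppet source tree of this vintage is on the load path (the one-line manifest is made up):

require 'puppet'
require 'puppet/parser/lexer'

lexer = Puppet::Parser::Lexer.new   # runs initvars; @find/@regex counters start at 0
lexer.string = "$greeting = 'hi'"   # hypothetical input; see #string= below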
Instance Attribute Details
#file ⇒ Object
Returns the value of attribute file.
# File 'lib/puppet/parser/lexer.rb', line 14

def file
  @file
end
#indefine ⇒ Object
Returns the value of attribute indefine.
# File 'lib/puppet/parser/lexer.rb', line 16

def indefine
  @indefine
end
#last ⇒ Object (readonly)
Returns the value of attribute last.
# File 'lib/puppet/parser/lexer.rb', line 14

def last
  @last
end
#lexing_context ⇒ Object (readonly)
Returns the value of attribute lexing_context.
# File 'lib/puppet/parser/lexer.rb', line 14

def lexing_context
  @lexing_context
end
#line ⇒ Object
Returns the value of attribute line.
# File 'lib/puppet/parser/lexer.rb', line 16

def line
  @line
end
#token_queue ⇒ Object (readonly)
Returns the value of attribute token_queue.
# File 'lib/puppet/parser/lexer.rb', line 14

def token_queue
  @token_queue
end
Class Method Details
.acceptable?(context = {}) ⇒ Boolean
Defined as a singleton method on TOKENS[:NUMBER] (Issue #4161): a NUMBER is not acceptable immediately after a :DQPRE or :DQMID token, i.e. in the middle of an interpolated string.
# File 'lib/puppet/parser/lexer.rb', line 168

def (TOKENS[:NUMBER]).acceptable?(context={})
  ![:DQPRE,:DQMID].include? context[:after]
end
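In other words, a bare NUMBER is rejected right after the :DQPRE or :DQMID segments of an interpolated string, where digits belong to the string content instead. A sketch of the check, with a hand-built context hash standing in for the lexer's @lexing_context:

number = Puppet::Parser::Lexer::TOKENS[:NUMBER]
number.acceptable?(:after => :DQPRE)   #=> false (mid-interpolation)
number.acceptable?(:after => :EQUALS)  #=> true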
Instance Method Details
#clear ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 289

def clear
  initvars
end
#commentpop ⇒ Object
Returns the content of the currently accumulated content cache.
# File 'lib/puppet/parser/lexer.rb', line 566

def commentpop
  @commentstack.pop[0]
end
#commentpush ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 580

def commentpush
  @commentstack.push(['', @line])
end
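#commentpush opens a fresh comment frame (the scan loop does this on :LBRACE and :LPAREN) and #commentpop closes the newest one; in between, #munge_token appends accumulated comment text to the top frame. A rough round trip, assuming a fresh lexer:

lexer = Puppet::Parser::Lexer.new
lexer.commentpush    # pushes ['', current line]
# ... comment tokens accumulate into the top frame during scanning ...
lexer.commentpop     #=> "" here, or the accumulated comment text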
#expected ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 293

def expected
  return nil if @expected.empty?
  name = @expected[-1]
  TOKENS.lookup(name) or lex_error "Could not find expected token #{name}"
end
#find_regex_token ⇒ Object
Find the next token that matches a regex. We look for these first.
# File 'lib/puppet/parser/lexer.rb', line 335

def find_regex_token
  @regex += 1
  best_token = nil
  best_length = 0

  # I tried optimizing based on the first char, but it had
  # a slightly negative effect and was a good bit more complicated.
  TOKENS.regex_tokens.each do |token|
    if length = @scanner.match?(token.regex) and token.acceptable?(lexing_context)
      # We've found a longer match
      if length > best_length
        best_length = length
        best_token = token
      end
    end
  end

  return best_token, @scanner.scan(best_token.regex) if best_token
end
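The longest-match comparison relies on StringScanner#match?, which returns the length of the match without advancing the scan position. A self-contained illustration using only the standard library:

require 'strscan'

s = StringScanner.new("classify")
s.match?(/class/)     #=> 5
s.match?(/classify/)  #=> 8  (a longer candidate wins)
s.pos                 #=> 0  (nothing is consumed until #scan)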
#find_string_token ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 323

def find_string_token
  # We know our longest string token is three chars, so try each size in turn
  # until we either match or run out of chars.  This way our worst-case is three
  # tries, where it is otherwise the number of string tokens we have.  Also,
  # the lookups are optimized hash lookups, instead of regex scans.
  #
  s = @scanner.peek(3)
  token = TOKENS.lookup(s[0,3]) || TOKENS.lookup(s[0,2]) || TOKENS.lookup(s[0,1])
  [ token, token && @scanner.scan(token.regex) ]
end
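Trying three characters before two before one is what keeps a token such as <<| from being mis-lexed as < followed by <|. A standalone sketch of the same lookup order, with a plain hash standing in for TOKENS:

lookup = { "<<|" => :LLCOLLECT, "<|" => :LCOLLECT, "<" => :LESSTHAN }  # stand-in table
s = "<<| tag == 'web' |>>"
lookup[s[0,3]] || lookup[s[0,2]] || lookup[s[0,1]]  #=> :LLCOLLECT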
#find_token ⇒ Object
Find the next token, returning the string and the token.
# File 'lib/puppet/parser/lexer.rb', line 356

def find_token
  @find += 1
  shift_token || find_regex_token || find_string_token
end
#fullscan ⇒ Object
Scan the whole file; basically just used for testing.
# File 'lib/puppet/parser/lexer.rb', line 301

def fullscan
  array = []

  self.scan { |token, str|
    # Ignore any definition nesting problems
    @indefine = false
    array.push([token,str])
  }
  array
end
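Testing aside, #fullscan is a handy way to inspect a token stream; note that the trailing [false, false] sentinel yielded by #scan is included in the array. Approximate output (exact token names and converted values vary by Puppet version):

lexer = Puppet::Parser::Lexer.new
lexer.string = "$x = foo"
lexer.fullscan
#=> [[:VARIABLE, {:value => "x",   :line => 1}],
#    [:EQUALS,   {:value => "=",   :line => 1}],
#    [:NAME,     {:value => "foo", :line => 1}],
#    [false, false]]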
#getcomment(line = nil) ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 570

def getcomment(line = nil)
  comment = @commentstack.last
  if line.nil? or comment[1] <= line
    @commentstack.pop
    @commentstack.push(['', @line])
    return comment[0]
  end
  ''
end
#indefine? ⇒ Boolean
# File 'lib/puppet/parser/lexer.rb', line 361

def indefine?
  if defined?(@indefine)
    @indefine
  else
    false
  end
end
#initvars ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 375

def initvars
  @line = 1
  @previous_token = nil
  @scanner = nil
  @file = nil
  # AAARRGGGG! okay, regexes in ruby are bloody annoying
  # no one else has "\n" =~ /\s/
  @skip = %r{[ \t\r]+}

  @namestack = []
  @token_queue = []
  @indefine = false
  @expected = []
  @commentstack = [ ['', @line] ]
  @lexing_context = {
    :after => nil,
    :start_of_line => true,
    :string_interpolation_depth => 0
  }
end
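The complaint in the comment is about line tracking: Ruby's /\s/ matches "\n", so skipping /\s+/ would silently swallow the newlines the lexer needs for @line counting and :RETURN tokens. Hence the explicit character class:

"\n" =~ /\s/        #=> 0   (the whitespace class matches a newline)
"\n" =~ /[ \t\r]+/  #=> nil (the lexer's @skip leaves newlines alone)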
#lex_error(msg) ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 18

def lex_error msg
  raise Puppet::LexError.new(msg)
end
#munge_token(token, value) ⇒ Object
Make any necessary changes to the token and/or value.
# File 'lib/puppet/parser/lexer.rb', line 397

def munge_token(token, value)
  @line += 1 if token.incr_line

  skip if token.skip_text

  return if token.skip and not token.accumulate?

  token, value = token.convert(self, value) if token.respond_to?(:convert)

  return unless token

  if token.accumulate?
    comment = @commentstack.pop
    comment[0] << value + "\n"
    @commentstack.push(comment)
  end

  return if token.skip

  return token, { :value => value, :line => @line }
end
#namepop ⇒ Object
Go up one level in the namespace.
# File 'lib/puppet/parser/lexer.rb', line 420

def namepop
  @namestack.pop
end
#namespace ⇒ Object
Collect the current namespace.
# File 'lib/puppet/parser/lexer.rb', line 425

def namespace
  @namestack.join("::")
end
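Together with #namestack and #namepop, this maintains the fully qualified class scope while lexing nested class bodies. A sketch:

lexer = Puppet::Parser::Lexer.new
lexer.namestack("outer")
lexer.namestack("inner::deep")  # a single value may itself contain "::"
lexer.namespace                 #=> "outer::inner::deep"
lexer.namepop                   # pops the whole "inner::deep" entry
lexer.namespace                 #=> "outer"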
#namestack(value) ⇒ Object
This value might have "::" in it, but we don't care; it'll be handled normally when joining, and when popping we want to pop this full value, however long the namespace is.
# File 'lib/puppet/parser/lexer.rb', line 432

def namestack(value)
  @namestack << value
end
#rest ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 436

def rest
  @scanner.rest
end
#scan {|[false,false]| ... } ⇒ Object
This is the heart of the lexer.
# File 'lib/puppet/parser/lexer.rb', line 441

def scan
  #Puppet.debug("entering scan")
  lex_error "Invalid or empty string" unless @scanner

  # Skip any initial whitespace.
  skip

  until token_queue.empty? and @scanner.eos? do
    yielded = false
    matched_token, value = find_token

    # error out if we didn't match anything at all
    lex_error "Could not match #{@scanner.rest[/^(\S+|\s+|.*)/]}" unless matched_token

    newline = matched_token.name == :RETURN

    # this matches a blank line; eat the previously accumulated comments
    getcomment if lexing_context[:start_of_line] and newline
    lexing_context[:start_of_line] = newline

    final_token, token_value = munge_token(matched_token, value)

    unless final_token
      skip
      next
    end

    lexing_context[:after] = final_token.name unless newline
    lexing_context[:string_interpolation_depth] += 1 if final_token.name == :DQPRE
    lexing_context[:string_interpolation_depth] -= 1 if final_token.name == :DQPOST

    value = token_value[:value]

    if match = @@pairs[value] and final_token.name != :DQUOTE and final_token.name != :SQUOTE
      @expected << match
    elsif exp = @expected[-1] and exp == value and final_token.name != :DQUOTE and final_token.name != :SQUOTE
      @expected.pop
    end

    if final_token.name == :LBRACE or final_token.name == :LPAREN
      commentpush
    end
    if final_token.name == :RPAREN
      commentpop
    end

    yield [final_token.name, token_value]

    if @previous_token
      namestack(value) if @previous_token.name == :CLASS and value != '{'

      if @previous_token.name == :DEFINE
        if indefine?
          msg = "Cannot nest definition #{value} inside #{@indefine}"
          self.indefine = false
          raise Puppet::ParseError, msg
        end

        @indefine = value
      end
    end
    @previous_token = final_token
    skip
  end
  @scanner = nil

  # This indicates that we're done parsing.
  yield [false,false]
end
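The final yield [false, false] is the end-of-input sentinel that the racc-generated parser expects from its token source. A minimal driver, mirroring what #fullscan does internally:

lexer = Puppet::Parser::Lexer.new
lexer.string = "notice('hi')"
lexer.scan do |name, value|
  break if name == false   # [false, false] marks end of input
  puts "#{name.inspect} #{value.inspect}"
end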
#scan_until(regex) ⇒ Object
Provide some limited access to the scanner, for those tokens that need it.
# File 'lib/puppet/parser/lexer.rb', line 518

def scan_until(regex)
  @scanner.scan_until(regex)
end
#shift_token ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 319

def shift_token
  @token_queue.shift
end
#skip ⇒ Object
Skip any skipchars in our remaining string.
# File 'lib/puppet/parser/lexer.rb', line 512

def skip
  @scanner.skip(@skip)
end
#slurpstring(terminators, escapes = %w{ \\ $ ' " n t s }+["\n"], ignore_invalid_escapes = false) ⇒ Object
We've encountered the start of a string: slurp in the rest of the string and return it.
# File 'lib/puppet/parser/lexer.rb', line 524

def slurpstring(terminators,escapes=%w{ \\ $ ' " n t s }+["\n"],ignore_invalid_escapes=false)
  # we search for the next quote that isn't preceded by a
  # backslash; the caret is there to match empty strings
  str = @scanner.scan_until(/([^\\]|^|[^\\])([\\]{2})*[#{terminators}]/) or lex_error "Unclosed quote after '#{last}' in '#{rest}'"
  @line += str.count("\n") # literal carriage returns add to the line count.
  str.gsub!(/\\(.)/m) {
    ch = $1
    if escapes.include? ch
      case ch
      when 'n'; "\n"
      when 't'; "\t"
      when 's'; " "
      when "\n"; ''
      else ch
      end
    else
      Puppet.warning "Unrecognised escape sequence '\\#{ch}'#{file && " in file #{file}"}#{line && " at line #{line}"}" unless ignore_invalid_escapes
      "\\#{ch}"
    end
  }
  [ str[0..-2],str[-1,1] ]
end
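The return value is a pair: the string body with escapes processed, and the single terminator character that closed it (callers use the terminator to decide whether interpolation follows). A rough direct invocation, positioning the scanner as if an opening quote had just been consumed:

lexer = Puppet::Parser::Lexer.new
lexer.string = %q{Hello\tthere" trailing}   # text following an opening '"'
body, terminator = lexer.slurpstring('"')
body        #=> "Hello\tthere"  (the \t escape has been translated)
terminator  #=> "\""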
#string=(string) ⇒ Object
Just parse a string, not a whole file.
# File 'lib/puppet/parser/lexer.rb', line 561

def string=(string)
  @scanner = StringScanner.new(string)
end
#tokenize_interpolated_string(token_type, preamble = '') ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 547

def tokenize_interpolated_string(token_type,preamble='')
  value,terminator = slurpstring('"$')
  token_queue << [TOKENS[token_type[terminator]],preamble+value]
  if terminator != '$' or @scanner.scan(/\{/)
    token_queue.shift
  elsif var_name = @scanner.scan(TOKENS[:VARIABLE].regex)
    token_queue << [TOKENS[:VARIABLE],var_name]
    tokenize_interpolated_string(DQ_continuation_token_types)
  else
    tokenize_interpolated_string(token_type,token_queue.pop.last + terminator)
  end
end
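The recursion consumes a double-quoted string one segment at a time: a $ terminator emits a :DQPRE or :DQMID segment plus a :VARIABLE, while a closing " ends with :STRING or :DQPOST (per the token-type hashes above). An approximate trace; the method returns the first [token, value] pair and leaves the rest on token_queue:

lexer = Puppet::Parser::Lexer.new
lexer.string = 'Hello $name!"'   # as if the opening '"' was just consumed
lexer.tokenize_interpolated_string(Puppet::Parser::Lexer::DQ_initial_token_types)
# Produces, roughly:
#   [:DQPRE,    "Hello "]  returned
#   [:VARIABLE, "name"]    queued
#   [:DQPOST,   "!"]       queued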