Class: Puppet::Parser::Lexer
- Extended by: Forwardable
- Defined in: lib/puppet/parser/lexer.rb
Defined Under Namespace
Modules: Contextual
Classes: Token, TokenList
Constant Summary
- TOKENS = TokenList.new
- DQ_initial_token_types = {'$' => :DQPRE, '"' => :STRING}
- DQ_continuation_token_types = {'$' => :DQMID, '"' => :DQPOST}
- KEYWORDS = TokenList.new
- @@pairs = { "{" => "}", "(" => ")", "[" => "]", "<|" => "|>", "<<|" => "|>>" }
Instance Attribute Summary
- #file ⇒ Object
- #indefine ⇒ Object (also: #indefine?)
- #last ⇒ Object (readonly)
- #lexing_context ⇒ Object (readonly)
- #line ⇒ Object
- #token_queue ⇒ Object (readonly)
Instance Method Summary
- #clear ⇒ Object
- #commentpop ⇒ Object
  Returns the content of the currently accumulated comment cache.
- #commentpush ⇒ Object
- #expected ⇒ Object
- #find_regex_token ⇒ Object
  Find the next token that matches a regex.
- #find_string_token ⇒ Object
- #find_token ⇒ Object
  Find the next token, returning the string and the token.
- #fullscan ⇒ Object
  Scan the whole file; basically just used for testing.
- #getcomment(line = nil) ⇒ Object
- #initialize ⇒ Lexer (constructor)
  A new instance of Lexer.
- #initvars ⇒ Object
- #lex_error(msg) ⇒ Object
- #munge_token(token, value) ⇒ Object
  Make any necessary changes to the token and/or value.
- #namespace ⇒ Object
  Collect the current namespace.
- #pos ⇒ Object
  Returns the position on the line.
- #scan {|[false,false]| ... } ⇒ Object
  This is the heart of the lexer.
- #skip ⇒ Object
  Skip any skipchars in our remaining string.
- #slurpstring(terminators, escapes = %w{ \\ $ ' " r n t s } + ["\n"], ignore_invalid_escapes = false) ⇒ Object
  We've encountered the start of a string…
- #string=(string) ⇒ Object
  Just parse a string, not a whole file.
- #tokenize_interpolated_string(token_type, preamble = '') ⇒ Object
- #warn_if_variable_has_hyphen(var_name) ⇒ Object
Constructor Details
#initialize ⇒ Lexer
Returns a new instance of Lexer.
# File 'lib/puppet/parser/lexer.rb', line 392

def initialize
  initvars
end
Instance Attribute Details
#indefine ⇒ Object Also known as: indefine?
# File 'lib/puppet/parser/lexer.rb', line 20

def indefine
  @indefine
end
#lexing_context ⇒ Object (readonly)
# File 'lib/puppet/parser/lexer.rb', line 18

def lexing_context
  @lexing_context
end
#token_queue ⇒ Object (readonly)
# File 'lib/puppet/parser/lexer.rb', line 18

def token_queue
  @token_queue
end
Instance Method Details
#commentpop ⇒ Object
Returns the content of the currently accumulated comment cache.
# File 'lib/puppet/parser/lexer.rb', line 585

def commentpop
  @commentstack.pop[0]
end
#commentpush ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 599

def commentpush
  @commentstack.push(['', @line])
end
#expected ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 328

def expected
  return nil if @expected.empty?
  name = @expected[-1]
  TOKENS.lookup(name) or lex_error "Could not find expected token #{name}"
end
#find_regex_token ⇒ Object
Find the next token that matches a regex. We look for these first.
# File 'lib/puppet/parser/lexer.rb', line 368

def find_regex_token
  best_token = nil
  best_length = 0

  # I tried optimizing based on the first char, but it had
  # a slightly negative effect and was a good bit more complicated.
  TOKENS.regex_tokens.each do |token|
    if length = @scanner.match?(token.regex) and token.acceptable?(lexing_context)
      # We've found a longer match
      if length > best_length
        best_length = length
        best_token = token
      end
    end
  end

  return best_token, @scanner.scan(best_token.regex) if best_token
end
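The loop above relies on StringScanner#match?, which returns the length of the match at the current position without consuming any input, so every candidate regex can be compared before the winner is scanned. A minimal standalone sketch of that primitive (the regexes are illustrative, not Puppet's):

require 'strscan'

scanner = StringScanner.new("classify")
# match? reports the match length but leaves the scan pointer alone,
# so several candidate patterns can be compared before committing.
scanner.match?(/class/)     # => 5
scanner.match?(/classify/)  # => 8  (the longer match would win)
scanner.pos                 # => 0  (nothing consumed yet)
scanner.scan(/classify/)    # => "classify" (now consume the best match)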
#find_string_token ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 356

def find_string_token
  # We know our longest string token is three chars, so try each size in turn
  # until we either match or run out of chars. This way our worst-case is three
  # tries, where it is otherwise the number of string tokens we have. Also,
  # the lookups are optimized hash lookups, instead of regex scans.
  s = @scanner.peek(3)
  token = TOKENS.lookup(s[0,3]) || TOKENS.lookup(s[0,2]) || TOKENS.lookup(s[0,1])
  [ token, token && @scanner.scan(token.regex) ]
end
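The same longest-prefix-first idea, reduced to a self-contained sketch with a hypothetical token table (the real code consults TOKENS.lookup):

# Hypothetical table; ambiguous prefixes make the lookup order matter.
STRING_TOKENS = { "<<|" => :LLCOLLECT, "<|" => :LCOLLECT, "<" => :LESSTHAN }

def lookup_longest(input)
  s = input[0, 3]
  # Try the 3-char prefix, then 2-char, then 1-char; first hit wins.
  STRING_TOKENS[s[0, 3]] || STRING_TOKENS[s[0, 2]] || STRING_TOKENS[s[0, 1]]
end

lookup_longest("<<| x |>>")  # => :LLCOLLECT, not :LESSTHAN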
#find_token ⇒ Object
Find the next token, returning the string and the token.
# File 'lib/puppet/parser/lexer.rb', line 388

def find_token
  shift_token || find_regex_token || find_string_token
end
#fullscan ⇒ Object
Scan the whole file; basically just used for testing.
# File 'lib/puppet/parser/lexer.rb', line 336

def fullscan
  array = []

  self.scan { |token, str|
    # Ignore any definition nesting problems
    @indefine = false
    array.push([token, str])
  }
  array
end
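A minimal usage sketch, assuming a Puppet version that still ships this lexer (3.x) is on the load path; the token stream in the comment is illustrative:

require 'puppet'
require 'puppet/parser/lexer'

lexer = Puppet::Parser::Lexer.new
lexer.string = 'notify { "hi": }'   # feed a string rather than a file
tokens = lexer.fullscan
# => [[:NAME, {:value=>"notify", :line=>1}], [:LBRACE, {:value=>"{", :line=>1}],
#     ..., [false, false]]   # the trailing [false, false] marks end of input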
#getcomment(line = nil) ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 589

def getcomment(line = nil)
  comment = @commentstack.last
  if line.nil? or comment[1] <= line
    @commentstack.pop
    @commentstack.push(['', @line])
    return comment[0]
  end
  ''
end
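A sketch of the comment machinery, assuming comment tokens are defined with :accumulate in this lexer's token table, so munge_token appends them to the current comment frame during scanning:

lexer = Puppet::Parser::Lexer.new
lexer.string = "# documents foo\nnotify"
lexer.fullscan    # the COMMENT token is skipped but accumulated
lexer.getcomment  # => something like "documents foo\n"; the frame is reset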
#initvars ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 396

def initvars
  @line = 1
  @previous_token = nil
  @scanner = nil
  @file = nil
  # AAARRGGGG! okay, regexes in ruby are bloody annoying
  # no one else has "\n" =~ /\s/
  @skip = %r{[ \t\r]+}

  @namestack = []
  @token_queue = []
  @indefine = false
  @expected = []
  @commentstack = [ ['', @line] ]
  @lexing_context = {
    :after => nil,
    :start_of_line => true,
    :string_interpolation_depth => 0
  }
end
#lex_error(msg) ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 32

def lex_error msg
  raise Puppet::LexError.new(msg)
end
#munge_token(token, value) ⇒ Object
Make any necessary changes to the token and/or value.
# File 'lib/puppet/parser/lexer.rb', line 418

def munge_token(token, value)
  @line += 1 if token.incr_line

  skip if token.skip_text

  return if token.skip and not token.accumulate?

  token, value = token.convert(self, value) if token.respond_to?(:convert)

  return unless token

  if token.accumulate?
    comment = @commentstack.pop
    comment[0] << value + "\n"
    @commentstack.push(comment)
  end

  return if token.skip

  return token, { :value => value, :line => @line }
end
#namespace ⇒ Object
Collect the current namespace.
# File 'lib/puppet/parser/lexer.rb', line 448

def namespace
  @namestack.join("::")
end
#pos ⇒ Object
Returns the position on the line. This implementation always returns nil. It is here for API reasons in Puppet::Error, which needs to support both --parser current and --parser future.
# File 'lib/puppet/parser/lexer.rb', line 27

def pos
  # Make the lexer comply with newer API. It does not produce a pos...
  nil
end
#scan {|[false,false]| ... } ⇒ Object
This is the heart of the lexer.
# File 'lib/puppet/parser/lexer.rb', line 455

def scan
  #Puppet.debug("entering scan")
  lex_error "Invalid or empty string" unless @scanner

  # Skip any initial whitespace.
  skip

  until token_queue.empty? and @scanner.eos? do
    matched_token, value = find_token

    # error out if we didn't match anything at all
    lex_error "Could not match #{@scanner.rest[/^(\S+|\s+|.*)/]}" unless matched_token

    newline = matched_token.name == :RETURN

    # this matches a blank line; eat the previously accumulated comments
    getcomment if lexing_context[:start_of_line] and newline
    lexing_context[:start_of_line] = newline

    final_token, token_value = munge_token(matched_token, value)

    unless final_token
      skip
      next
    end

    final_token_name = final_token.name
    lexing_context[:after] = final_token_name unless newline
    lexing_context[:string_interpolation_depth] += 1 if final_token_name == :DQPRE
    lexing_context[:string_interpolation_depth] -= 1 if final_token_name == :DQPOST
    value = token_value[:value]

    if match = @@pairs[value] and final_token_name != :DQUOTE and final_token_name != :SQUOTE
      @expected << match
    elsif exp = @expected[-1] and exp == value and final_token_name != :DQUOTE and final_token_name != :SQUOTE
      @expected.pop
    end

    if final_token_name == :LBRACE or final_token_name == :LPAREN
      commentpush
    end
    if final_token_name == :RPAREN
      commentpop
    end

    yield [final_token_name, token_value]

    if @previous_token
      namestack(value) if @previous_token.name == :CLASS and value != '{'

      if @previous_token.name == :DEFINE
        if indefine?
          msg = "Cannot nest definition #{value} inside #{@indefine}"
          self.indefine = false
          raise Puppet::ParseError, msg
        end

        @indefine = value
      end
    end
    @previous_token = final_token
    skip
  end
  @scanner = nil

  # This indicates that we're done parsing.
  yield [false,false]
end
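A sketch of driving scan directly with a block; the yielded pair is the token name and a hash with :value and :line, with [false, false] signalling end of input:

lexer = Puppet::Parser::Lexer.new
lexer.string = 'class foo { }'
lexer.scan do |name, value|
  break if name == false   # [false, false]: no more tokens
  puts "#{name} #{value[:value].inspect} (line #{value[:line]})"
end
# Prints something like:
#   CLASS "class" (line 1)
#   NAME "foo" (line 1)
#   LBRACE "{" (line 1)
#   RBRACE "}" (line 1)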
#skip ⇒ Object
Skip any skipchars in our remaining string.
# File 'lib/puppet/parser/lexer.rb', line 526

def skip
  @scanner.skip(@skip)
end
#slurpstring(terminators, escapes = %w{ \\ $ ' " r n t s }+["\n"], ignore_invalid_escapes = false) ⇒ Object
We've encountered the start of a string; slurp in the rest of the string and return it.
# File 'lib/puppet/parser/lexer.rb', line 536

def slurpstring(terminators, escapes = %w{ \\ $ ' " r n t s } + ["\n"], ignore_invalid_escapes = false)
  # we search for the next quote that isn't preceded by a
  # backslash; the caret is there to match empty strings
  str = @scanner.scan_until(/([^\\]|^|[^\\])([\\]{2})*[#{terminators}]/) or lex_error "Unclosed quote after '#{last}' in '#{rest}'"
  @line += str.count("\n") # literal newlines add to the line count.

  str.gsub!(/\\(.)/m) {
    ch = $1
    if escapes.include? ch
      case ch
      when 'r'; "\r"
      when 'n'; "\n"
      when 't'; "\t"
      when 's'; " "
      when "\n"; ''
      else ch
      end
    else
      Puppet.warning "Unrecognised escape sequence '\\#{ch}'#{file && " in file #{file}"}#{line && " at line #{line}"}" unless ignore_invalid_escapes
      "\\#{ch}"
    end
  }
  [ str[0..-2], str[-1,1] ]
end
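A sketch of calling it directly. The scanner must already sit just past an opening quote, which we fake here by leaving the opening quote out of the input; the \t below is a literal backslash-t that the default escapes convert to a tab:

lexer = Puppet::Parser::Lexer.new
lexer.string = 'hello\tworld"trailing'  # as if an opening " was just consumed

body, terminator = lexer.slurpstring('"')
body         # => "hello\tworld"  (escape converted, closing quote stripped)
terminator   # => "\""
lexer.line   # still 1; no newlines were slurped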
#string=(string) ⇒ Object
Just parse a string, not a whole file.
# File 'lib/puppet/parser/lexer.rb', line 580

def string=(string)
  @scanner = StringScanner.new(string)
end
#tokenize_interpolated_string(token_type, preamble = '') ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 560

def tokenize_interpolated_string(token_type, preamble = '')
  value, terminator = slurpstring('"$')
  token_queue << [TOKENS[token_type[terminator]], preamble + value]
  variable_regex = if Puppet[:allow_variables_with_dashes]
    TOKENS[:VARIABLE_WITH_DASH].regex
  else
    TOKENS[:VARIABLE].regex
  end
  if terminator != '$' or @scanner.scan(/\{/)
    token_queue.shift
  elsif var_name = @scanner.scan(variable_regex)
    warn_if_variable_has_hyphen(var_name)
    token_queue << [TOKENS[:VARIABLE], var_name]
    tokenize_interpolated_string(DQ_continuation_token_types)
  else
    tokenize_interpolated_string(token_type, token_queue.pop.last + terminator)
  end
end
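A sketch of what this produces for a simple interpolated string: the double-quoted text is split into a DQPRE/VARIABLE/DQPOST run rather than a single STRING token (values illustrative):

lexer = Puppet::Parser::Lexer.new
lexer.string = '"pre $name post"'
lexer.fullscan
# Yields, in order, something like:
#   [:DQPRE,    {:value=>"pre ",  :line=>1}]   # text before the first $
#   [:VARIABLE, {:value=>"name",  :line=>1}]
#   [:DQPOST,   {:value=>" post", :line=>1}]   # text up to the closing quote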
#warn_if_variable_has_hyphen(var_name) ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 603

def warn_if_variable_has_hyphen(var_name)
  if var_name.include?('-')
    Puppet.deprecation_warning("Using `-` in variable names is deprecated at #{file || '<string>'}:#{line}. See http://links.puppetlabs.com/puppet-hyphenated-variable-deprecation")
  end
end
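Calling it directly, per the code above (the variable name is arbitrary):

lexer = Puppet::Parser::Lexer.new
lexer.warn_if_variable_has_hyphen('my-var')
# Emits a deprecation warning pointing at '<string>', since no file is set;
# a name such as 'my_var' passes silently.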