Class: Puppet::Pops::Parser::Lexer
- Extended by:
- Forwardable
- Defined in:
- lib/puppet/pops/parser/lexer.rb
Defined Under Namespace
Modules: Contextual Classes: Locator, Token, TokenList
Constant Summary collapse
- TOKENS =
TokenList.new
- DQ_initial_token_types =
{'$' => :DQPRE,'"' => :STRING}
- DQ_continuation_token_types =
{'$' => :DQMID,'"' => :DQPOST}
- KEYWORDS =
TokenList.new
- @@pairs =
{ "{" => "}", "(" => ")", "[" => "]", "<|" => "|>", "<<|" => "|>>", "|" => "|" }
Instance Attribute Summary collapse
- #file ⇒ Object
- #indefine ⇒ Object (also: #indefine?)
- #lexing_context ⇒ Object readonly
- #locator ⇒ Object readonly
- #token_queue ⇒ Object readonly
Instance Method Summary collapse
- #assert_numeric(value) ⇒ Object
- #clear ⇒ Object
- #expected ⇒ Object
-
#find_regex_token ⇒ Object
Find the next token that matches a regex.
- #find_string_token ⇒ Object
-
#find_token ⇒ Object
Find the next token, returning the string and the token.
-
#followed_by ⇒ Object
Returns "<eof>" if at end of input, else the following 5 characters with \n, \r and \t escaped.
- #format_quote(q) ⇒ Object
-
#fullscan ⇒ Object
Scan the whole file; basically just used for testing.
-
#init_multibyte ⇒ Object
Returns true if ruby version >= 1.9.3 since regexp supports multi-byte matches and expanded character categories like [[:blank:]].
-
#initialize ⇒ Lexer
constructor
A new instance of Lexer.
- #initvars ⇒ Object
- #lex_error(msg) ⇒ Object
-
#line ⇒ Object
Returns the line number (starting from 1) for the current position in the scanned text (at the end of the last produced, but not necessarily consumed, token).
- #match?(r) ⇒ Boolean
- #multibyte? ⇒ Boolean
-
#munge_token(token, value) ⇒ Object
Make any necessary changes to the token and/or value.
-
#namespace ⇒ Object
Collect the current namespace.
- #pos ⇒ Object
-
#position_in_source ⇒ Object
Returns a hash with the current position in source based on the current lexing context.
-
#positioned_message(msg) ⇒ Object
Formats given message by appending file, line and position if available.
- #replace_false_start_with_text(appendix) ⇒ Object
-
#scan {|[false,false]| ... } ⇒ Object
this is the heart of the lexer.
-
#skip ⇒ Object
Skip any skipchars in our remaining string.
-
#slurpstring(terminators, escapes = %w{ \\ $ ' " r n t s }+["\n"], ignore_invalid_escapes = false) ⇒ Object
We've encountered the start of a string; slurps in the rest of the string and returns it.
-
#string=(string) ⇒ Object
just parse a string, not a whole file.
- #tokenize_interpolated_string(token_type, preamble = '') ⇒ Object
- #warn_if_variable_has_hyphen(var_name) ⇒ Object
Constructor Details
#initialize ⇒ Lexer
Returns a new instance of Lexer.
464 465 466 467 |
# File 'lib/puppet/pops/parser/lexer.rb', line 464

# Builds a new lexer: probes multibyte regexp support once, then
# initializes all lexing state via #initvars.
def initialize
  @multibyte = init_multibyte
  initvars
end
Instance Attribute Details
#indefine ⇒ Object Also known as: indefine?
19 20 21 |
# File 'lib/puppet/pops/parser/lexer.rb', line 19

# Reader for the definition currently being lexed (aliased as #indefine?).
def indefine
  @indefine
end
#lexing_context ⇒ Object (readonly)
15 16 17 |
# File 'lib/puppet/pops/parser/lexer.rb', line 15

# Read-only access to the hash tracking the current lexing context.
def lexing_context
  @lexing_context
end
#locator ⇒ Object (readonly)
17 18 19 |
# File 'lib/puppet/pops/parser/lexer.rb', line 17

# Read-only access to the Locator used for offset/line computations.
def locator
  @locator
end
#token_queue ⇒ Object (readonly)
15 16 17 |
# File 'lib/puppet/pops/parser/lexer.rb', line 15

# Read-only access to the queue of already-produced tokens.
def token_queue
  @token_queue
end
Instance Method Details
#assert_numeric(value) ⇒ Object
469 470 471 472 473 474 475 476 477 |
# File 'lib/puppet/pops/parser/lexer.rb', line 469

# Validates that a scanned numeric literal is well formed, raising a lex
# error (via #lex_error) when the value is not a valid hex, octal or
# decimal number.
#
# @param value [String] the matched numeric literal
# @return [nil] when the value is valid
def assert_numeric(value)
  if value =~ /^0[xX].*$/
    lex_error("Not a valid hex number #{value}") unless value =~ /^0[xX][0-9A-Fa-f]+$/
  elsif value =~ /^0[^.].*$/
    lex_error("Not a valid octal number #{value}") unless value =~ /^0[0-7]+$/
  else
    # Anchored with \A..\z: the previous unanchored regex accepted any
    # string that merely contained digits somewhere (e.g. "12abc").
    lex_error("Not a valid decimal number #{value}") unless value =~ /\A0?\d+(?:\.\d+)?(?:[eE]-?\d+)?\z/
  end
end
#clear ⇒ Object
396 397 398 |
# File 'lib/puppet/pops/parser/lexer.rb', line 396

# Resets the lexer to a pristine state.
def clear
  initvars
end
#expected ⇒ Object
400 401 402 403 404 |
# File 'lib/puppet/pops/parser/lexer.rb', line 400

# Returns the token expected to close the innermost open pair, or nil
# when nothing is pending. Raises an internal error when the recorded
# name has no registered token.
def expected
  return nil if @expected.empty?
  token_name = @expected.last
  TOKENS.lookup(token_name) or lex_error "Internal Lexer Error: Could not find expected token #{token_name}"
end
#find_regex_token ⇒ Object
Find the next token that matches a regex. We look for these first.
440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 |
# File 'lib/puppet/pops/parser/lexer.rb', line 440

# Finds the next token matched by a regex; these are checked before the
# plain string tokens. Returns [token, scanned_text] for the longest
# acceptable match, or nil when nothing matched.
def find_regex_token
  winner = nil
  winner_length = 0

  # Trying to optimize on the first char had a slightly negative effect
  # and was considerably more complicated.
  TOKENS.regex_tokens.each do |candidate|
    matched_length = @scanner.match?(candidate.regex)
    next unless matched_length && candidate.acceptable?(lexing_context)
    # Keep the longest acceptable match seen so far.
    if matched_length > winner_length
      winner_length = matched_length
      winner = candidate
    end
  end

  return winner, @scanner.scan(winner.regex) if winner
end
#find_string_token ⇒ Object
428 429 430 431 432 433 434 435 436 437 |
# File 'lib/puppet/pops/parser/lexer.rb', line 428

# Finds the next fixed-string token. The longest string token is three
# characters, so probe 3, then 2, then 1 — worst case three optimized
# hash lookups instead of a regex scan per token.
def find_string_token
  prefix = @scanner.peek(3)
  token = TOKENS.lookup(prefix[0, 3]) || TOKENS.lookup(prefix[0, 2]) || TOKENS.lookup(prefix[0, 1])
  [token, token && @scanner.scan(token.regex)]
end
#find_token ⇒ Object
Find the next token, returning the string and the token.
460 461 462 |
# File 'lib/puppet/pops/parser/lexer.rb', line 460

# Finds the next token: queued tokens first, then regex tokens, then
# fixed-string tokens.
def find_token
  queued = shift_token
  queued ||= find_regex_token
  queued || find_string_token
end
#followed_by ⇒ Object
Returns "<eof>" if at end of input, else the following 5 characters with \n, \r and \t escaped.
699 700 701 702 703 704 705 706 |
# File 'lib/puppet/pops/parser/lexer.rb', line 699

# Returns "<eof>" at end of input; otherwise the next 5 characters
# (suffixed with "...") with tab, newline and carriage return escaped.
def followed_by
  return "<eof>" if @scanner.eos?
  (@scanner.rest[0, 5] + "...").gsub("\t", '\t').gsub("\n", '\n').gsub("\r", '\r')
end
#format_quote(q) ⇒ Object
708 709 710 711 712 713 714 |
# File 'lib/puppet/pops/parser/lexer.rb', line 708

# Wraps a quote character in the opposite style of quote for display in
# error messages.
def format_quote(q)
  q == "'" ? '"\'"' : "'#{q}'"
end
#fullscan ⇒ Object
Scan the whole file; basically just used for testing.
408 409 410 411 412 413 414 415 416 417 |
# File 'lib/puppet/pops/parser/lexer.rb', line 408

# Scans the whole input and returns all tokens as an array; basically
# just used for testing.
def fullscan
  tokens = []
  scan do |token, str|
    # Ignore any definition nesting problems
    @indefine = false
    tokens << [token, str]
  end
  tokens
end
#init_multibyte ⇒ Object
Returns true if ruby version >= 1.9.3 since regexp supports multi-byte matches and expanded character categories like [[:blank:]].
This implementation will fail if there are more than 255 minor or micro versions of ruby
484 485 486 487 488 |
# File 'lib/puppet/pops/parser/lexer.rb', line 484

# Returns true when the ruby version is >= 1.9.3, i.e. when regexps
# support multibyte matches and categories like [[:blank:]].
# Packs (major, minor, micro) into one integer for comparison, so it
# would misbehave with more than 255 minor or micro versions.
def init_multibyte
  major, minor, micro = RUBY_VERSION.split(".").map { |part| part.to_i }
  ((major << 16) | (minor << 8) | micro) >= ((1 << 16) | (9 << 8) | 3)
end
#initvars ⇒ Object
494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 |
# File 'lib/puppet/pops/parser/lexer.rb', line 494

# (Re)initializes all lexer state: scanner, skip pattern, name stack,
# token queue and the lexing-context bookkeeping hash.
def initvars
  @previous_token = nil
  @scanner = nil
  @file = nil

  # Regexes in ruby are surprising here: "\n" =~ /\s/ behaves differently
  # across modes, so pick the skip pattern explicitly.
  @skip = if multibyte?
    # Skip all kinds of space, and CR, but not newlines
    %r{[[:blank:]\r]+}
  else
    %r{[ \t\r]+}
  end

  @namestack = []
  @token_queue = []
  @indefine = false
  @expected = []
  @lexing_context = {
    :after => nil,
    :start_of_line => true,
    :offset => 0,              # byte offset before where token starts
    :end_offset => 0,          # byte offset after scanned token
    :brace_count => 0,         # nested depth of braces
    :interpolation_stack => [] # matching interpolation brace level
  }
end
#lex_error(msg) ⇒ Object
22 23 24 |
# File 'lib/puppet/pops/parser/lexer.rb', line 22

# Raises a Puppet::LexError with the given message.
def lex_error(msg)
  raise Puppet::LexError.new(msg)
end
#line ⇒ Object
Returns the line number (starting from 1) for the current position in the scanned text (at the end of the last produced, but not necessarily consumed, token).
782 783 784 785 |
# File 'lib/puppet/pops/parser/lexer.rb', line 782

# Returns the 1-based line number for the current position in the
# scanned text (the end of the last produced token); 1 when no context
# or locator is available yet.
def line
  if lexing_context && locator
    locator.line_for_offset(lexing_context[:end_offset])
  else
    1
  end
end
#match?(r) ⇒ Boolean
656 657 658 |
# File 'lib/puppet/pops/parser/lexer.rb', line 656

# Delegates to the scanner: length of the match at the current position,
# or nil when the regex does not match there.
def match?(r)
  @scanner.match?(r)
end
#multibyte? ⇒ Boolean
490 491 492 |
# File 'lib/puppet/pops/parser/lexer.rb', line 490

# True when multibyte regexp support was detected at construction time.
def multibyte?
  @multibyte
end
#munge_token(token, value) ⇒ Object
Make any necessary changes to the token and/or value.
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 |
# File 'lib/puppet/pops/parser/lexer.rb', line 524

# Makes any necessary changes to the token and/or value: converts the
# token when it responds to :convert, drops skipped tokens, and attaches
# positioning information unless the conversion already produced a
# positioned Hash value.
def munge_token(token, value)
  # A token may already have been munged (converted and positioned)
  return token, value if value.is_a? Hash

  skip if token.skip_text
  return if token.skip

  token, value = token.convert(self, value) if token.respond_to?(:convert)
  return unless token
  return if token.skip

  # If the conversion performed the munging/positioning there is nothing
  # left to do.
  return token, value if value.is_a? Hash

  positioned = position_in_source
  positioned[:value] = value
  return token, positioned
end
#namespace ⇒ Object
Collect the current namespace.
573 574 575 |
# File 'lib/puppet/pops/parser/lexer.rb', line 573

# Collects the current namespace by joining the name stack with "::".
def namespace
  @namestack.join("::")
end
#pos ⇒ Object
560 561 562 |
# File 'lib/puppet/pops/parser/lexer.rb', line 560

# Position on the current line for the start of the current token.
def pos
  @locator.pos_on_line(lexing_context[:offset])
end
#position_in_source ⇒ Object
Returns a hash with the current position in source based on the current lexing context
551 552 553 554 555 556 557 558 |
# File 'lib/puppet/pops/parser/lexer.rb', line 551

# Builds a hash describing the current position in source based on the
# lexing context: :line, :pos (on line), :offset and :length in
# characters.
def position_in_source
  start_offset = lexing_context[:offset]
  pos_on_line = @locator.pos_on_line(start_offset)
  char_offset = @locator.char_offset(start_offset)
  char_length = @locator.char_length(start_offset, lexing_context[:end_offset])
  line_number = @locator.line_for_offset(start_offset)
  { :line => line_number, :pos => pos_on_line, :offset => char_offset, :length => char_length }
end
#positioned_message(msg) ⇒ Object
Formats given message by appending file, line and position if available.
691 692 693 694 695 696 |
# File 'lib/puppet/pops/parser/lexer.rb', line 691

# Formats the given message by appending file, line and position when
# available. (The method name was lost in extraction — the section
# header documents this as #positioned_message(msg).)
#
# @param msg [String] the base message
# @return [String] msg decorated with "in file F" and "at line L:P" when known
def positioned_message(msg)
  result = [msg]
  result << "in file #{file}" if file
  result << "at line #{line}:#{pos}" if line
  result.join(" ")
end
#replace_false_start_with_text(appendix) ⇒ Object
756 757 758 759 760 761 762 763 764 |
# File 'lib/puppet/pops/parser/lexer.rb', line 756

# Pops the most recently queued token and returns its textual value with
# the given appendix concatenated (handling both positioned Hash values
# and plain string values).
def replace_false_start_with_text(appendix)
  popped = token_queue.pop
  payload = popped.last
  payload = payload[:value] if payload.is_a?(Hash)
  payload + appendix
end
#scan {|[false,false]| ... } ⇒ Object
this is the heart of the lexer
579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 |
# File 'lib/puppet/pops/parser/lexer.rb', line 579

# The heart of the lexer: repeatedly finds, munges and yields tokens as
# [name, value] pairs while maintaining the lexing context (start of
# line, byte offsets, interpolation/brace bookkeeping, expected closing
# tokens and definition nesting). Yields [false, false] when done.
def scan
  lex_error "Internal Error: No string or file given to lexer to process." unless @scanner

  # Skip any initial whitespace.
  skip

  until token_queue.empty? and @scanner.eos?
    token_start = @scanner.pos
    matched_token, value = find_token
    token_end = @scanner.pos

    # Error out if we didn't match anything at all.
    lex_error "Could not match #{@scanner.rest[/^(\S+|\s+|.*)/]}" unless matched_token

    newline = matched_token.name == :RETURN

    lexing_context[:start_of_line] = newline
    lexing_context[:offset] = token_start
    lexing_context[:end_offset] = token_end

    final_token, token_value = munge_token(matched_token, value)
    # Update end position since munging may have moved the end offset.
    lexing_context[:end_offset] = @scanner.pos

    unless final_token
      skip
      next
    end

    lexing_context[:after] = final_token.name unless newline
    case final_token.name
    when :DQPRE
      lexing_context[:interpolation_stack] << lexing_context[:brace_count]
    when :DQPOST
      lexing_context[:interpolation_stack].pop
    end

    value = token_value[:value]

    # Track the closing token expected for each opened pair construct.
    if (closer = @@pairs[value]) && final_token.name != :DQUOTE && final_token.name != :SQUOTE
      @expected << closer
    elsif (pending = @expected[-1]) && pending == value && final_token.name != :DQUOTE && final_token.name != :SQUOTE
      @expected.pop
    end

    yield [final_token.name, token_value]

    if @previous_token
      namestack(value) if @previous_token.name == :CLASS && value != '{'

      if @previous_token.name == :DEFINE
        if indefine?
          msg = "Cannot nest definition #{value} inside #{@indefine}"
          self.indefine = false
          raise Puppet::ParseError, msg
        end

        @indefine = value
      end
    end
    @previous_token = final_token
    skip
  end

  # @scanner is deliberately NOT reset to nil here - it is still needed
  # to answer questions about context after parsing has completed.

  # This indicates that we're done parsing.
  yield [false, false]
end
#skip ⇒ Object
Skip any skipchars in our remaining string.
652 653 654 |
# File 'lib/puppet/pops/parser/lexer.rb', line 652

# Skips any skip-characters (whitespace pattern) in the remaining input.
def skip
  @scanner.skip(@skip)
end
#slurpstring(terminators, escapes = %w{ \\ $ ' " r n t s }+["\n"], ignore_invalid_escapes = false) ⇒ Object
we’ve encountered the start of a string… slurp in the rest of the string and return it
666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 |
# File 'lib/puppet/pops/parser/lexer.rb', line 666

# We've encountered the start of a string: slurps in the rest of the
# string up to an unescaped terminator, processing escape sequences, and
# returns [string_without_terminator, terminator].
def slurpstring(terminators, escapes = %w{ \\ $ ' " r n t s } + ["\n"], ignore_invalid_escapes = false)
  # Search for the next terminator that isn't preceded by a backslash;
  # the caret alternative lets the pattern match an empty string.
  previous_match = @scanner.matched
  str = @scanner.scan_until(/([^\\]|^|[^\\])([\\]{2})*[#{terminators}]/) ||
        lex_error("Unclosed quote after #{format_quote(previous_match)} followed by '#{followed_by}'")
  str.gsub!(/\\(.)/m) do
    ch = $1
    if escapes.include?(ch)
      case ch
      when 'r'  then "\r"
      when 'n'  then "\n"
      when 't'  then "\t"
      when 's'  then " "
      when "\n" then ''
      else ch
      end
    else
      Puppet.warning("Unrecognized escape sequence '\\#{ch}'") unless ignore_invalid_escapes
      "\\#{ch}"
    end
  end
  [str[0..-2], str[-1, 1]]
end
#string=(string) ⇒ Object
just parse a string, not a whole file
767 768 769 770 |
# File 'lib/puppet/pops/parser/lexer.rb', line 767

# Lexes just the given string rather than a whole file: installs a fresh
# scanner and locator over it.
def string=(input)
  @scanner = StringScanner.new(input)
  @locator = Locator.new(input, multibyte?)
end
#tokenize_interpolated_string(token_type, preamble = '') ⇒ Object
716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 |
# File 'lib/puppet/pops/parser/lexer.rb', line 716

# Tokenizes a double-quoted string with interpolation, recursing for
# each interpolated segment. Expects a (possibly empty) stretch of text
# terminated by end of string '"', a variable '$', or an expression '${'.
def tokenize_interpolated_string(token_type, preamble = '')
  segment, terminator = slurpstring('"$')

  # Advanced after '{' if this is ${} expression interpolation.
  braced = terminator == '$' && @scanner.scan(/\{/)
  # Make offset..end_offset cover the pre-expression string including its
  # start and terminating chars.
  lexing_context[:end_offset] = @scanner.pos
  token_queue << [TOKENS[token_type[terminator]], position_in_source.merge!({:value => preamble + segment})]

  variable_regex = if Puppet[:allow_variables_with_dashes]
    TOKENS[:VARIABLE_WITH_DASH].regex
  else
    TOKENS[:VARIABLE].regex
  end

  if terminator != '$' or braced
    return token_queue.shift
  end

  var_start = @scanner.pos
  if var_name = @scanner.scan(variable_regex)
    lexing_context[:offset] = var_start
    lexing_context[:end_offset] = @scanner.pos
    warn_if_variable_has_hyphen(var_name)
    # If the varname after ${ is followed by (, it is a function call and
    # not a variable reference.
    # NOTE(review): braced is always falsy here (guarded by the return
    # above), so this NAME branch looks unreachable — confirm intent.
    if braced && @scanner.match?(%r{[ \t\r]*\(})
      token_queue << [TOKENS[:NAME], position_in_source.merge!({:value => var_name})]
    else
      token_queue << [TOKENS[:VARIABLE], position_in_source.merge!({:value => var_name})]
    end
    lexing_context[:offset] = @scanner.pos
    tokenize_interpolated_string(DQ_continuation_token_types)
  else
    tokenize_interpolated_string(token_type, replace_false_start_with_text(terminator))
  end
end
#warn_if_variable_has_hyphen(var_name) ⇒ Object
772 773 774 775 776 |
# File 'lib/puppet/pops/parser/lexer.rb', line 772

# Emits a deprecation warning when the variable name contains a hyphen.
def warn_if_variable_has_hyphen(var_name)
  return unless var_name.include?('-')
  Puppet.deprecation_warning("Using `-` in variable names is deprecated at #{file || '<string>'}:#{line}. See http://links.puppetlabs.com/puppet-hyphenated-variable-deprecation")
end