Class: Puppet::Parser::Lexer
- Extended by: Forwardable
- Defined in: lib/puppet/parser/lexer.rb
Defined Under Namespace
Modules: Contextual
Classes: Token, TokenList
Constant Summary
- TOKENS = TokenList.new
- DQ_initial_token_types = {'$' => :DQPRE, '"' => :STRING}
- DQ_continuation_token_types = {'$' => :DQMID, '"' => :DQPOST}
- KEYWORDS = TokenList.new
- @@pairs = { "{" => "}", "(" => ")", "[" => "]", "<|" => "|>", "<<|" => "|>>" }
Instance Attribute Summary
- #file ⇒ Object
- #indefine ⇒ Object (also: #indefine?)
- #last ⇒ Object (readonly)
- #lexing_context ⇒ Object (readonly)
- #line ⇒ Object
- #token_queue ⇒ Object (readonly)
Instance Method Summary
- #clear ⇒ Object
- #commentpop ⇒ Object
  Returns the content of the currently accumulated comment cache.
- #commentpush ⇒ Object
- #expected ⇒ Object
- #find_regex_token ⇒ Object
  Find the next token that matches a regex.
- #find_string_token ⇒ Object
- #find_token ⇒ Object
  Find the next token, returning the string and the token.
- #fullscan ⇒ Object
  Scan the whole file; basically just used for testing.
- #getcomment(line = nil) ⇒ Object
- #initialize ⇒ Lexer (constructor)
  A new instance of Lexer.
- #initvars ⇒ Object
- #lex_error(msg) ⇒ Object
- #munge_token(token, value) ⇒ Object
  Make any necessary changes to the token and/or value.
- #namespace ⇒ Object
  Collect the current namespace.
- #pos ⇒ Object
  Returns the position on the line.
- #scan {|[false,false]| ... } ⇒ Object
  This is the heart of the lexer.
- #skip ⇒ Object
  Skip any skipchars in our remaining string.
- #slurpstring(terminators, escapes = %w{ \\ $ ' " r n t s } + ["\n"], ignore_invalid_escapes = false) ⇒ Object
  We've encountered the start of a string…
- #string=(string) ⇒ Object
  Just parse a string, not a whole file.
- #tokenize_interpolated_string(token_type, preamble = '') ⇒ Object
- #warn_if_variable_has_hyphen(var_name) ⇒ Object
Constructor Details
#initialize ⇒ Lexer
Returns a new instance of Lexer.
# File 'lib/puppet/parser/lexer.rb', line 392

def initialize
  initvars
end
Instance Attribute Details
#indefine ⇒ Object Also known as: indefine?
# File 'lib/puppet/parser/lexer.rb', line 20

def indefine
  @indefine
end
#lexing_context ⇒ Object (readonly)
# File 'lib/puppet/parser/lexer.rb', line 18

def lexing_context
  @lexing_context
end
#token_queue ⇒ Object (readonly)
# File 'lib/puppet/parser/lexer.rb', line 18

def token_queue
  @token_queue
end
Instance Method Details
#commentpop ⇒ Object
Returns the content of the currently accumulated comment cache.
# File 'lib/puppet/parser/lexer.rb', line 585

def commentpop
  @commentstack.pop[0]
end
#commentpush ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 599

def commentpush
  @commentstack.push(['', @line])
end
#expected ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 328

def expected
  return nil if @expected.empty?
  name = @expected[-1]
  TOKENS.lookup(name) or lex_error "Could not find expected token #{name}"
end
#find_regex_token ⇒ Object
Find the next token that matches a regex. We look for these first.
# File 'lib/puppet/parser/lexer.rb', line 368

def find_regex_token
  best_token = nil
  best_length = 0

  # I tried optimizing based on the first char, but it had
  # a slightly negative effect and was a good bit more complicated.
  TOKENS.regex_tokens.each do |token|
    if length = @scanner.match?(token.regex) and token.acceptable?(lexing_context)
      # We've found a longer match
      if length > best_length
        best_length = length
        best_token = token
      end
    end
  end

  return best_token, @scanner.scan(best_token.regex) if best_token
end
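The loop above relies on StringScanner#match?, which returns the length of the match at the current position without consuming any input, so every candidate regex can be compared before the winner is scanned. A minimal standalone sketch of that primitive (the regexes are illustrative, not Puppet's):

require 'strscan'

scanner = StringScanner.new("classify")
# match? reports the match length but leaves the scan pointer alone,
# so several candidate patterns can be compared before committing.
scanner.match?(/class/)     # => 5
scanner.match?(/classify/)  # => 8  (the longer match would win)
scanner.pos                 # => 0  (nothing consumed yet)
scanner.scan(/classify/)    # => "classify" (now consume the best match)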
#find_string_token ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 356

def find_string_token
  # We know our longest string token is three chars, so try each size in turn
  # until we either match or run out of chars. This way our worst-case is three
  # tries, where it is otherwise the number of string tokens we have. Also,
  # the lookups are optimized hash lookups, instead of regex scans.
  s = @scanner.peek(3)
  token = TOKENS.lookup(s[0,3]) || TOKENS.lookup(s[0,2]) || TOKENS.lookup(s[0,1])
  [ token, token && @scanner.scan(token.regex) ]
end
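The same longest-prefix-first idea, reduced to a self-contained sketch with a hypothetical token table (the real code consults TOKENS.lookup):

# Hypothetical table; ambiguous prefixes make the lookup order matter.
STRING_TOKENS = { "<<|" => :LLCOLLECT, "<|" => :LCOLLECT, "<" => :LESSTHAN }

def lookup_longest(input)
  s = input[0, 3]
  # Try the 3-char prefix, then 2-char, then 1-char; first hit wins.
  STRING_TOKENS[s[0, 3]] || STRING_TOKENS[s[0, 2]] || STRING_TOKENS[s[0, 1]]
end

lookup_longest("<<| x |>>")  # => :LLCOLLECT, not :LESSTHAN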
#find_token ⇒ Object
Find the next token, returning the string and the token.
# File 'lib/puppet/parser/lexer.rb', line 388

def find_token
  shift_token || find_regex_token || find_string_token
end
#fullscan ⇒ Object
Scan the whole file; basically just used for testing.
# File 'lib/puppet/parser/lexer.rb', line 336

def fullscan
  array = []

  self.scan { |token, str|
    # Ignore any definition nesting problems
    @indefine = false
    array.push([token, str])
  }
  array
end
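A minimal usage sketch, assuming a Puppet version that still ships this lexer (3.x) is on the load path; the token stream in the comment is illustrative:

require 'puppet'
require 'puppet/parser/lexer'

lexer = Puppet::Parser::Lexer.new
lexer.string = 'notify { "hi": }'   # feed a string rather than a file
tokens = lexer.fullscan
# => [[:NAME, {:value=>"notify", :line=>1}], [:LBRACE, {:value=>"{", :line=>1}],
#     ..., [false, false]]   # the trailing [false, false] marks end of input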
#getcomment(line = nil) ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 589

def getcomment(line = nil)
  comment = @commentstack.last
  if line.nil? or comment[1] <= line
    @commentstack.pop
    @commentstack.push(['', @line])
    return comment[0]
  end
  ''
end
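A sketch of the comment machinery, assuming comment tokens are defined with :accumulate in this lexer's token table, so munge_token appends them to the current comment frame during scanning:

lexer = Puppet::Parser::Lexer.new
lexer.string = "# documents foo\nnotify"
lexer.fullscan    # the COMMENT token is skipped but accumulated
lexer.getcomment  # => something like "documents foo\n"; the frame is reset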
#initvars ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 396

def initvars
  @line = 1
  @previous_token = nil
  @scanner = nil
  @file = nil
  # AAARRGGGG! okay, regexes in ruby are bloody annoying
  # no one else has "\n" =~ /\s/
  @skip = %r{[ \t\r]+}

  @namestack = []
  @token_queue = []
  @indefine = false
  @expected = []
  @commentstack = [ ['', @line] ]
  @lexing_context = {
    :after => nil,
    :start_of_line => true,
    :string_interpolation_depth => 0
  }
end
#lex_error(msg) ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 32

def lex_error msg
  raise Puppet::LexError.new(msg)
end
#munge_token(token, value) ⇒ Object
Make any necessary changes to the token and/or value.
# File 'lib/puppet/parser/lexer.rb', line 418

def munge_token(token, value)
  @line += 1 if token.incr_line

  skip if token.skip_text

  return if token.skip and not token.accumulate?

  token, value = token.convert(self, value) if token.respond_to?(:convert)

  return unless token

  if token.accumulate?
    comment = @commentstack.pop
    comment[0] << value + "\n"
    @commentstack.push(comment)
  end

  return if token.skip

  return token, { :value => value, :line => @line }
end
#namespace ⇒ Object
Collect the current namespace.
# File 'lib/puppet/parser/lexer.rb', line 448

def namespace
  @namestack.join("::")
end
#pos ⇒ Object
Returns the position on the line. This implementation always returns nil. It is here for API reasons in Puppet::Error, which needs to support both --parser current and --parser future.
# File 'lib/puppet/parser/lexer.rb', line 27

def pos
  # Make the lexer comply with newer API. It does not produce a pos...
  nil
end
#scan {|[false,false]| ... } ⇒ Object
This is the heart of the lexer.
# File 'lib/puppet/parser/lexer.rb', line 455

def scan
  #Puppet.debug("entering scan")
  lex_error "Invalid or empty string" unless @scanner

  # Skip any initial whitespace.
  skip

  until token_queue.empty? and @scanner.eos? do
    matched_token, value = find_token

    # error out if we didn't match anything at all
    lex_error "Could not match #{@scanner.rest[/^(\S+|\s+|.*)/]}" unless matched_token

    newline = matched_token.name == :RETURN

    # this matches a blank line; eat the previously accumulated comments
    getcomment if lexing_context[:start_of_line] and newline
    lexing_context[:start_of_line] = newline

    final_token, token_value = munge_token(matched_token, value)

    unless final_token
      skip
      next
    end

    final_token_name = final_token.name
    lexing_context[:after] = final_token_name unless newline
    lexing_context[:string_interpolation_depth] += 1 if final_token_name == :DQPRE
    lexing_context[:string_interpolation_depth] -= 1 if final_token_name == :DQPOST
    value = token_value[:value]

    if match = @@pairs[value] and final_token_name != :DQUOTE and final_token_name != :SQUOTE
      @expected << match
    elsif exp = @expected[-1] and exp == value and final_token_name != :DQUOTE and final_token_name != :SQUOTE
      @expected.pop
    end

    if final_token_name == :LBRACE or final_token_name == :LPAREN
      commentpush
    end
    if final_token_name == :RPAREN
      commentpop
    end

    yield [final_token_name, token_value]

    if @previous_token
      namestack(value) if @previous_token.name == :CLASS and value != '{'

      if @previous_token.name == :DEFINE
        if indefine?
          msg = "Cannot nest definition #{value} inside #{@indefine}"
          self.indefine = false
          raise Puppet::ParseError, msg
        end

        @indefine = value
      end
    end
    @previous_token = final_token
    skip
  end
  @scanner = nil

  # This indicates that we're done parsing.
  yield [false,false]
end
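A sketch of driving scan directly with a block; the yielded pair is the token name and a hash with :value and :line, with [false, false] signalling end of input:

lexer = Puppet::Parser::Lexer.new
lexer.string = 'class foo { }'
lexer.scan do |name, value|
  break if name == false   # [false, false]: no more tokens
  puts "#{name} #{value[:value].inspect} (line #{value[:line]})"
end
# Prints something like:
#   CLASS "class" (line 1)
#   NAME "foo" (line 1)
#   LBRACE "{" (line 1)
#   RBRACE "}" (line 1)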
#skip ⇒ Object
Skip any skipchars in our remaining string.
# File 'lib/puppet/parser/lexer.rb', line 526

def skip
  @scanner.skip(@skip)
end
#slurpstring(terminators, escapes = %w{ \\ $ ' " r n t s }+["\n"], ignore_invalid_escapes = false) ⇒ Object
We've encountered the start of a string; slurp in the rest of the string and return it.
# File 'lib/puppet/parser/lexer.rb', line 536

def slurpstring(terminators, escapes = %w{ \\ $ ' " r n t s } + ["\n"], ignore_invalid_escapes = false)
  # we search for the next quote that isn't preceded by a
  # backslash; the caret is there to match empty strings
  str = @scanner.scan_until(/([^\\]|^|[^\\])([\\]{2})*[#{terminators}]/) or lex_error "Unclosed quote after '#{last}' in '#{rest}'"
  @line += str.count("\n") # literal newlines add to the line count.

  str.gsub!(/\\(.)/m) {
    ch = $1
    if escapes.include? ch
      case ch
      when 'r'; "\r"
      when 'n'; "\n"
      when 't'; "\t"
      when 's'; " "
      when "\n"; ''
      else ch
      end
    else
      Puppet.warning "Unrecognised escape sequence '\\#{ch}'#{file && " in file #{file}"}#{line && " at line #{line}"}" unless ignore_invalid_escapes
      "\\#{ch}"
    end
  }
  [ str[0..-2], str[-1,1] ]
end
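A sketch of calling it directly. The scanner must already sit just past an opening quote, which we fake here by leaving the opening quote out of the input; the \t below is a literal backslash-t that the default escapes convert to a tab:

lexer = Puppet::Parser::Lexer.new
lexer.string = 'hello\tworld"trailing'  # as if an opening " was just consumed

body, terminator = lexer.slurpstring('"')
body         # => "hello\tworld"  (escape converted, closing quote stripped)
terminator   # => "\""
lexer.line   # still 1; no newlines were slurped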
#string=(string) ⇒ Object
Just parse a string, not a whole file.
# File 'lib/puppet/parser/lexer.rb', line 580

def string=(string)
  @scanner = StringScanner.new(string)
end
#tokenize_interpolated_string(token_type, preamble = '') ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 560

def tokenize_interpolated_string(token_type, preamble = '')
  value, terminator = slurpstring('"$')
  token_queue << [TOKENS[token_type[terminator]], preamble + value]
  variable_regex = if Puppet[:allow_variables_with_dashes]
    TOKENS[:VARIABLE_WITH_DASH].regex
  else
    TOKENS[:VARIABLE].regex
  end
  if terminator != '$' or @scanner.scan(/\{/)
    token_queue.shift
  elsif var_name = @scanner.scan(variable_regex)
    warn_if_variable_has_hyphen(var_name)
    token_queue << [TOKENS[:VARIABLE], var_name]
    tokenize_interpolated_string(DQ_continuation_token_types)
  else
    tokenize_interpolated_string(token_type, token_queue.pop.last + terminator)
  end
end
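A sketch of what this produces for a simple interpolated string: the double-quoted text is split into a DQPRE/VARIABLE/DQPOST run rather than a single STRING token (values illustrative):

lexer = Puppet::Parser::Lexer.new
lexer.string = '"pre $name post"'
lexer.fullscan
# Yields, in order, something like:
#   [:DQPRE,    {:value=>"pre ",  :line=>1}]   # text before the first $
#   [:VARIABLE, {:value=>"name",  :line=>1}]
#   [:DQPOST,   {:value=>" post", :line=>1}]   # text up to the closing quote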
#warn_if_variable_has_hyphen(var_name) ⇒ Object
# File 'lib/puppet/parser/lexer.rb', line 603

def warn_if_variable_has_hyphen(var_name)
  if var_name.include?('-')
    Puppet.deprecation_warning("Using `-` in variable names is deprecated at #{file || '<string>'}:#{line}. See http://links.puppetlabs.com/puppet-hyphenated-variable-deprecation")
  end
end
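Calling it directly, per the code above (the variable name is arbitrary):

lexer = Puppet::Parser::Lexer.new
lexer.warn_if_variable_has_hyphen('my-var')
# Emits a deprecation warning pointing at '<string>', since no file is set;
# a name such as 'my_var' passes silently.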