Class: Puppet::Parser::Lexer

Inherits: Object
Defined in:
lib/puppet/parser/lexer.rb

Defined Under Namespace

Classes: Token, TokenList

Constant Summary

TOKENS = TokenList.new

DQ_initial_token_types = { '$' => :DQPRE, '"' => :STRING }

DQ_continuation_token_types = { '$' => :DQMID, '"' => :DQPOST }

KEYWORDS = TokenList.new

@@pairs = {
  "{" => "}",
  "(" => ")",
  "[" => "]",
  "<|" => "|>",
  "<<|" => "|>>"
}

Instance Attribute Summary

Class Method Summary

Instance Method Summary

Constructor Details

#initialize ⇒ Lexer

Returns a new instance of Lexer.



# File 'lib/puppet/parser/lexer.rb', line 369

def initialize
  @find = 0
  @regex = 0
  initvars
end

Instance Attribute Details

#file ⇒ Object

Returns the value of attribute file.



# File 'lib/puppet/parser/lexer.rb', line 14

def file
  @file
end

#indefine ⇒ Object

Returns the value of attribute indefine.



# File 'lib/puppet/parser/lexer.rb', line 16

def indefine
  @indefine
end

#last ⇒ Object (readonly)

Returns the value of attribute last.



# File 'lib/puppet/parser/lexer.rb', line 14

def last
  @last
end

#lexing_context ⇒ Object (readonly)

Returns the value of attribute lexing_context.



# File 'lib/puppet/parser/lexer.rb', line 14

def lexing_context
  @lexing_context
end

#line ⇒ Object

Returns the value of attribute line.



# File 'lib/puppet/parser/lexer.rb', line 16

def line
  @line
end

#token_queue ⇒ Object (readonly)

Returns the value of attribute token_queue.



# File 'lib/puppet/parser/lexer.rb', line 14

def token_queue
  @token_queue
end

Class Method Details

.acceptable?(context = {}) ⇒ Boolean

Contextual check for the :NUMBER token (issue #4161): a number is not acceptable immediately after the start or middle of a double-quoted string.

Returns:

(Boolean)



# File 'lib/puppet/parser/lexer.rb', line 168

def (TOKENS[:NUMBER]).acceptable?(context={})
  ![:DQPRE,:DQMID].include? context[:after]
end

Instance Method Details

#clear ⇒ Object



# File 'lib/puppet/parser/lexer.rb', line 289

def clear
  initvars
end

#commentpop ⇒ Object

Returns the content of the currently accumulated comment cache.



# File 'lib/puppet/parser/lexer.rb', line 566

def commentpop
  @commentstack.pop[0]
end

#commentpush ⇒ Object



# File 'lib/puppet/parser/lexer.rb', line 580

def commentpush
  @commentstack.push(['', @line])
end

#expected ⇒ Object



# File 'lib/puppet/parser/lexer.rb', line 293

def expected
  return nil if @expected.empty?
  name = @expected[-1]
  TOKENS.lookup(name) or lex_error "Could not find expected token #{name}"
end
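
For orientation, here is a minimal, self-contained sketch (plain Ruby, independent of the lexer) of the delimiter tracking that scan performs with @@pairs and @expected; the pairs hash mirrors the constant above:

pairs = { "{" => "}", "(" => ")", "[" => "]", "<|" => "|>", "<<|" => "|>>" }
expected = []
["{", "(", ")", "}"].each do |value|
  if close = pairs[value]
    expected << close    # an opener: remember the closer we now expect
  elsif expected.last == value
    expected.pop         # found the closer we were waiting for
  end
end
expected  #=> []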

#find_regex_token ⇒ Object

Find the next token that matches a regex. We look for these first.



# File 'lib/puppet/parser/lexer.rb', line 335

def find_regex_token
  @regex += 1
  best_token = nil
  best_length = 0

  # I tried optimizing based on the first char, but it had
  # a slightly negative effect and was a good bit more complicated.
  TOKENS.regex_tokens.each do |token|
    if length = @scanner.match?(token.regex) and token.acceptable?(lexing_context)
      # We've found a longer match
      if length > best_length
        best_length = length
        best_token = token
      end
    end
  end

  return best_token, @scanner.scan(best_token.regex) if best_token
end
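
The longest-match rule matters when token regexes overlap. A minimal sketch of the same idea using StringScanner directly (the two candidate regexes are hypothetical, not the lexer's own):

require 'strscan'

candidates = { :LSHIFT => /<</, :LT => /</ }   # overlapping candidates
scanner = StringScanner.new("<< rest")

best_name, best_length = nil, 0
candidates.each do |name, regex|
  # match? returns the length of the match at the current position, or nil
  if length = scanner.match?(regex)
    best_name, best_length = name, length if length > best_length
  end
end
best_name  #=> :LSHIFT, because it matched two characters instead of one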

#find_string_token ⇒ Object



# File 'lib/puppet/parser/lexer.rb', line 323

def find_string_token
  # We know our longest string token is three chars, so try each size in turn
  # until we either match or run out of chars.  This way our worst-case is three
  # tries, where it is otherwise the number of string tokens we have.  Also,
  # the lookups are optimized hash lookups, instead of regex scans.
  #
  s = @scanner.peek(3)
  token = TOKENS.lookup(s[0,3]) || TOKENS.lookup(s[0,2]) || TOKENS.lookup(s[0,1])
  [ token, token && @scanner.scan(token.regex) ]
end
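
A sketch of that longest-first lookup with a plain hash standing in for TOKENS (the token names are chosen for illustration):

require 'strscan'

table = { "<<|" => :LLCOLLECT, "<|" => :LCOLLECT, "<" => :LT }
scanner = StringScanner.new("<<| tag |>>")

s = scanner.peek(3)
token = table[s[0, 3]] || table[s[0, 2]] || table[s[0, 1]]
token  #=> :LLCOLLECT, found in at most three hash lookups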

#find_token ⇒ Object

Find the next token, returning the token and its matched string.



# File 'lib/puppet/parser/lexer.rb', line 356

def find_token
  @find += 1
  shift_token || find_regex_token || find_string_token
end

#fullscan ⇒ Object

Scan the whole file; basically just used for testing.



# File 'lib/puppet/parser/lexer.rb', line 301

def fullscan
  array = []

  self.scan { |token, str|
    # Ignore any definition nesting problems
    @indefine = false
    array.push([token,str])
  }
  array
end
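
A usage sketch, assuming the puppet library is on the load path; the final [false, false] pair is scan's end-of-input marker:

require 'puppet'
require 'puppet/parser/lexer'

lexer = Puppet::Parser::Lexer.new
lexer.string = "notify { 'hi': }"
lexer.fullscan.each { |token, value| p [token, value] }
# prints one [name, { :value => ..., :line => ... }] pair per token,
# ending with [false, false]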

#getcomment(line = nil) ⇒ Object



# File 'lib/puppet/parser/lexer.rb', line 570

def getcomment(line = nil)
  comment = @commentstack.last
  if line.nil? or comment[1] <= line
    @commentstack.pop
    @commentstack.push(['', @line])
    return comment[0]
  end
  ''
end
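
The comment cache is a stack of [text, line] pairs; a flow sketch of how the pieces cooperate (assuming the puppet library is on the load path):

require 'puppet'
require 'puppet/parser/lexer'

lexer = Puppet::Parser::Lexer.new
lexer.commentpush    # open a fresh cache, as scan does on :LBRACE / :LPAREN
# ...while scanning, munge_token appends accumulated comment text
# to the top cache: comment[0] << value + "\n"
lexer.getcomment     #=> text gathered so far; also resets the top cache
lexer.commentpop     # as scan does on :RPAREN; returns and discards the top cache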

#indefine? ⇒ Boolean

Returns:

(Boolean)



# File 'lib/puppet/parser/lexer.rb', line 361

def indefine?
  if defined?(@indefine)
    @indefine
  else
    false
  end
end

#initvars ⇒ Object



# File 'lib/puppet/parser/lexer.rb', line 375

def initvars
  @line = 1
  @previous_token = nil
  @scanner = nil
  @file = nil
  # AAARRGGGG! okay, regexes in ruby are bloody annoying
  # no one else has "\n" =~ /\s/
  @skip = %r{[ \t\r]+}

  @namestack = []
  @token_queue = []
  @indefine = false
  @expected = []
  @commentstack = [ ['', @line] ]
  @lexing_context = {
    :after => nil,
    :start_of_line => true,
    :string_interpolation_depth => 0
    }
end

#lex_error(msg) ⇒ Object

Raises:

(Puppet::LexError)



# File 'lib/puppet/parser/lexer.rb', line 18

def lex_error msg
  raise Puppet::LexError.new(msg)
end

#munge_token(token, value) ⇒ Object

Make any necessary changes to the token and/or value.



# File 'lib/puppet/parser/lexer.rb', line 397

def munge_token(token, value)
  @line += 1 if token.incr_line

  skip if token.skip_text

  return if token.skip and not token.accumulate?

  token, value = token.convert(self, value) if token.respond_to?(:convert)

  return unless token

  if token.accumulate?
    comment = @commentstack.pop
    comment[0] << value + "\n"
    @commentstack.push(comment)
  end

  return if token.skip

  return token, { :value => value, :line => @line }
end

#namepop ⇒ Object

Go up one in the namespace.



# File 'lib/puppet/parser/lexer.rb', line 420

def namepop
  @namestack.pop
end

#namespace ⇒ Object

Collect the current namespace.



# File 'lib/puppet/parser/lexer.rb', line 425

def namespace
  @namestack.join("::")
end

#namestack(value) ⇒ Object

This value might have "::" in it, but we don't care – it'll be handled normally when joining, and when popping we want to pop this full value, however long the namespace is.



# File 'lib/puppet/parser/lexer.rb', line 432

def namestack(value)
  @namestack << value
end
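
Taken together with #namespace and #namepop, the stack behaves like this (assuming the puppet library is on the load path):

require 'puppet'
require 'puppet/parser/lexer'

lexer = Puppet::Parser::Lexer.new
lexer.namestack "foo"
lexer.namestack "bar::baz"   # a value containing "::" is pushed as one entry
lexer.namespace              #=> "foo::bar::baz"
lexer.namepop                # pops the full "bar::baz" entry
lexer.namespace              #=> "foo"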

#rest ⇒ Object



# File 'lib/puppet/parser/lexer.rb', line 436

def rest
  @scanner.rest
end

#scan {|[false,false]| ... } ⇒ Object

This is the heart of the lexer.

Yields:

  • ([false,false])


# File 'lib/puppet/parser/lexer.rb', line 441

def scan
  #Puppet.debug("entering scan")
  lex_error "Invalid or empty string" unless @scanner

  # Skip any initial whitespace.
  skip

  until token_queue.empty? and @scanner.eos? do
    yielded = false
    matched_token, value = find_token

    # error out if we didn't match anything at all
    lex_error "Could not match #{@scanner.rest[/^(\S+|\s+|.*)/]}" unless matched_token

    newline = matched_token.name == :RETURN

    # this matches a blank line; eat the previously accumulated comments
    getcomment if lexing_context[:start_of_line] and newline
    lexing_context[:start_of_line] = newline

    final_token, token_value = munge_token(matched_token, value)

    unless final_token
      skip
      next
    end

    lexing_context[:after]         = final_token.name unless newline
    lexing_context[:string_interpolation_depth] += 1 if final_token.name == :DQPRE
    lexing_context[:string_interpolation_depth] -= 1 if final_token.name == :DQPOST

    value = token_value[:value]

    if match = @@pairs[value] and final_token.name != :DQUOTE and final_token.name != :SQUOTE
      @expected << match
    elsif exp = @expected[-1] and exp == value and final_token.name != :DQUOTE and final_token.name != :SQUOTE
      @expected.pop
    end

    if final_token.name == :LBRACE or final_token.name == :LPAREN
      commentpush
    end
    if final_token.name == :RPAREN
      commentpop
    end

    yield [final_token.name, token_value]

    if @previous_token
      namestack(value) if @previous_token.name == :CLASS and value != '{'

      if @previous_token.name == :DEFINE
        if indefine?
          msg = "Cannot nest definition #{value} inside #{@indefine}"
          self.indefine = false
          raise Puppet::ParseError, msg
        end

        @indefine = value
      end
    end
    @previous_token = final_token
    skip
  end
  @scanner = nil

  # This indicates that we're done parsing.
  yield [false,false]
end
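
A driving sketch, assuming the puppet library is on the load path; the token names in the output comment are what the lexer is expected to produce for this input:

require 'puppet'
require 'puppet/parser/lexer'

lexer = Puppet::Parser::Lexer.new
lexer.string = "$x = $y"
lexer.scan do |name, value|
  break if name == false    # the end-of-input sentinel
  puts "#{name.inspect} #{value[:value].inspect} (line #{value[:line]})"
end
# Expected output, roughly:
#   :VARIABLE "x" (line 1)
#   :EQUALS   "=" (line 1)
#   :VARIABLE "y" (line 1)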

#scan_until(regex) ⇒ Object

Provide some limited access to the scanner, for those tokens that need it.



# File 'lib/puppet/parser/lexer.rb', line 518

def scan_until(regex)
  @scanner.scan_until(regex)
end

#shift_token ⇒ Object



# File 'lib/puppet/parser/lexer.rb', line 319

def shift_token
  @token_queue.shift
end

#skip ⇒ Object

Skip any skipchars in our remaining string.



# File 'lib/puppet/parser/lexer.rb', line 512

def skip
  @scanner.skip(@skip)
end

#slurpstring(terminators, escapes = %w{ \\ $ ' " n t s }+["\n"], ignore_invalid_escapes = false) ⇒ Object

We’ve encountered the start of a string… slurp in the rest of the string and return it.



# File 'lib/puppet/parser/lexer.rb', line 524

def slurpstring(terminators,escapes=%w{ \\  $ ' " n t s }+["\n"],ignore_invalid_escapes=false)
  # we search for the next quote that isn't preceded by a
  # backslash; the caret is there to match empty strings
  str = @scanner.scan_until(/([^\\]|^|[^\\])([\\]{2})*[#{terminators}]/) or lex_error "Unclosed quote after '#{last}' in '#{rest}'"
  @line += str.count("\n") # literal newlines add to the line count.
  str.gsub!(/\\(.)/m) {
    ch = $1
    if escapes.include? ch
      case ch
      when 'n'; "\n"
      when 't'; "\t"
      when 's'; " "
      when "\n"; ''
      else      ch
      end
    else
      Puppet.warning "Unrecognised escape sequence '\\#{ch}'#{file && " in file #{file}"}#{line && " at line #{line}"}" unless ignore_invalid_escapes
      "\\#{ch}"
    end
  }
  [ str[0..-2],str[-1,1] ]
end
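
A sketch of the escape handling, assuming the puppet library is on the load path; string= positions the scanner as if an opening double quote had just been consumed:

require 'puppet'
require 'puppet/parser/lexer'

lexer = Puppet::Parser::Lexer.new
lexer.string = 'hello\tworld" trailing'  # literal backslash-t before the quote
str, terminator = lexer.slurpstring('"')
str         #=> "hello\tworld", with the \t expanded to a real tab
terminator  #=> '"'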

#string=(string) ⇒ Object

Just parse a string, not a whole file.



# File 'lib/puppet/parser/lexer.rb', line 561

def string=(string)
  @scanner = StringScanner.new(string)
end

#tokenize_interpolated_string(token_type, preamble = '') ⇒ Object



# File 'lib/puppet/parser/lexer.rb', line 547

def tokenize_interpolated_string(token_type,preamble='')
  value,terminator = slurpstring('"$')
  token_queue << [TOKENS[token_type[terminator]],preamble+value]
  if terminator != '$' or @scanner.scan(/\{/)
    token_queue.shift
  elsif var_name = @scanner.scan(TOKENS[:VARIABLE].regex)
    token_queue << [TOKENS[:VARIABLE],var_name]
    tokenize_interpolated_string(DQ_continuation_token_types)
  else
    tokenize_interpolated_string(token_type,token_queue.pop.last + terminator)
  end
end
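
For an interpolated string such as "foo$bar baz", the recursion above queues roughly this token stream (a sketch; the first pair is returned immediately via token_queue.shift, the rest are drained later by shift_token):

# "foo$bar baz" lexes as:
#   :DQPRE    "foo"     # initial segment, terminator was '$'
#   :VARIABLE "bar"
#   :DQPOST   " baz"    # final segment, terminator was '"'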