Module: Puppet::Pops::Parser::HeredocSupport

Includes:: LexerSupport

Included in:: Lexer2

Defined in:: lib/puppet/pops/parser/heredoc_support.rb

Constant Summary collapse

PATTERN_HEREDOC = Pattern for heredoc `@(endtag[:syntax][/escapes])`. Produces groups for endtag (group 1), syntax (group 2), and escapes (group 3).

%r{@\(([^:/\r\n\)]+)(?::[[:blank:]]*([a-z][a-zA-Z0-9_+]+)[[:blank:]]*)?(?:/((?:\w|[$])*)[[:blank:]]*)?\)}

Constants included from LexerSupport

LexerSupport::BOM_BOCU, LexerSupport::BOM_GB_18030, LexerSupport::BOM_SCSU, LexerSupport::BOM_UTF_1, LexerSupport::BOM_UTF_16_1, LexerSupport::BOM_UTF_16_2, LexerSupport::BOM_UTF_32_1, LexerSupport::BOM_UTF_32_2, LexerSupport::BOM_UTF_8, LexerSupport::BOM_UTF_EBCDIC, LexerSupport::LONGEST_BOM, LexerSupport::MM, LexerSupport::MM_ANY

Instance Method Summary collapse

#heredoc ⇒ Object
#heredoc_text(lines, leading, has_margin, remove_break) ⇒ Array

Produces the heredoc text string, and an array with the margin size per line, given the individual (unprocessed) lines as an array.

Methods included from LexerSupport

#assert_not_bom, #assert_numeric, #create_lex_error, #filename, #followed_by, #format_quote, #get_bom, #lex_error, #lex_error_without_pos, #lex_warning, #line, #position

Instance Method Details

#heredoc ⇒ `Object`

# File 'lib/puppet/pops/parser/heredoc_support.rb', line 12

# Lexes a heredoc whose `@(` starts at the scanner's current position.
#
# Steps, in order:
# 1. Scans the heredoc spec up to the closing `)` and splits it into endtag, syntax and escapes
#    using PATTERN_HEREDOC.
# 2. Validates the endtag and the escape flags, building the list of escapes to honor.
# 3. Enqueues a :HEREDOC token carrying the syntax name and the length of the spec.
# 4. Moves to where the heredoc text starts (after the next newline, or at the position recorded in
#    the lexing context's :newline_jump by a preceding heredoc), and collects lines until a line
#    matching the end tag pattern is found.
# 5. Enqueues a :SUBLOCATE token, lexes the collected text with a new lexer instance using a
#    sub-locator, and interpolates the result into this lexer.
# 6. Repositions the scanner right after the `@(...)` spec so lexing continues on the original line.
#
# Lex errors are reported for: an unclosed parenthesis, an invalid heredoc spec, an empty endtag,
# duplicate escape flags, an unknown escape flag, and a heredoc that is not followed by a newline.
# A pre-built HEREDOC_WITHOUT_END_TAGGED_LINE error is raised if no end line is found.
#
# NOTE(review): the `|| lex_error(...)` and `lex_error(...) unless ...` forms assume that lex_error
# raises rather than returns - it is defined outside this method; confirm in LexerSupport.
#
# @return [void]
#
def heredoc
  scn = @scanner
  ctx = @lexing_context
  locator = @locator
  # Remember where the heredoc spec starts; used for the :HEREDOC token's position and length
  before = scn.pos

  # scanner is at position before @(
  # find end of the heredoc spec
  str = scn.scan_until(/\)/) || lex_error(Issues::HEREDOC_UNCLOSED_PARENTHESIS, :followed_by => followed_by)
  # Scanning resumes here once the heredoc text has been processed
  pos_after_heredoc = scn.pos
  # Note: allows '+' as separator in syntax, but this needs validation as empty segments are not allowed
  md = str.match(PATTERN_HEREDOC)
  lex_error(Issues::HEREDOC_INVALID_SYNTAX) unless md
  endtag = md[1]
  syntax = md[2] || ''
  escapes = md[3]

  endtag.strip!

  # Is this a dq string style heredoc? (endtag enclosed in "")
  # (dqstring_style is left as nil when the endtag is not quoted)
  if endtag =~ /^"(.*)"$/
    dqstring_style = true
    endtag = $1.strip
  end

  lex_error(Issues::HEREDOC_EMPTY_ENDTAG) unless endtag.length >= 1

  # resulting_escapes stays empty when the spec has no '/' part (escapes is then nil)
  resulting_escapes = []
  if escapes
    # A '/' without any flags enables all escapes
    escapes = "trnsuL$" if escapes.length < 1

    escapes = escapes.split('')
    # Each escape flag may be given only once
    unless escapes.length == escapes.uniq.length
      lex_error(Issues::HEREDOC_MULTIPLE_AT_ESCAPES, :escapes => escapes)
    end
    # Whenever escapes are specified, the backslash itself is included among them
    resulting_escapes = ["\\"]
    escapes.each do |e|
      case e
      when "t", "r", "n", "s", "u", "$"
        resulting_escapes << e
      when "L"
        # 'L' is expanded into both line ending forms (LF and CRLF)
        resulting_escapes += ["\n", "\r\n"]
      else
        lex_error(Issues::HEREDOC_INVALID_ESCAPE, :actual => e)
      end
    end
  end

  # Produce a heredoc token to make the syntax available to the grammar
  enqueue_completed([:HEREDOC, syntax, pos_after_heredoc - before], before)

  # If this is the second or subsequent heredoc on the line, the lexing context's :newline_jump contains
  # the position after the \n where the next heredoc text should scan. If not set, this is the first
  # and it should start scanning after the first found \n (or if not found == error).

  if ctx[:newline_jump]
    scn.pos = ctx[:newline_jump]
  else
    scn.scan_until(/\n/) || lex_error(Issues::HEREDOC_WITHOUT_TEXT)
  end
  # offset 0 for the heredoc, and its line number
  heredoc_offset = scn.pos
  heredoc_line = locator.line_for_offset(heredoc_offset)-1

  # Compute message to emit if there is no end (to make it refer to the opening heredoc position).
  eof_error = create_lex_error(Issues::HEREDOC_WITHOUT_END_TAGGED_LINE)

  # Text from this position (+ lexing contexts offset for any preceding heredoc) is heredoc until a line
  # that terminates the heredoc is found.

  # (Endline in EBNF form): WS* ('|' WS*)? ('-' WS*)? endtag WS* \r? (\n|$)
  # Captures: leading whitespace (group 1), the margin marker '|' (group 2), the trim marker '-' (group 3).
  # NOTE(review): the pattern is not anchored to the start of the line, so `match` below can also succeed
  # when the end tag is preceded by other text on the same line - confirm whether that is intended.
  endline_pattern = /([[:blank:]]*)(?:([|])[[:blank:]]*)?(?:(\-)[[:blank:]]*)?#{Regexp.escape(endtag)}[[:blank:]]*\r?(?:\n|\z)/
  lines = []
  while !scn.eos? do
    # Take one line at a time, including its line terminator (or up to the end of the input)
    one_line = scn.scan_until(/(?:\n|\z)/)
    raise eof_error unless one_line
    if md = one_line.match(endline_pattern)
      leading      = md[1]
      has_margin   = md[2] == '|'
      remove_break = md[3] == '-'
      # Record position where next heredoc (from same line as current @()) should start scanning for content
      ctx[:newline_jump] = scn.pos


      # Process captured lines - remove leading, and trailing newline
      # get processed string and index of removed margin/leading size per line
      str, margin_per_line = heredoc_text(lines, leading, has_margin, remove_break)

      # Use a new lexer instance configured with a sub-locator to enable correct positioning
      sublexer = self.class.new()
      # From here on, the local `locator` refers to the sub-locator wrapping the original locator
      locator = Locator::SubLocator.new(locator, str, heredoc_line, heredoc_offset, has_margin, margin_per_line)

      # Emit a token that provides the grammar with location information about the lines on which the heredoc
      # content is based.
      # The token value holds the unprocessed lines and the sum of their lengths.
      enqueue([:SUBLOCATE,
        LexerSupport::TokenValue.new([:SUBLOCATE,
          lines, lines.reduce(0) {|size, s| size + s.length} ],
          heredoc_offset,
          locator)])

      sublexer.lex_unquoted_string(str, locator, resulting_escapes, dqstring_style)
      sublexer.interpolate_uq_to(self)
      # Continue scan after @(...)
      scn.pos = pos_after_heredoc
      return
    else
      # Not the end line; keep it (unprocessed) as part of the heredoc text
      lines << one_line
    end
  end
  # Input ended without a line that terminates the heredoc
  raise eof_error
end

#heredoc_text(lines, leading, has_margin, remove_break) ⇒ `Array`

Produces the heredoc text string, and an array with the margin size per line, given the individual (unprocessed) lines as an array.

Parameters:

lines (Array<String>) —

unprocessed lines of text in the heredoc w/o terminating line
leading (String) —

the leading text (up to pipe or other terminating char)
has_margin (Boolean) —

if the left margin should be adjusted as indicated by `leading`
remove_break (Boolean) —

if the line break (`\r?\n`) at the end of the last line should be removed or not

Returns:

(Array) —
- a tuple with resulting string, and an array with margin size per line

# File 'lib/puppet/pops/parser/heredoc_support.rb', line 131

# Builds the heredoc text from the raw lines collected by the lexer.
#
# @param lines [Array<String>] the unprocessed heredoc lines, without the terminating end-tag line
# @param leading [String] the whitespace found in front of the margin marker on the end-tag line
# @param has_margin [Boolean] whether `leading` should be stripped from the start of the lines
# @param remove_break [Boolean] whether the final line break (\r?\n) should be dropped from the text
# @return [Array] a tuple of the resulting string, and an array with the number of characters
#   removed from each line
#
def heredoc_text(lines, leading, has_margin, remove_break)
  if !has_margin || leading.empty?
    # Nothing to strip; every line keeps its full length
    stripped = lines
    margin_per_line = [0] * lines.length
  else
    margin_re = /^#{Regexp.escape(leading)}/
    # TODO: Deliberately kept bug compatible with earlier behavior rather than following the
    # specification: the specification asks for the leading space up to the margin marker to be
    # removed, whereas a line with text inside the margin is left exactly as it is here.
    #
    stripped = lines.map { |line| line.gsub(margin_re, '') }
    # The removed margin is the difference in length between the raw and the stripped line
    margin_per_line = lines.zip(stripped).map { |raw, cut| raw.length - cut.length }
  end

  text = stripped.join('')
  text.gsub!(/\r?\n\z/m, '') if remove_break
  [text, margin_per_line]
end