Module: Puppet::Pops::Parser::HeredocSupport

Includes:
LexerSupport
Included in:
Lexer2
Defined in:
lib/puppet/pops/parser/heredoc_support.rb

Constant Summary collapse

PATTERN_HEREDOC =

Pattern for heredoc `@(endtag[:syntax][/escapes])`. Produces groups for endtag (group 1), syntax (group 2), and escapes (group 3).

%r{@\(([^:/\r\n\)]+)(?::[[:blank:]]*([a-z][a-zA-Z0-9_+]+)[[:blank:]]*)?(?:/((?:\w|[$])*)[[:blank:]]*)?\)}

Constants included from LexerSupport

LexerSupport::BOM_BOCU, LexerSupport::BOM_GB_18030, LexerSupport::BOM_SCSU, LexerSupport::BOM_UTF_1, LexerSupport::BOM_UTF_16_1, LexerSupport::BOM_UTF_16_2, LexerSupport::BOM_UTF_32_1, LexerSupport::BOM_UTF_32_2, LexerSupport::BOM_UTF_8, LexerSupport::BOM_UTF_EBCDIC, LexerSupport::LONGEST_BOM, LexerSupport::MM, LexerSupport::MM_ANY

Instance Method Summary collapse

Methods included from LexerSupport

#assert_not_bom, #assert_numeric, #create_lex_error, #filename, #followed_by, #format_quote, #get_bom, #lex_error, #lex_error_without_pos, #lex_warning, #line, #position

Instance Method Details

#heredoc ⇒ Object


12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/puppet/pops/parser/heredoc_support.rb', line 12

# Lexes a heredoc whose `@(` opener is at the current scanner position.
#
# Steps, in order:
# 1. Scan up to the first `)` and match the spec against PATTERN_HEREDOC to obtain the end tag,
#    the optional syntax and the optional escapes.
# 2. Validate the end tag and the escapes, and build the list of escapes to pass to the sub lexer.
# 3. Enqueue a :HEREDOC token carrying the syntax and the length of the `@(...)` spec.
# 4. Collect text lines (starting after the current line, or at the lexing context's :newline_jump)
#    until a line matching the end-tag pattern is found.
# 5. Enqueue a :SUBLOCATE token, lex the collected text with a new lexer instance using a
#    Locator::SubLocator, and interpolate the result into this lexer.
# 6. Reset the scanner to the position right after `@(...)` so lexing continues on the opening line.
#
# Syntax problems are reported through lex_error (from LexerSupport). A missing end-tag line raises the
# error prepared with create_lex_error before the text is scanned.
#
# @return [nil] returns via a bare `return` once the end-tag line has been processed
#
def heredoc
  scn = @scanner
  ctx = @lexing_context
  locator = @locator
  # Remember where the heredoc spec starts; used for the :HEREDOC token position and length
  before = scn.pos

  # scanner is at position before @(
  # find end of the heredoc spec
  str = scn.scan_until(/\)/) || lex_error(Issues::HEREDOC_UNCLOSED_PARENTHESIS, :followed_by => followed_by)
  pos_after_heredoc = scn.pos
  # Note: allows '+' as separator in syntax, but this needs validation as empty segments are not allowed
  md = str.match(PATTERN_HEREDOC)
  lex_error(Issues::HEREDOC_INVALID_SYNTAX) unless md
  endtag = md[1]
  # syntax is optional in the spec; an empty string is used when it is absent
  syntax = md[2] || ''
  # escapes is nil when the spec has no '/' part, and '' when '/' is given without any escape characters
  escapes = md[3]

  endtag.strip!

  # Is this a dq string style heredoc? (endtag enclosed in "")
  # (dqstring_style stays nil when the end tag is not quoted)
  if endtag =~ /^"(.*)"$/
    dqstring_style = true
    endtag = $1.strip
  end

  lex_error(Issues::HEREDOC_EMPTY_ENDTAG) unless endtag.length >= 1

  resulting_escapes = []
  if escapes
    # A '/' without any escape characters turns on all supported escapes
    escapes = "trnsuL$" if escapes.length < 1

    escapes = escapes.split('')
    # Each escape character may be given at most once
    unless escapes.length == escapes.uniq.length
      lex_error(Issues::HEREDOC_MULTIPLE_AT_ESCAPES, :escapes => escapes)
    end
    # When any escapes are given, the backslash itself is always included
    resulting_escapes = ["\\"]
    escapes.each do |e|
      case e
      when "t", "r", "n", "s", "u", "$"
        resulting_escapes << e
      when "L"
        # 'L' is expanded to both line break forms (LF and CRLF)
        resulting_escapes += ["\n", "\r\n"]
      else
        lex_error(Issues::HEREDOC_INVALID_ESCAPE, :actual => e)
      end
    end
  end

  # Produce a heredoc token to make the syntax available to the grammar
  enqueue_completed([:HEREDOC, syntax, pos_after_heredoc - before], before)

  # If this is the second or subsequent heredoc on the line, the lexing context's :newline_jump contains
  # the position after the \n where the next heredoc text should scan. If not set, this is the first
  # and it should start scanning after the first found \n (or if not found == error).

  if ctx[:newline_jump]
    scn.pos = ctx[:newline_jump]
  else
    scn.scan_until(/\n/) || lex_error(Issues::HEREDOC_WITHOUT_TEXT)
  end
  # offset 0 for the heredoc, and its line number
  # NOTE(review): the -1 presumably converts the locator's line number to a zero-based
  # line offset for the SubLocator - confirm against Locator::SubLocator.
  heredoc_offset = scn.pos
  heredoc_line = locator.line_for_offset(heredoc_offset)-1

  # Compute message to emit if there is no end (to make it refer to the opening heredoc position).
  eof_error = create_lex_error(Issues::HEREDOC_WITHOUT_END_TAGGED_LINE)

  # Text from this position (+ lexing contexts offset for any preceding heredoc) is heredoc until a line
  # that terminates the heredoc is found.

  # (Endline in EBNF form): WS* ('|' WS*)? ('-' WS*)? endtag WS* \r? (\n|$)
  # Groups: 1 = blanks immediately before the margin marker / end tag, 2 = '|' margin marker, 3 = '-' marker.
  # NOTE(review): the pattern is not anchored at the start of the line, and String#match searches the
  # whole line, so a text line that merely ends with the end tag also terminates the heredoc.
  endline_pattern = /([[:blank:]]*)(?:([|])[[:blank:]]*)?(?:(\-)[[:blank:]]*)?#{Regexp.escape(endtag)}[[:blank:]]*\r?(?:\n|\z)/
  lines = []
  while !scn.eos? do
    # Take one line including its terminating \n (or the remainder of the input)
    one_line = scn.scan_until(/(?:\n|\z)/)
    raise eof_error unless one_line
    md = one_line.match(endline_pattern)
    if md
      leading      = md[1]
      has_margin   = md[2] == '|'
      remove_break = md[3] == '-'
      # Record position where next heredoc (from same line as current @()) should start scanning for content
      ctx[:newline_jump] = scn.pos


      # Process captured lines - remove leading, and trailing newline
      # get processed string and index of removed margin/leading size per line
      str, margin_per_line = heredoc_text(lines, leading, has_margin, remove_break)

      # Use a new lexer instance configured with a sub-locator to enable correct positioning
      # (the local `locator` is rebound here; the sub-locator is created from the previous locator)
      sublexer = self.class.new()
      locator = Locator::SubLocator.new(locator, str, heredoc_line, heredoc_offset, has_margin, margin_per_line)

      # Emit a token that provides the grammar with location information about the lines on which the heredoc
      # content is based.
      # The token value holds the unprocessed lines and their total length.
      enqueue([:SUBLOCATE,
        LexerSupport::TokenValue.new([:SUBLOCATE,
          lines, lines.reduce(0) {|size, s| size + s.length} ],
          heredoc_offset,
          locator)])

      # Lex the heredoc text with the selected escapes, then transfer the result to this lexer
      sublexer.lex_unquoted_string(str, locator, resulting_escapes, dqstring_style)
      sublexer.interpolate_uq_to(self)
      # Continue scan after @(...)
      scn.pos = pos_after_heredoc
      return
    else
      # Not an end-tag line; keep it (unprocessed) as heredoc text
      lines << one_line
    end
  end
  # Input ended without a line matching the end tag
  raise eof_error
end

#heredoc_text(lines, leading, has_margin, remove_break) ⇒ Array

Given the individual (unprocessed) lines as an array, produces the heredoc text string together with an array of the margin size removed from each line.


132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/puppet/pops/parser/heredoc_support.rb', line 132

# Builds the final heredoc text from the raw text lines.
#
# @param lines [Array<String>] the unprocessed heredoc lines, each including its line terminator
# @param leading [String] the whitespace found in front of the margin marker on the end-tag line
# @param has_margin [Boolean] whether the end-tag line had a '|' margin marker
# @param remove_break [Boolean] whether the end-tag line had a '-' marker (drop the final line break)
# @return [Array] a two element array: the resulting text, and an array with the number of
#   characters removed from the start of each line
#
def heredoc_text(lines, leading, has_margin, remove_break)
  removed_per_line =
    if has_margin && !leading.empty?
      margin = /^#{Regexp.escape(leading)}/
      # TODO: Not according to the specification, but kept to be bug compatible.
      # The specification says leading space up to the margin marker should be removed; here a line
      # is only trimmed when it starts with the complete margin, so lines with text in the margin
      # are left untouched.
      trimmed = lines.map { |line| line.gsub(margin, '') }
      sizes = lines.zip(trimmed).map { |raw, cut| raw.length - cut.length }
      lines = trimmed
      sizes
    else
      # Nothing is removed from any line
      [0] * lines.length
    end

  text = lines.join('')
  # A '-' on the end-tag line means the final line break is not part of the text
  text.gsub!(/\r?\n\z/m, '') if remove_break
  [text, removed_per_line]
end