Module: Puppet::Pops::Parser::SlurpSupport

Includes:
LexerSupport
Included in:
Lexer2
Defined in:
lib/puppet/pops/parser/slurp_support.rb

Overview

TODO: More detailed performance analysis of excessive character escaping and interpolation.

Constant Summary collapse

SLURP_SQ_PATTERN =
/(?:[^\\]|^)(?:\\{2})*'/
SLURP_DQ_PATTERN =
/(?:[^\\]|^)(?:\\{2})*("|[$]\{?)/
SLURP_UQ_PATTERN =
/(?:[^\\]|^)(?:\\{2})*([$]\{?|\z)/
SLURP_UQNE_PATTERN =

unquoted, no escapes

/(\$\{?|\z)/m
SLURP_ALL_PATTERN =
/.*(\z)/m
SQ_ESCAPES =
%w[\\ ']
DQ_ESCAPES =
%w[\\  $ ' " r n t s u] + ["\r\n", "\n"]
UQ_ESCAPES =
%w[\\  $ r n t s u] + ["\r\n", "\n"]

Constants included from LexerSupport

LexerSupport::BOM_BOCU, LexerSupport::BOM_GB_18030, LexerSupport::BOM_SCSU, LexerSupport::BOM_UTF_1, LexerSupport::BOM_UTF_16_1, LexerSupport::BOM_UTF_16_2, LexerSupport::BOM_UTF_32_1, LexerSupport::BOM_UTF_32_2, LexerSupport::BOM_UTF_8, LexerSupport::BOM_UTF_EBCDIC, LexerSupport::LONGEST_BOM, LexerSupport::MM, LexerSupport::MM_ANY

Instance Method Summary collapse

Methods included from LexerSupport

#assert_not_bom, #assert_numeric, #create_lex_error, #filename, #followed_by, #format_quote, #get_bom, #lex_error, #lex_error_without_pos, #lex_warning, #line, #position

Instance Method Details

#slurp(scanner, pattern, escapes, ignore_invalid_escapes) ⇒ Object

Slurps a string from the given scanner until the given pattern and then replaces any escaped characters given by escapes with their control-character equivalent or, in the case of line breaks, replaces the pattern \r?\n with an empty string. The returned string contains the terminating character. Returns nil if the scanner cannot scan until the given pattern.



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/puppet/pops/parser/slurp_support.rb', line 68

# Slurps a string from the given scanner up to and including the first match of
# +pattern+, then translates the backslash escapes listed in +escapes+.
#
# * When the slurped text contains no backslash it is returned as is.
# * When +escapes+ is the SQ_ESCAPES array itself (identity check), only \\ and \'
#   are unescaped and everything else is left untouched.
# * Otherwise \uXXXX and \u{X..XXXXXX} are converted first (only when 'u' is in
#   +escapes+), followed by the single character escapes. An escaped line break
#   (backslash followed by \n or \r\n) is replaced by an empty string when listed
#   in +escapes+.
#
# A warning is issued via lex_warning for a \u that is not a well formed unicode
# escape, and (unless +ignore_invalid_escapes+ is truthy) for any escape that is not
# listed in +escapes+; in both cases the escape is kept verbatim in the result.
#
# @param scanner [#scan_until] the scanner to read from
# @param pattern [Regexp] the pattern that terminates the string
# @param escapes [Array<String>] the characters that are recognized after a backslash
# @param ignore_invalid_escapes [Object] truthy to suppress warnings for unrecognized escapes
# @return [String, nil] the slurped string including its terminator, or nil if
#   the scanner could not scan until +pattern+
#
def slurp(scanner, pattern, escapes, ignore_invalid_escapes)
  str = scanner.scan_until(pattern) || return

  return str unless str.include?('\\')

  return str.gsub!(/\\(\\|')/m, '\1') || str if escapes.equal?(SQ_ESCAPES)

  begin
    # Process unicode escapes first as they require getting 4 hex digits
    # If later a \u is found it is warned not to be a unicode escape
    #
    # This loop must be inside the begin/rescue: an escape that denotes an illegal
    # code point (for example a surrogate such as \uD800) produces an invalid byte
    # sequence, and it is the *next* gsub! over the string - which may be the next
    # iteration of this very loop - that raises the ArgumentError.
    if escapes.include?('u')
      # gsub must be repeated to cater for adjacent escapes
      while str.gsub!(/((?:[^\\]|^)(?:\\{2})*)\\u(?:([\da-fA-F]{4})|\{([\da-fA-F]{1,6})\})/m) { ::Regexp.last_match(1) + [(::Regexp.last_match(2) || ::Regexp.last_match(3)).hex].pack("U") }
        # empty block. Everything happens in the gsub block
      end
    end

    str.gsub!(/\\([^\r\n]|(?:\r?\n))/m) {
      ch = ::Regexp.last_match(1)
      if escapes.include? ch
        case ch
        when 'r'; "\r"
        when 'n'; "\n"
        when 't'; "\t"
        when 's'; ' '
        when 'u'
          # Any \u still present was not a well formed unicode escape
          lex_warning(Issues::ILLEGAL_UNICODE_ESCAPE)
          "\\u"
        when "\n"; ''
        when "\r\n"; ''
        else ch
        end
      else
        lex_warning(Issues::UNRECOGNIZED_ESCAPE, :ch => ch) unless ignore_invalid_escapes
        "\\#{ch}"
      end
    }
  rescue ArgumentError => e
    # A invalid byte sequence may be the result of faulty input as well, but that could not possibly
    # have reached this far... Unfortunately there is no more specific error and a match on message is
    # required to differentiate from other internal problems.
    if e.message =~ /invalid byte sequence/
      lex_error(Issues::ILLEGAL_UNICODE_ESCAPE)
    else
      raise e
    end
  end
  str
end

#slurp_dqstring ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/puppet/pops/parser/slurp_support.rb', line 34

# Slurps the next segment of a double quoted string from @scanner using
# SLURP_DQ_PATTERN and DQ_ESCAPES (warnings for unrecognized escapes enabled).
#
# Reports Issues::UNCLOSED_QUOTE via lex_error when nothing could be slurped.
#
# @return [Array(String, String)] the slurped text without its terminator, and the
#   terminator itself (capture group 1 of the scanner's most recent match)
#
def slurp_dqstring
  scanner = @scanner
  # Must be read before slurping, since slurping replaces the scanner's last match.
  # It is only used for the error message.
  opening = scanner.matched
  text = slurp(scanner, SLURP_DQ_PATTERN, DQ_ESCAPES, false)
  if text.nil?
    lex_error(Issues::UNCLOSED_QUOTE, :after => format_quote(opening), :followed_by => followed_by)
  end

  # Group 1 of the last slurp is the terminator: '"', '$', or '${'
  ending = scanner[1]
  last_kept = -1 - ending.length
  [text[0..last_kept], ending]
end

#slurp_sqstring ⇒ Object



26
27
28
29
30
31
32
# File 'lib/puppet/pops/parser/slurp_support.rb', line 26

# Slurps a single quoted string from @scanner using SLURP_SQ_PATTERN and SQ_ESCAPES.
# The scanner is expected to be positioned at the opening quote, which is skipped.
#
# Reports Issues::UNCLOSED_QUOTE via lex_error when nothing could be slurped.
#
# @return [String] the slurped text without the closing quote
#
def slurp_sqstring
  scanner = @scanner
  # step over the opening '
  scanner.pos = scanner.pos + 1
  text = slurp(scanner, SLURP_SQ_PATTERN, SQ_ESCAPES, :ignore_invalid_escapes)
  if text.nil?
    lex_error(Issues::UNCLOSED_QUOTE, :after => "\"'\"", :followed_by => followed_by)
  end

  # everything except the last character, which is the closing "'"
  text[0...-1]
end

#slurp_uqstring ⇒ Object

Copy from old lexer - can do much better



48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/puppet/pops/parser/slurp_support.rb', line 48

# Slurps unquoted text from @scanner, using the pattern and escapes found in
# @lexing_context under :uq_slurp_pattern and :escapes. Warnings for unrecognized
# escapes are suppressed.
#
# @return [Array(String, String)] the slurped text without its terminator, and the
#   terminator itself
#
def slurp_uqstring
  scanner = @scanner
  context = @lexing_context
  text = slurp(scanner, context[:uq_slurp_pattern], context[:escapes], :ignore_invalid_escapes)

  # The terminator is capture group 1 of the scanner's most recent match, i.e. the
  # match made by slurp when it called scan_until. It may be a single character '$',
  # the two characters '${', or the empty string '' at the end of input.
  # A terminator, if present, is stripped from the text and returned separately.
  ending = scanner[1]
  last_kept = -1 - ending.length
  [text[0..last_kept], ending]
end