Module: EBNF::PEG::Rule

Defined in:
lib/ebnf/peg/rule.rb

Overview

Behavior for parsing a PEG rule

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#parser ⇒ EBNF::PEG::Parser

Initialized by parser when loading rules. Used for finding rules and invoking elements of the parse process.

Returns:

  • (EBNF::PEG::Parser)

9
10
11
# File 'lib/ebnf/peg/rule.rb', line 9

# The parser this rule is attached to.
#
# Initialized by the parser when loading rules; used for finding rules and
# invoking elements of the parse process.
#
# @return [EBNF::PEG::Parser]
def parser
  @parser
end

Instance Method Details

#eat_whitespace(input) ⇒ Object

Eat whitespace between non-terminal rules



213
214
215
216
217
218
219
220
# File 'lib/ebnf/peg/rule.rb', line 213

# Consume whitespace at the current input position, as configured on the parser.
#
# `parser.whitespace` may be a Regexp, which is skipped directly, or a Rule,
# which is parsed with its result thrown away. Any other value leaves the
# input untouched.
#
# @param [Scanner] input
# @return [Object]
#   whatever `input.skip` or the whitespace rule's `parse` returns,
#   or `nil` when whitespace is neither a Regexp nor a Rule
def eat_whitespace(input)
  case (skipper = parser.whitespace)
  when Regexp
    # Eat whitespace before a non-terminal
    input.skip(skipper)
  when Rule
    # Result is of no interest; only the advance of the input position matters
    skipper.parse(input)
  end
end

#parse(input) ⇒ Hash{Symbol => Object}, :unmatched

If there are `start_production` and/or `production` handlers, they are invoked with a `prod_data` stack, the input stream and offset. Otherwise, the results are added as an array value to a hash indexed by the rule name.

If matched, the input position is updated and the results returned in a Hash.

  • `alt`: returns the value of the matched production or `:unmatched`.

  • `diff`: returns the string value matched, or `:unmatched`.

  • `hex`: returns a string composed of the matched hex character, or `:unmatched`.

  • `opt`: returns the matched production, or `nil` if unmatched.

  • `plus`: returns an array of the matches for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.

  • `range`: returns a string composed of the character matching the range, or `:unmatched`.

  • `seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values.

  • `star`: returns an array of the matches for the specified production. For Terminals, these are concatenated into a single string.

Parameters:

  • input (Scanner)

Returns:

  • (Hash{Symbol => Object}, :unmatched)

    A hash with keys for each matched component of the expression. Returns :unmatched if the input does not match the production.



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/ebnf/peg/rule.rb', line 31

##
# Parse the input against this rule, memoizing the outcome per rule symbol
# and starting offset in `parser.packrat`.
#
# Terminals for which the parser supplies a regular expression are matched
# with that expression and reported through `parser.onTerminal`. Every other
# rule is bracketed by `parser.onStart(sym)` and `parser.onFinish(result)`,
# and evaluated according to the first element of `expr`:
#
# * `alt`: the value of the first matching production, or `:unmatched`.
# * `diff`: the string matched by the first expression, provided the second
#   expression does not match it; otherwise `:unmatched`.
# * `hex`: the matched character, or `:unmatched`.
# * `opt`: the matched production, or `nil` if unmatched.
# * `plus`: an array of matches, or `:unmatched` if there are none.
#   For terminals the matches are concatenated into a single string.
# * `range`: the character matching the range, or `:unmatched`.
# * `seq`: an array of single-entry hashes keyed by production name, or
#   `:unmatched` if any production fails. For terminals, the concatenated string.
# * `star`: an array of matches (possibly empty).
#   For terminals the matches are concatenated into a single string.
#
# Whitespace is only skipped by non-terminal rules; a terminal never skips
# whitespace between its own components, so its value is exactly the text it
# consumed.
#
# When the rule does not match, the input position and line number are
# restored to their values on entry.
#
# @param [Scanner] input
# @return [Hash{Symbol => Object}, :unmatched]
#   the value produced for the rule (as returned by `parser.onFinish`), or
#   `:unmatched` if the input does not match the production
# @raise [RuntimeError]
#   if a referenced rule cannot be found, or the expression type is unknown
def parse(input)
  # Save position and line number for backtracking
  pos, lineno = input.pos, input.lineno

  # Packrat memoization: replay an earlier outcome for this rule at this offset
  parser.packrat[sym] ||= {}
  if (memo = parser.packrat[sym][pos])
    parser.debug("#{sym}(:memo)", lineno: lineno) { "#{memo.inspect}(@#{pos})" }
    input.pos, input.lineno = memo[:pos], memo[:lineno]
    return memo[:result]
  end

  if terminal?
    # If the terminal is defined with a regular expression,
    # use that to match the input,
    # otherwise fall through and evaluate the rule expression below.
    if (regexp = parser.find_terminal_regexp(sym))
      matched = input.scan(regexp)
      result = matched ? parser.onTerminal(sym, matched) : :unmatched
      # Update furthest failure for strings and terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
      parser.packrat[sym][pos] = {
        pos: input.pos,
        lineno: input.lineno,
        result: result
      }
      return result
    end
  else
    eat_whitespace(input)
  end
  parser.onStart(sym)

  result = case expr.first
  when :alt
    # Return the first expression to match.
    # Result is either :unmatched, or the value of the matching rule
    alt = :unmatched
    expr[1..-1].each do |prod|
      alt = case prod
      when Symbol
        rule = parser.find_rule(prod)
        raise "No rule found for #{prod}" unless rule
        rule.parse(input)
      when String
        input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
      end
      if alt == :unmatched
        # Update furthest failure for strings and terminals
        parser.update_furthest_failure(input.pos, input.lineno, prod) if prod.is_a?(String) || rule.terminal?
      else
        break
      end
    end
    alt
  when :diff
    # matches any string that matches A but does not match B.
    # XXX: Should this work for arbitrary rules?
    re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
    matched = input.scan(re1)
    if !matched || re2.match?(matched)
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym)
      :unmatched
    else
      matched
    end
  when :hex
    # Matches the given hex character if expression matches the character whose number (code point) in ISO/IEC 10646 is N. The number of leading zeros in the #xN form is insignificant.
    input.scan(to_regexp) || begin
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, expr.last)
      :unmatched
    end
  when :opt
    # Always matches
    opt = case prod = expr[1]
    when Symbol
      rule = parser.find_rule(prod)
      raise "No rule found for #{prod}" unless rule
      rule.parse(input)
    when String
      input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
    end
    if opt == :unmatched
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, prod) if terminal?
      nil
    else
      opt
    end
  when :plus
    # Result is an array of all expressions while they match,
    # at least one must match.
    # NOTE(review): a production that matches without consuming input is not
    # guarded against here and would keep this loop running.
    prod, plus = expr[1], []
    case prod
    when Symbol
      rule = parser.find_rule(prod)
      raise "No rule found for #{prod}" unless rule
      while (res = rule.parse(input)) != :unmatched
        # Terminals are lexical units: never drop whitespace inside one
        eat_whitespace(input) unless terminal?
        plus << res
      end
    when String
      # Build the literal matcher once rather than on every iteration
      literal = Regexp.new(Regexp.quote(prod))
      while (res = input.scan(literal))
        eat_whitespace(input) unless terminal?
        plus << res
      end
    end
    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, prod)
    plus.empty? ? :unmatched : (terminal? ? plus.compact.join("") : plus.compact)
  when :range
    # Matches the specified character range
    input.scan(to_regexp) || begin
      # Update furthest failure for strings and terminals
      parser.update_furthest_failure(input.pos, input.lineno, expr[1])
      :unmatched
    end
  when :seq
    # Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
    seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
      # Skip whitespace between (not before) components, and never inside a terminal
      eat_whitespace(input) unless accumulator.empty? || terminal?
      res = case prod
      when Symbol
        rule = parser.find_rule(prod)
        raise "No rule found for #{prod}" unless rule
        rule.parse(input)
      when String
        input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
      end
      if res == :unmatched
        # Update furthest failure for strings and terminals
        parser.update_furthest_failure(input.pos, input.lineno, prod)
        break :unmatched
      end
      accumulator << {prod.to_sym => res}
    end
    if seq == :unmatched
      :unmatched
    elsif terminal?
      # Concat values for terminal production
      seq.map(&:values).compact.join("")
    else
      seq
    end
  when :star
    # Result is an array of all expressions while they match,
    # an empty array if none match.
    # NOTE(review): a production that matches without consuming input is not
    # guarded against here and would keep this loop running.
    prod, star = expr[1], []
    case prod
    when Symbol
      rule = parser.find_rule(prod)
      raise "No rule found for #{prod}" unless rule
      while (res = rule.parse(input)) != :unmatched
        # Terminals are lexical units: never drop whitespace inside one
        eat_whitespace(input) unless terminal?
        star << res
      end
    when String
      # Build the literal matcher once rather than on every iteration
      literal = Regexp.new(Regexp.quote(prod))
      while (res = input.scan(literal))
        eat_whitespace(input) unless terminal?
        star << res
      end
    end
    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, prod)
    # Terminals yield a single string, consistent with :plus
    terminal? ? star.compact.join("") : star.compact
  else
    raise "attempt to parse unknown rule type: #{expr.first}"
  end

  # Backtrack: an unmatched rule must not consume any input
  if result == :unmatched
    input.pos, input.lineno = pos, lineno
  end

  result = parser.onFinish(result)
  (parser.packrat[sym] ||= {})[pos] = {
    pos: input.pos,
    lineno: input.lineno,
    result: result
  }
  result
end