Module: EBNF::PEG::Rule
- Defined in:
- lib/ebnf/peg/rule.rb
Overview
Behavior for parsing a PEG rule
Instance Attribute Summary collapse
-
#parser ⇒ EBNF::PEG::Parser
Initialized by parser when loading rules.
Instance Method Summary collapse
-
#eat_whitespace(input) ⇒ Object
Eat whitespace between non-terminal rules.
-
#parse(input) ⇒ Hash{Symbol => Object}, :unmatched
If there are `start_production` and/or `production` handlers, they are invoked with a `prod_data` stack, the input stream and offset.
Instance Attribute Details
#parser ⇒ EBNF::PEG::Parser
Initialized by parser when loading rules. Used for finding rules and invoking elements of the parse process.
9 10 11 |
# File 'lib/ebnf/peg/rule.rb', line 9 def parser @parser end |
Instance Method Details
#eat_whitespace(input) ⇒ Object
Eat whitespace between non-terminal rules
213 214 215 216 217 218 219 220 |
# File 'lib/ebnf/peg/rule.rb', line 213

# Eat whitespace between non-terminal rules.
#
# The parser's `whitespace` setting selects the strategy: a Regexp is skipped
# directly on the input, while a Rule is parsed against the input. Any other
# value (for example nil) leaves the input untouched.
#
# @param input scanner-like input stream; must respond to `skip` when the
#   whitespace setting is a Regexp
# @return [Object, nil] whatever `input.skip` or the whitespace rule's `parse`
#   returned, or nil when neither strategy applies; callers in this file
#   ignore the value
def eat_whitespace(input)
  # Read the setting once so both branches test and use the same object.
  whitespace = parser.whitespace

  case whitespace
  when Regexp
    # Eat whitespace before a non-terminal
    input.skip(whitespace)
  when Rule
    whitespace.parse(input) # throw away result
  end
end
#parse(input) ⇒ Hash{Symbol => Object}, :unmatched
If there are `start_production` and/or `production` handlers, they are invoked with a `prod_data` stack, the input stream and offset. Otherwise, the results are added as an array value to a hash indexed by the rule name.
If matched, the input position is updated and the results returned in a Hash.
-
`alt`: returns the value of the matched production or `:unmatched`
-
`diff`: returns the string value matched, or `:unmatched`
-
`hex`: returns a string composed of the matched hex character, or `:unmatched`.
-
`opt`: returns the matched production, or `nil` if unmatched.
-
`plus`: returns an array of the matches for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.
-
`range`: returns a string composed of the character matching the range, or `:unmatched`.
-
`seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values.
-
`star`: returns an array of the matches for the specified production. For Terminals, these are concatenated into a single string.
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
# File 'lib/ebnf/peg/rule.rb', line 31

# Attempt to match this rule against `input` at its current position.
#
# Results are memoized in `parser.packrat`, keyed by the rule symbol and the
# starting position: a cache hit restores the recorded input position and
# line number and returns the cached result without re-parsing.
#
# Terminals for which the parser supplies a regular expression are matched
# directly with that expression and reported through `parser.onTerminal`;
# that path returns early and does not call `onStart`/`onFinish`. All other
# rules call `parser.onStart`, evaluate the expression according to its type
# (`expr.first`), rewind the input when the outcome is `:unmatched`, and pass
# the outcome through `parser.onFinish` before caching and returning it.
#
# @param input scanner-like input stream; this method uses `pos`, `pos=`,
#   `lineno`, `lineno=` and `scan` on it
# @return [Hash{Symbol => Object}, :unmatched] per the published signature;
#   NOTE(review): the concrete value is whatever `parser.onTerminal` or
#   `parser.onFinish` returns - confirm against the parser implementation
# @raise [RuntimeError] if a referenced production has no rule, or if the
#   expression type is not one of the handled kinds
def parse(input)
  # Save position and linenumber for backtracking
  pos, lineno = input.pos, input.lineno

  # Packrat memo lookup: reuse an earlier outcome for this rule at this position.
  parser.packrat[sym] ||= {}
  if parser.packrat[sym][pos]
    parser.debug("#{sym}(:memo)", lineno: lineno) { "#{parser.packrat[sym][pos].inspect}(@#{pos})"}
    # Move the input to where the memoized attempt left it.
    input.pos, input.lineno = parser.packrat[sym][pos][:pos], parser.packrat[sym][pos][:lineno]
    return parser.packrat[sym][pos][:result]
  end

  if terminal?
    # If the terminal is defined with a regular expression,
    # use that to match the input,
    # otherwise, fall through to the expression-based matching below.
    if regexp = parser.find_terminal_regexp(sym)
      matched = input.scan(regexp)
      result = (matched ? parser.onTerminal(sym, matched) : :unmatched)
      # Update furthest failure for strings and terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
      parser.packrat[sym][pos] = {
        pos: input.pos,
        lineno: input.lineno,
        result: result
      }
      return parser.packrat[sym][pos][:result]
    end
  else
    # Only non-terminals skip leading whitespace.
    eat_whitespace(input)
  end
  parser.onStart(sym)

  result = case expr.first
  when :alt
    # Return the first expression to match.
    # Result is either :unmatched, or the value of the matching rule
    alt = :unmatched
    expr[1..-1].each do |prod|
      alt = case prod
      when Symbol
        rule = parser.find_rule(prod)
        raise "No rule found for #{prod}" unless rule
        rule.parse(input)
      when String
        input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
      end
      if alt == :unmatched
        # Update furthest failure for strings and terminals
        parser.update_furthest_failure(input.pos, input.lineno, prod) if prod.is_a?(String) || rule.terminal?
      else
        # First alternative that matched wins.
        break
      end
    end
    alt
  when :diff
    # matches any string that matches A but does not match B.
    # XXX: Should this work for arbitrary rules?
    re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
    matched = input.scan(re1)
    if !matched || re2.match?(matched)
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym)
      :unmatched
    else
      matched
    end
  when :hex
    # Matches the given hex character if expression matches the character whose number (code point) in ISO/IEC 10646 is N. The number of leading zeros in the #xN form is insignificant.
    input.scan(to_regexp) || begin
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, expr.last)
      :unmatched
    end
  when :opt
    # Always matches
    opt = case prod = expr[1]
    when Symbol
      rule = parser.find_rule(prod)
      raise "No rule found for #{prod}" unless rule
      rule.parse(input)
    when String
      input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
    end
    if opt == :unmatched
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, prod) if terminal?
      # An optional that did not match yields nil rather than :unmatched.
      nil
    else
      opt
    end
  when :plus
    # Result is an array of all expressions while they match,
    # at least one must match
    prod, plus = expr[1], []
    case prod
    when Symbol
      rule = parser.find_rule(prod)
      raise "No rule found for #{prod}" unless rule
      while (res = rule.parse(input)) != :unmatched
        eat_whitespace(input)
        plus << res
      end
    when String
      while res = input.scan(Regexp.new(Regexp.quote(prod)))
        eat_whitespace(input)
        plus << res
      end
    end
    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, prod)
    # No matches fails the rule; terminals join their pieces into one string.
    plus.empty? ? :unmatched : (terminal? ? plus.compact.join("") : plus.compact)
  when :range
    # Matches the specified character range
    input.scan(to_regexp) || begin
      # Update furthest failure for strings and terminals
      parser.update_furthest_failure(input.pos, input.lineno, expr[1])
      :unmatched
    end
  when :seq
    # Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
    seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
      # Whitespace is skipped between elements, not before the first one.
      eat_whitespace(input) unless accumulator.empty?
      res = case prod
      when Symbol
        rule = parser.find_rule(prod)
        raise "No rule found for #{prod}" unless rule
        rule.parse(input)
      when String
        input.scan(Regexp.new(Regexp.quote(prod))) || :unmatched
      end
      if res == :unmatched
        # Update furthest failure for strings and terminals
        parser.update_furthest_failure(input.pos, input.lineno, prod)
        # Abort the whole sequence; `seq` becomes :unmatched.
        break :unmatched
      end
      accumulator << {prod.to_sym => res}
    end
    seq == :unmatched ?
      :unmatched :
      (terminal? ?
        seq.map(&:values).compact.join("") : # Concat values for terminal production
        seq)
  when :star
    # Result is an array of all expressions while they match,
    # an empty array if none match
    prod, star = expr[1], []
    case prod
    when Symbol
      rule = parser.find_rule(prod)
      raise "No rule found for #{prod}" unless rule
      while (res = rule.parse(input)) != :unmatched
        eat_whitespace(input)
        star << res
      end
    when String
      while res = input.scan(Regexp.new(Regexp.quote(prod)))
        eat_whitespace(input)
        star << res
      end
    end
    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, prod)
    star.compact
  else
    raise "attempt to parse unknown rule type: #{expr.first}"
  end

  # Backtrack: a failed rule must leave the input where it started.
  if result == :unmatched
    input.pos, input.lineno = pos, lineno
  end

  result = parser.onFinish(result)
  # Memoize the outcome together with the input position it ended at.
  (parser.packrat[sym] ||= {})[pos] = {
    pos: input.pos,
    lineno: input.lineno,
    result: result
  }
  return parser.packrat[sym][pos][:result]
end