Module: EBNF::LL1
- Included in:
- Base
- Defined in:
- lib/ebnf/ll1.rb,
lib/ebnf/ll1/lexer.rb,
lib/ebnf/ll1/parser.rb,
lib/ebnf/ll1/scanner.rb
Defined Under Namespace
Modules: Parser Classes: Lexer, Scanner
Instance Attribute Summary collapse
-
#branch ⇒ Hash{Symbol => Hash{String, Symbol => Array<Symbol>}}
readonly
Branch table, represented as a recursive hash.
-
#cleanup ⇒ Hash{Symbol => Symbol}
readonly
EBNF Cleanup table.
-
#first ⇒ Hash{Symbol => Array<String, Symbol>}
readonly
First table.
-
#follow ⇒ Hash{Symbol => Array<String, Symbol>}
readonly
Follow table.
-
#pass ⇒ Symbol, String
readonly
Pass expression.
-
#start ⇒ Symbol
readonly
Start symbol.
-
#terminals ⇒ Array<String, Symbol>
readonly
Terminal table.
Instance Method Summary collapse
-
#build_tables ⇒ Object
Generate parser tables, #branch, #first, #follow, and #terminals.
-
#first_follow(*starts) ⇒ EBNF
Create first/follow for each rule using techniques defined for LL(1) parsers.
-
#outputTable(io, name, table, indent = 0) ⇒ Object
Generate an output table in Ruby format.
Instance Attribute Details
#branch ⇒ Hash{Symbol => Hash{String, Symbol => Array<Symbol>}} (readonly)
Branch table, represented as a recursive hash. The table is indexed by rule symbol, which in turn references a hash of terminals (the first terminals of the production), which in turn reference the sequence of rules that follow, given that terminal as input.
11 12 13 |
# File 'lib/ebnf/ll1.rb', line 11

# Read-only accessor for the branch table built by #build_tables.
# @return [Hash{Symbol => Hash{String, Symbol => Array<Symbol>}}]
def branch
  @branch
end
#cleanup ⇒ Hash{Symbol => Symbol} (readonly)
EBNF Cleanup table
Mapping from rule symbols to their cleanup symbols.
28 29 30 |
# File 'lib/ebnf/ll1.rb', line 28

# Read-only accessor for the EBNF cleanup table.
# @return [Hash{Symbol => Symbol}]
def cleanup
  @cleanup
end
#first ⇒ Hash{Symbol => Array<String, Symbol>} (readonly)
First table
16 17 18 |
# File 'lib/ebnf/ll1.rb', line 16

# Read-only accessor for the First table.
# @return [Hash{Symbol => Array<String, Symbol>}]
def first
  @first
end
#follow ⇒ Hash{Symbol => Array<String, Symbol>} (readonly)
Follow table
21 22 23 |
# File 'lib/ebnf/ll1.rb', line 21

# Read-only accessor for the Follow table.
# @return [Hash{Symbol => Array<String, Symbol>}]
def follow
  @follow
end
#pass ⇒ Symbol, String (readonly)
Pass expression
A Terminal symbol used for skipping whitespace and comments
42 43 44 |
# File 'lib/ebnf/ll1.rb', line 42

# Read-only accessor for the pass expression — a terminal symbol used
# for skipping whitespace and comments.
# @return [Symbol, String]
def pass
  @pass
end
#start ⇒ Symbol (readonly)
Start symbol
The rule which starts the grammar
49 50 51 |
# File 'lib/ebnf/ll1.rb', line 49

# Read-only accessor for the start symbol — the rule which starts the grammar.
# @return [Symbol]
def start
  @start
end
#terminals ⇒ Array<String, Symbol> (readonly)
Terminal table
The list of terminals used in the grammar.
35 36 37 |
# File 'lib/ebnf/ll1.rb', line 35

# Read-only accessor for the terminal table — the list of terminals
# used in the grammar.
# @return [Array<String, Symbol>]
def terminals
  @terminals
end
Instance Method Details
#build_tables ⇒ Object
Generate parser tables, #branch, #first, #follow, and #terminals
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 |
# File 'lib/ebnf/ll1.rb', line 176 def build_tables progress("build_tables") { "Terminals: #{ast.count {|r| r.terminal?}} " + "Non-Terminals: #{ast.count {|r| r.rule?}}" } @first = ast. select(&:first). inject({}) {|memo, r| memo.merge(r.sym => r.first) } @follow = ast. select(&:follow). inject({}) {|memo, r| memo.merge(r.sym => r.follow) } @cleanup = ast. select(&:cleanup). inject({}) {|memo, r| memo.merge(r.sym => r.cleanup)} @terminals = ast.map {|r| Array(r.first) + Array(r.follow)}.flatten.uniq @terminals = (@terminals - [:_eps, :_eof]).sort_by{|t| t.to_s.sub(/^_/, '')} # FIXME: assumes that this is a (seq :PASS), or similar if pass = ast.detect {|r| r.pass?} @pass = pass.expr.last end # If a generated terminal is found, this indicates an error, as this version does not automatically generate regular expressions for automatic terminals @terminals. select {|t| t.to_s.start_with?("_")}. reject {|t| t.to_s.start_with?("_pass_")}. # Concession to backwards compatibility each do |term| error("build_tables", "terminal #{term} is automatically generated; " + "regular expressions are not yet generated and parsing " + "is not supported") end @branch = {} @already = [] @agenda = [] Array(@starts).each do |start| do_production(start) while !@agenda.empty? x = @agenda.shift do_production(x) end end if !@errors.empty? progress("###### FAILED with #{errors.length} errors.") @errors.each {|s| progress(" #{s}")} raise "Table creation failed with errors" else progress("Ok for predictive parsing") end end |
#first_follow(*starts) ⇒ EBNF
Create first/follow for each rule using techniques defined for LL(1) parsers.
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
# File 'lib/ebnf/ll1.rb', line 58

# Create first/follow sets for each rule using the standard techniques
# defined for LL(1) parsers: iterate First-set propagation and Follow-set
# propagation to a fixpoint over the grammar rules.
def first_follow(*starts)
  # Add _eof to follow all start rules
  @starts = starts
  if @start = starts.first
    starts.each do |start|
      start_rule = find_rule(start)
      raise "No rule found for start symbol #{start}" unless start_rule
      start_rule.add_follow([:_eof])
      start_rule.start = true
    end
  end

  # Comprehension rule: create shorter versions of all non-terminal
  # sequences. This is necessary as the FF rules reference w', which is a
  # comprehension (the tail of a sequence after its first element).
  comprehensions = []
  ittr = 0
  depth do
    begin
      comprehensions = []
      # Only sequence rules with more than one element (expr is [:seq, e1, e2, ...])
      # and no comprehension yet need a comprehension rule.
      ast.select {|r| r.rule? && r.seq? && r.comp.nil? && r.expr.length > 2}.each do |rule|
        new_expr = rule.expr[2..-1].unshift(:seq)
        if new_rule = ast.detect {|r| r.expr == new_expr}
          # Link to existing comprehension used for other rules
          debug("FF.c") {"(#{ittr}) link comprehension rule for #{rule.sym} => #{new_rule.sym}[#{new_expr.inspect}]"}
        else
          new_rule = rule.build(new_expr)
          debug("FF.c") {"(#{ittr}) add comprehension rule for #{rule.sym} => #{new_rule.sym}[#{new_expr.inspect}]"}
          comprehensions << new_rule
        end
        rule.comp = new_rule
      end

      @ast += comprehensions
      progress("FF.c") {"(#{ittr}) comprehensions #{comprehensions.length}"}
      ittr += 1
    end while !comprehensions.empty?

    # Fixpoint iteration: repeat until no first or follow set grows.
    ittr = 0
    begin
      firsts, follows = 0, 0

      # add Fi(wi) to Fi(Ai) for every rule Ai → wi
      #
      # For sequences, this is the first rule in the sequence.
      # For alts, this is every rule in the sequence
      each(:rule) do |ai|
        # Fi(a w' ) = { a } for every terminal a
        ai.terminals(ast).each do |t|
          debug("Fi.2.1") {"(#{ittr}) add terminal #{t} to #{ai.sym}"}
          firsts += ai.add_first([t])
        end

        ai.non_terminals(ast).select(&:first).each do |a|
          if !a.first_includes_eps?
            # Fi(A w' ) = Fi(A) for every nonterminal A with ε not in Fi(A)
            debug("Fi.2.2") {"(#{ittr}) add first from #{a.sym} to #{ai.sym}: #{a.first.inspect}"}
            firsts += ai.add_first(a.first)
          else
            # Fi(A w' ) = Fi(A) \ { ε } ∪ Fi(w' ) for every nonterminal A with ε in Fi(A)
            if ai.seq?
              # w' is either comprehension of ai, or empty, if there is no comprehension
              comp = ai.comp || find_rule(:_empty)
              fi = a.first - [:_eps] + (comp.first || [])
              debug("Fi.2.3a") {"(#{ittr}) add first #{fi.inspect} from #{a.sym} and #{comp.sym} to #{ai.sym}"}
              firsts += ai.add_first(fi)
            else
              # ai is an alt, so there are no comprehensions of non-terminals, add Fi(A) including ε
              debug("Fi.2.3b") {"(#{ittr}) add first #{a.first} from #{a.sym} to #{ai.sym}"}
              firsts += ai.add_first(a.first)
            end
          end
        end
      end

      #
      # Fi(ε) = { ε }
      # Add _eps as a first of _empty
      find_rule(:_empty).add_first([:_eps])

      # Add follows
      # if there is a rule of the form Aj → wAiw' , then
      # First do this for the case when Ai is the first rule
      each(:rule) do |aj|
        comp = aj.comp || find_rule(:_empty)
        aj.non_terminals(ast).reject {|r| r.sym == :_empty}.each do |ai|
          # if the terminal a is in Fi(w' ), then add a to Fo(Ai)
          # Basically, this says that the firsts of a comprehension of a
          # rule are the follows of the first non-terminal in the rule.
          if comp.first
            debug("Fo.2.1") {"(#{ittr}) add follow #{comp.first.inspect} from #{comp.sym} to #{ai.sym}"}
            follows += ai.add_follow(comp.first)
          end

          # If there is no comprehension of this rule (meaning, it is a
          # sequence of one non-terminal), then the follows of the
          # non-terminal include the follows of the rule. This handles rules
          # with multiple sequences because it will have a comprehension
          # that includes the last element in the sequence.
          if !aj.comp && aj.follow
            debug("Fo.2.1a") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{ai.sym}"}
            follows += ai.add_follow(aj.follow)
          end

          # if ε is in Fi(w' ), then add Fo(Aj) to Fo(Ai)
          if comp.first_includes_eps? && aj.follow
            debug("Fo.2.2") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{ai.sym}"}
            follows += ai.add_follow(aj.follow)
          end
        end

        # Since the rules are of the form wAiw', and we've handled the case
        # which is just Aiw', this leaves those cases that have rules prior
        # to Ai. This basically says that the follows of a rule are added
        # to the follows of the comprehension of the rule.
        if aj.comp && aj.follow
          debug("Fo.2.3") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{aj.comp.sym}"}
          follows += aj.comp.add_follow(aj.follow)
        end
      end

      progress("first_follow") {"(#{ittr}) firsts #{firsts}, follows #{follows}"}
      ittr += 1
    end while (firsts + follows) > 0
  end
end
#outputTable(io, name, table, indent = 0) ⇒ Object
Generate an output table in Ruby format
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 |
# File 'lib/ebnf/ll1.rb', line 242

# Generate an output table in Ruby format, written to +io+ as a frozen
# Hash or Array literal assigned to +name+.
#
# @param io [#puts] destination stream
# @param name [String] constant/variable name for the emitted table
# @param table [Hash, Array, nil] table to serialize; Hash values may be
#   Symbol/String, Array, or a nested Hash of Arrays. Nil emits nothing.
# @param indent [Integer] number of leading spaces for the outermost level
# @raise [ArgumentError] if a Hash entry has an unsupported value type
def outputTable(io, name, table, indent = 0)
  ind0 = ' ' * indent
  ind1 = ind0 + ' '
  ind2 = ind1 + ' '

  if table.is_a?(Hash)
    io.puts "#{ind0}#{name} = {"
    # Sort keys ignoring a leading "_" so generated symbols interleave
    # with their base names.
    table.keys.sort_by{|t| t.to_s.sub(/^_/, '')}.each do |prod|
      case table[prod]
      when Symbol, String
        io.puts "#{ind1}#{prod.inspect} => #{table[prod].inspect},"
      when Array
        list = table[prod].map(&:inspect).join(",\n#{ind2}")
        io.puts "#{ind1}#{prod.inspect} => [\n#{ind2}#{list}],"
      when Hash
        io.puts "#{ind1}#{prod.inspect} => {"
        table[prod].keys.sort_by{|t| t.to_s.sub(/^_/, '')}.each do |term|
          list = table[prod][term].map(&:inspect).join(", ")
          io.puts "#{ind2}#{term.inspect} => [#{list}],"
        end
        io.puts "#{ind1}},"
      else
        # BUG FIX: the original evaluated this message as a bare string and
        # discarded it, silently emitting an incomplete table. Fail loudly
        # so malformed tables are caught at generation time.
        raise ArgumentError, "Unknown table entry type: #{table[prod].class}"
      end
    end
    io.puts "#{ind0}}.freeze\n"
  elsif table
    io.puts "#{ind0}#{name} = [\n#{ind1}" +
      table.sort_by{|t| t.to_s.sub(/^_/, '')}.map(&:inspect).join(",\n#{ind1}") +
      "\n#{ind0}].freeze\n"
  end
end