Module: ThompsonConstruction
- Included in:
- ScannerGenerator::FiniteStateMachine
- Defined in:
- lib/scanner_generator/thompson_construction.rb
Constant Summary collapse
- PENDING =
0
Instance Method Summary collapse
- #absorb_left_alt(machines) ⇒ Object
- #absorb_right_alt(machines) ⇒ Object
- #alt_machine ⇒ Object
-
#build_machine_stack(re) ⇒ Object
Thompson-McNaughton-Yamada Construction Section.
- #cat_machine(ch) ⇒ Object
- #catify(machines) ⇒ Object
- #clone ⇒ Object
- #get_char_class(str) ⇒ Object
- #handle_alternation(machines) ⇒ Object
-
#imp_attach_graph(attach_point, fsm) ⇒ Object
imp_attach_graph: increments fsm’s node numbers by 1-CALLER.node_count takes edges outgoing from fsm.origin and adds them to attach_point.
- #kleene_machine ⇒ Object
- #kleene_up(machines) ⇒ Object
- #lambda_inject_graph(graph, src, dest) ⇒ Object
- #lambda_replace_edge(src, label, dest, graph) ⇒ Object
- #plus_machine ⇒ Object
- #prepend_graph(fsm) ⇒ Object
- #question_machine ⇒ Object
-
#re2nfa(re) ⇒ Object
make one pass forwards, completing all kleene stars and all alt LHSs make one pass backwards, completing all alt RHSs if any unfulfilled dependencies remain, my assumptions were mistaken.
-
#renumber! ⇒ Object
ensure no gaps in our node names!.
-
#replace_edge(src, label, dest, graph) ⇒ Object
graph edges going from origin become outgoing from src graph edges going TO final state instead go TO dest What is “final state?” Any accept state? @graph_hash[label].delete(dest).
- #retarget_edges(old_dest, new_dest) ⇒ Object
- #set_new_accept(node_number, type = 'end') ⇒ Object
Instance Method Details
#absorb_left_alt(machines) ⇒ Object
134 135 136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/scanner_generator/thompson_construction.rb', line 134 def absorb_left_alt(machines) new_machines = Array.new machines.each_with_index do |mach,ii| if mach[1].nil? || mach[1].empty? # This machine is complete. new_machines.push([mach[0],nil]) else src, dest = mach[1].shift, mach[1].shift m = mach[0].replace_edge(src,PENDING,dest,new_machines.pop[0]) # NON-LAMBDA VERSION new_machines.push([m,mach[1]]) end end new_machines end |
#absorb_right_alt(machines) ⇒ Object
148 149 150 |
# File 'lib/scanner_generator/thompson_construction.rb', line 148 def absorb_right_alt(machines) absorb_left_alt(machines.reverse).reverse end |
#alt_machine ⇒ Object
224 225 226 227 228 229 230 231 232 233 234 235 |
# File 'lib/scanner_generator/thompson_construction.rb', line 224 def alt_machine FiniteStateMachine.new({ :accept_states => {5=>'end'}, :graph_hash => { 0 => {LAMBDA => [1,3]}, 1 => {PENDING => [2]}, 2 => {LAMBDA => [5]}, 3 => {PENDING => [4]}, 4 => {LAMBDA => [5]} } }) end |
#build_machine_stack(re) ⇒ Object
Thompson-McNaughton-Yamada Construction Section
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/scanner_generator/thompson_construction.rb', line 6 def build_machine_stack(re) skip = 0 escaped = false machines = Array.new (0...re.length).each do |ii| # the pointer in some cases. (skip -= 1) && next if skip != 0 # Advance ptr until past () group ch = re[ii] #re[-ii-1] if escaped case ch when 'n' machines.push([cat_machine("\n"), nil]) else machines.push([cat_machine(ch), nil]) end escaped = false next end case(ch) when '*' then machines.push([kleene_machine, [1,2]]) when '+' then machines.push([plus_machine, [1,2]]) #when '+' then machines.push([plus_machine, [[0,1],[1,1]]]) when '?' then machines.push([question_machine, [0,1]]) when '|' then machines.push([alt_machine, [1,2,3,4]]) when ']' then raise "mismatched bracket closed a non-open class" when ')' then raise "mismatched paren closed a non-open group" when '('# ; puts "#{ms}\tGRPOPEN\nencounted closing paren. following chars #{re[ii+1]}#{re[ii+2]}" subexpression = '' nesting = 0 until (ch2 = re[ii+=1]) == ')' && nesting == 0 # Until the next character is '(' nesting -= 1 if ch2 == ')' nesting += 1 if ch2 == '(' subexpression << ch2 #skip += 1 end #skip += 1 subgraph = re2nfa(subexpression) skip = subexpression.length+1 # the +1 is used to skip the closing ) machines.push([subgraph, nil]) when '[' char_class = get_char_class(re[ii..-1]) # search rest of the string for []-expression machines.push([cat_machine(/#{char_class}/), nil]) skip = char_class.length - 1 + char_class.scan(/\\/).length # compensate for 2 '\'s counting as 1 # The below skip assignment works if we want to allow for odd numbers of slashes, but it's # not desirable, because it would allow [\n] to be [n]. # We're reserving \ for escaping *, +, ?, etc. symbols. #skip = char_class.length - 1 + # char_class.scan(/\\/).length*2 - # char_class.scan(/\\\\/).length # compensate for 2 '\'s counting as 1 when '\\' #; escaped = true unless escaped== true if escaped # '\\' -> cat a slash machines.push([cat_machine(ch), nil]) escaped = false else escaped = true end else machines.push([cat_machine(ch), nil]) end end machines end |
#cat_machine(ch) ⇒ Object
204 205 206 207 208 209 |
# File 'lib/scanner_generator/thompson_construction.rb', line 204 def cat_machine(ch) FiniteStateMachine.new({ :accept_states => {1=>'end'}, :graph_hash => {0 => {ch => [1]}} }) end |
#catify(machines) ⇒ Object
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
# File 'lib/scanner_generator/thompson_construction.rb', line 105 def catify(machines) new_machines = Array.new machines.each_with_index do |mach,ii| if ii == 0 new_machines.push([mach[0],nil]) elsif (mach[1].nil? && machines[ii-1][1].nil?) # This machine AND PREVIOUS are each a cat or finished */?/+ # This code is suspiciously similar to the wrap-up code of re2nfa() # which implies that it's not DRY. This is something to revisit. lead = new_machines.pop[0] offset = lead.get_node_count-1 acc = lead.accept_states.keys.first || 0 lead.imp_attach_graph(acc,mach[0]) lead.accept_states.delete_if do |acc_st| !mach[0].accept_states.keys.include?(acc_st-offset) end new_machines.push([lead,nil]) else new_machines.push([mach[0],mach[1]]) end end new_machines end |
#clone ⇒ Object
358 359 360 |
# File 'lib/scanner_generator/thompson_construction.rb', line 358 def clone Marshal.load( Marshal.dump(self) ) end |
#get_char_class(str) ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/scanner_generator/thompson_construction.rb', line 68 def get_char_class(str) escaped = false result = '' str.each_char.with_index do |ch,ii| if escaped == false && ch == ']' # done reading current class result += ch return result elsif escaped == true result = result[0..-2]+ch else result += ch end escaped = (ch == '\\' && escaped==false) end raise 'character class improperly closed!' end |
#handle_alternation(machines) ⇒ Object
129 130 131 132 |
# File 'lib/scanner_generator/thompson_construction.rb', line 129 def handle_alternation(machines) machines = absorb_left_alt(machines) machines = absorb_right_alt(machines) end |
#imp_attach_graph(attach_point, fsm) ⇒ Object
imp_attach_graph: increments fsm’s node numbers by 1-CALLER.node_count
takes edges outgoing from fsm.origin and adds them to attach_point
313 314 315 316 317 318 319 320 321 322 323 324 325 |
# File 'lib/scanner_generator/thompson_construction.rb', line 313 def imp_attach_graph(attach_point, fsm) my_node_count = get_node_count graft = fsm.clone graft.increment_node_labels(my_node_count-1) # prevent collisions graft_root_edges = graft.graph_hash.delete(graft.origin) @graph_hash[attach_point] ||= Hash.new @graph_hash[attach_point].merge!(graft_root_edges) @accept_states.merge!(graft.accept_states) @graph_hash.merge!(graft.graph_hash) get_node_count end |
#kleene_machine ⇒ Object
237 238 239 240 241 242 243 244 245 246 |
# File 'lib/scanner_generator/thompson_construction.rb', line 237 def kleene_machine FiniteStateMachine.new({ :accept_states => {3=>'end'}, :graph_hash => { 0 => {LAMBDA => [1,3]}, 1 => {PENDING => [2]}, 2 => {LAMBDA => [1,3]} } }) end |
#kleene_up(machines) ⇒ Object
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/scanner_generator/thompson_construction.rb', line 86 def kleene_up(machines) new_machines = Array.new machines.each_with_index do |mach,ii| if mach[1].nil? || mach[1].empty? # This machine is complete. new_machines.push([mach[0],nil]) else if mach[1].length == 2 # Deals with *, ?, and +, who all have same precedence src, dest = mach[1].shift, mach[1].shift #m = mach[0].lambda_replace_edge(src,PENDING,dest,new_machines.pop) # LAMBDA VERSION m = mach[0].replace_edge(src,PENDING,dest,new_machines.pop[0]) # NON-LAMBDA VERSION new_machines.push([m,nil]) else # dealing with | new_machines.push([mach[0],mach[1]]) end end end new_machines end |
#lambda_inject_graph(graph, src, dest) ⇒ Object
350 351 352 353 354 355 356 |
# File 'lib/scanner_generator/thompson_construction.rb', line 350 def lambda_inject_graph(graph, src, dest) old_node_count = get_node_count lambda_attach_graph(src, graph) graph.accept_states.keys.each {|k| add_edge(k+old_node_count, LAMBDA, dest)} graph.accept_states.keys.each {|k| @accept_states.delete(k+old_node_count)} self end |
#lambda_replace_edge(src, label, dest, graph) ⇒ Object
340 341 342 343 344 345 346 347 348 |
# File 'lib/scanner_generator/thompson_construction.rb', line 340 def lambda_replace_edge(src, label, dest, graph) if @graph_hash[src][label].class == Fixnum @graph_hash[src][label] = [@graph_hash[src][label]] end #@graph_hash[src][label].delete(dest) lambda_inject_graph(graph,src,dest) delete_edge(src,label,dest) self end |
#plus_machine ⇒ Object
248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 |
# File 'lib/scanner_generator/thompson_construction.rb', line 248 def plus_machine FiniteStateMachine.new({ :accept_states => {3=>'end'}, :graph_hash => { 0 => {LAMBDA => [1]}, 1 => {PENDING => [2]}, 2 => {LAMBDA => [1,3]} } # The below machine would be more concise, but we'd need to add in logic to replace TWO EDGES with one absorb. #:accept_states => {1=>'end'}, #:graph_hash => { # 0 => {PENDING => [1]}, # 1 => {PENDING => [1]} #} }) end |
#prepend_graph(fsm) ⇒ Object
199 200 201 202 |
# File 'lib/scanner_generator/thompson_construction.rb', line 199 def prepend_graph(fsm) fsm.imp_attach_graph(fsm.accept_states.keys[0],self) copy(fsm) end |
#question_machine ⇒ Object
211 212 213 214 215 216 217 218 219 220 221 222 |
# File 'lib/scanner_generator/thompson_construction.rb', line 211 def question_machine FiniteStateMachine.new({ :accept_states => {1=>'end'}, :graph_hash => {0 => {PENDING => [1], LAMBDA => [1]}} #:accept_states => {3=>'end'}, #:graph_hash => { # 0 => {LAMBDA => [1,3]}, # 1 => {PENDING => [2]}, # 2 => {LAMBDA => [3]} #} }) end |
#re2nfa(re) ⇒ Object
make one pass forwards, completing all kleene stars and all alt LHSs make one pass backwards, completing all alt RHSs if any unfulfilled dependencies remain, my assumptions were mistaken
165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/scanner_generator/thompson_construction.rb', line 165 def re2nfa(re) #puts "re2nfa: #{re}" fsconstruct = FiniteStateMachine.new({:accept_states => {0=>'eh'}, :graph_hash => {0=>{PENDING=>[0]}}}) machines = build_machine_stack(re) machines = kleene_up(machines) machines = catify(machines) machines = handle_alternation(machines) #puts "New machines:" machines.each_with_index do |mach,ii| m = mach[0] offset = fsconstruct.get_node_count-1 acc = fsconstruct.accept_states.keys.first || 0 # Attachment point is accept state fsconstruct.imp_attach_graph(acc, m) fsconstruct.accept_states.delete_if do |acc_st| #puts "purging acc #{acc}" if !m.accept_states.keys.include?(acc-offset) !m.accept_states.keys.include?(acc_st-offset) end end fsconstruct.delete_edge(0,PENDING,0) #@graph_hash = fsconstruct.graph_hash #@accept_states = fsconstruct.accept_states FiniteStateMachine.new({ :graph_hash => fsconstruct.graph_hash, :accept_states => fsconstruct.accept_states }) end |
#renumber! ⇒ Object
ensure no gaps in our node names!
300 301 302 303 304 305 306 307 308 309 |
# File 'lib/scanner_generator/thompson_construction.rb', line 300 def renumber! get_node_names.each_with_index do |n,ii| if n != ii retarget_edges(n,ii) @accept_states[ii] = @accept_states.delete(n) unless @accept_states[n].nil? @graph_hash[ii] = @graph_hash.delete(n) end end self end |
#replace_edge(src, label, dest, graph) ⇒ Object
graph edges going from origin become outgoing from src graph edges going TO final state instead go TO dest
What is "final state?" Any accept state?
@graph_hash[label].delete(dest)
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 |
# File 'lib/scanner_generator/thompson_construction.rb', line 272 def replace_edge(src, label, dest, graph) raise "can't inject a graph that had no accept states" if graph.accept_states.nil? || graph.accept_states.empty? if @graph_hash[src][label].class == Fixnum @graph_hash[src][label] = [@graph_hash[src][label]] end offset = get_node_count-1 imp_attach_graph(src, graph) #draw_graph('intermediate-self') #graph.draw_graph('intermediate-graft') # for each of the edges pointing at the accept state of the graph # redirect them to point at dest #draw_graph('retarget-pre') graph.accept_states.keys.each do |acc| retarget_edges(acc+offset,dest) accept_states.delete(acc+offset) end delete_edge(src,label,dest) renumber! #draw_graph('retarget-post') self end |
#retarget_edges(old_dest, new_dest) ⇒ Object
327 328 329 330 331 332 333 334 335 336 337 338 |
# File 'lib/scanner_generator/thompson_construction.rb', line 327 def retarget_edges(old_dest, new_dest) @graph_hash.each_pair do |node,edge_hash| edge_hash.each_pair do |label, dest| if dest.include? old_dest #puts "#{node}[#{label}] changed from #{dest} to #{new_dest}" add_edge( node, label, new_dest) delete_edge(node, label, old_dest) end end end self end |
#set_new_accept(node_number, type = 'end') ⇒ Object
195 196 197 |
# File 'lib/scanner_generator/thompson_construction.rb', line 195 def set_new_accept(node_number, type='end') @accept_states = {node_number => 'end'} end |