Class: Sequence
- Inherits:
-
Object
- Object
- Sequence
- Defined in:
- lib/merge3.rb
Overview
the sequence keeps the chunks as lists, one in edited file order and one in original file order
Instance Attribute Summary collapse
-
#chunks ⇒ Object
readonly
the list of chunks, sorted in order of the edited file.
-
#org_chunks ⇒ Object
readonly
the list of chunks, sorted in order of the original file.
Instance Method Summary collapse
-
#add_chunk(chunk) ⇒ Object
add this chunk.
-
#break_at(chunk, pos) ⇒ Object
split in two at pos (thus assuming pos is included) and link the.
-
#done?(two, org) ⇒ Boolean
that part is done if the are (start-stop) is already included in the patches (first if) or the org part overlaps any of the patches org’s the second case is then a copy.
-
#each ⇒ Object
yields all chunks in the sequence in edited file order.
-
#each_org ⇒ Object
yields all chunks in original file order.
-
#each_org_pair ⇒ Object
helper to iterate over each pair in the original file order.
-
#each_pair ⇒ Object
helper to iterate over each pair in the edited file order.
-
#find_from(pos) ⇒ Object
find the chunk that starts at the given position in the original file.
-
#find_left_rim(org, pos) ⇒ Object
find the chunk left to these positions (original and edited) and return the maximum bound a new chunk can expand too in both coordinates.
-
#find_right_rim(start, two, start_start, two_start) ⇒ Object
find the chunk right to these positions (original and edited) and return the maximum bound a new chunk can expand too in both coordinates.
-
#get_next(prev) ⇒ Object
get the next chunk in edited file order.
-
#initialize(too_chunk, start_chunk, start_matches) ⇒ Sequence
constructor
find the difference from (file, or byte array) too to start (matches are precalculated matches for start ) start.
-
#no_overlap(sequence) ⇒ Object
change all chunks in this sequence so that they don’t have overlaps with any of the chunks in the given sequence.
-
#remove_chunk(chunk) ⇒ Object
for rotation, will be readded.
-
#to_s ⇒ Object
output both lists of chunks, edited and original, with chunks and numbers but strings only up to 70 chars (for readability).
Constructor Details
#initialize(too_chunk, start_chunk, start_matches) ⇒ Sequence
find the difference from (file, or byte array) too to start (matches are precalculated matches for start ) start. Collect the differences as chunks (copies or adds) and mark deletes The algorithm is “greedy”, meaning if it finds a match (using the matches) it tries to extend in both directions
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 |
# File 'lib/merge3.rb', line 280 def initialize too_chunk , start_chunk , start_matches @chunks = [] @org_chunks = [] # run by length to get big (sure) hits first (-1 == newlines) [ -1 , 48 , 32, 16 ].each do |c_size| two_matches = Merge3::matches(too_chunk , c_size).each do |key , two| next if (start = start_matches[two.str]).nil? raise "Keys collide :#{start}" if start.str != two.str next if done?( two , start ) # here comes the greedy part, first left then right two_start , start_start , len = two.start , start.start , two.length puts two , "MINUS" if len <= 0 and DEBUG start_rim , edit_rim = find_left_rim( start_start , two_start ) while ( start_chunk.str[start_start - 1] == too_chunk.str[two_start - 1] ) and ( edit_rim < two_start ) and ( start_rim < start_start ) two_start -= 1 start_start -= 1 len += 1 # to keep the right end where it was end start_rim , edit_rim = find_right_rim( start_start + len , two_start + len , start_chunk.length , too_chunk.length ) while ( start_chunk.str[start_start + len ] == too_chunk.str[two_start + len ] ) and ((len + start_start) < start_rim) and ((two_start + len) < edit_rim) len += 1 end chunk = too_chunk.subchunk(two_start,len , start_start ) puts "Matched #{chunk}" if DEBUG add_chunk( chunk ) end end # now find the parts that were deleted # (gaps in the matching of the original file) each_org_pair do | org , next_org | if org.org_stop < next_org.from del_str = start_chunk.str[org.org_stop , next_org.from - org.org_stop ] puts "DELETED #{org.org_stop} --#{del_str}--" if DEBUG del = Deleted.new( org.stop , org.org_stop , del_str) add_chunk( del ) end end # now find the parts that were added (gaps in the matching of the edited file) adds = {} each_pair do |chunk , next_chunk | if chunk.stop < next_chunk.start add = too_chunk.subchunk( chunk.stop , next_chunk.start - chunk.stop , -1 ) puts "ADDING #{add} " if DEBUG chunk.added = add # hang the add onto the chunk # this following logic has fixed some cases, where the matching had # been somewhat unintuitive. Though correct it produced # unneccessary conflicts, in conjunction with deletes while add.str[0] == next_chunk.str[0] and add.str[0] != 32 and # not spaces, avoid whitespace headaches chunk.org_stop == next_chunk.from puts "before: #{chunk} \nNext:#{next_chunk}" if DEBUG add.rotate #put the first to the end remove_chunk( next_chunk ) # move the first from the next to the end of the last chunk.push( next_chunk.pop ) puts "after: #{chunk} \nNext:#{next_chunk}" if DEBUG add_chunk( next_chunk ) end unless chunk.kind_of?(Deleted) or next_chunk.kind_of?(Deleted) end end end |
Instance Attribute Details
#chunks ⇒ Object (readonly)
the list of chunks, sorted in order of the edited file
259 260 261 |
# File 'lib/merge3.rb', line 259 def chunks @chunks end |
#org_chunks ⇒ Object (readonly)
the list of chunks, sorted in order of the original file
260 261 262 |
# File 'lib/merge3.rb', line 260 def org_chunks @org_chunks end |
Instance Method Details
#add_chunk(chunk) ⇒ Object
add this chunk. This implementation keeps the order of both arrays correct all the time
381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 |
# File 'lib/merge3.rb', line 381 def add_chunk( chunk ) raise "Nil added " if not chunk raise "False added " if chunk == false @chunks.push( chunk ) @chunks.sort! { |a , b | ret = a.start <=> b.start # so this adds the Deleted chunks into the edited file too if ret == 0 ret = -1 if a.kind_of? Deleted ret = 1 if b.kind_of? Deleted end ret } @org_chunks.push(chunk) @org_chunks.sort! do |a , b | a.from <=> b.from end end |
#break_at(chunk, pos) ⇒ Object
split in two at pos (thus assuming pos is included) and link the
484 485 486 487 488 489 490 491 492 493 494 495 496 497 |
# File 'lib/merge3.rb', line 484 def break_at( chunk , pos ) return if pos == chunk.from or pos == chunk.org_stop old_s = chunk.to_s new_chunk = chunk.split_at_org!(pos) if not new_chunk.kind_of? Deleted new_chunk.added = chunk.added chunk.added = nil end #puts "SPLIT #{pos}\nBEFORE #{old_s}\nOLD:#{chunk} \nNEW:#{new_chunk} \n" if DEBUG idx = @chunks.index(chunk) @chunks[idx+1,0] = new_chunk idx = @org_chunks.index(chunk) @org_chunks[idx+1,0] = new_chunk end |
#done?(two, org) ⇒ Boolean
that part is done if the are (start-stop) is already included in the patches (first if) or the org part overlaps any of the patches org’s the second case is then a copy
445 446 447 448 449 450 451 452 453 454 455 |
# File 'lib/merge3.rb', line 445 def done? two , org #puts "checking done? \n #{two} \n #{org} " each do |chunk| return true if chunk.overlaps? two end each_org do |chunk| return true if chunk.org_overlaps? org end #puts "NOT DONE? : \n #{self}" false end |
#each ⇒ Object
yields all chunks in the sequence in edited file order
458 459 460 461 462 |
# File 'lib/merge3.rb', line 458 def each @chunks.each do |chunk| yield chunk if chunk end end |
#each_org ⇒ Object
yields all chunks in original file order
465 466 467 468 469 |
# File 'lib/merge3.rb', line 465 def each_org @org_chunks.each do |chunk| yield chunk if chunk end end |
#each_org_pair ⇒ Object
helper to iterate over each pair in the original file order
358 359 360 361 362 363 364 365 366 367 368 |
# File 'lib/merge3.rb', line 358 def each_org_pair return if @org_chunks.length < 2 idx = 1 chunk = @org_chunks[0] while idx < @org_chunks.length next_chunk = @org_chunks[idx] yield(chunk , next_chunk) chunk = next_chunk idx = idx + 1 end end |
#each_pair ⇒ Object
helper to iterate over each pair in the edited file order
345 346 347 348 349 350 351 352 353 354 355 |
# File 'lib/merge3.rb', line 345 def each_pair return if @chunks.length < 2 idx = 1 chunk = @chunks[0] while idx < @chunks.length next_chunk = @chunks[idx] yield(chunk , next_chunk) chunk = next_chunk idx = idx + 1 end end |
#find_from(pos) ⇒ Object
find the chunk that starts at the given position in the original file
404 405 406 407 408 409 410 411 412 |
# File 'lib/merge3.rb', line 404 def find_from pos each_org do | chunk | #puts "FOUND for #{pos} #{chunk}" return chunk if chunk.from == pos end raise "Not found #{pos} in\n#{self}" # hmmm this was here for some reason I don't recall # return chunk.next if chunk.added and chunk.added.start == pos end |
#find_left_rim(org, pos) ⇒ Object
find the chunk left to these positions (original and edited) and return the maximum bound a new chunk can expand too in both coordinates.
416 417 418 419 420 421 422 423 424 425 426 |
# File 'lib/merge3.rb', line 416 def find_left_rim org , pos start_rim , two_rim = 0 ,0 each_org do | two | start_rim = two.org_stop if start_rim <= two.org_stop and two.org_stop <= org end each do | one | two_rim = one.stop if two_rim <= one.stop and one.stop <= pos end #puts "Left rim #{org} #{pos} is #{start_rim} #{two_rim}" if DEBUG return start_rim , two_rim end |
#find_right_rim(start, two, start_start, two_start) ⇒ Object
find the chunk right to these positions (original and edited) and return the maximum bound a new chunk can expand too in both coordinates.
430 431 432 433 434 435 436 437 438 439 440 |
# File 'lib/merge3.rb', line 430 def find_right_rim start , two , start_start , two_start start_rim , two_rim = start_start , two_start each_org do | tw | start_rim = tw.from if start <= tw.from and tw.from <= start_rim end each do | one | two_rim = one.start if two <= one.start and one.start <= two_rim end #puts "\Right rim #{start} #{two} is #{start_rim} #{two_rim}" if DEBUG return start_rim , two_rim end |
#get_next(prev) ⇒ Object
get the next chunk in edited file order
371 372 373 374 375 376 377 |
# File 'lib/merge3.rb', line 371 def get_next( prev ) idx = @chunks.index prev return nil unless idx return nil if idx == @chunks.length - 1 nekst = @chunks[idx + 1 ] return nekst end |
#no_overlap(sequence) ⇒ Object
change all chunks in this sequence so that they don’t have overlaps with any of the chunks in the given sequence. done both ways results in non-overlapping sequences
474 475 476 477 478 479 480 481 |
# File 'lib/merge3.rb', line 474 def no_overlap sequence each_org do |one| sequence.each_org do |two| break_at( one , two.from ) if one.org_includes?( two.from ) break_at( one , two.org_stop ) if one.org_includes?( two.org_stop ) end end end |
#remove_chunk(chunk) ⇒ Object
for rotation, will be readded
398 399 400 401 |
# File 'lib/merge3.rb', line 398 def remove_chunk( chunk ) # for rotation, will be readded @chunks.delete(chunk) @org_chunks.delete(chunk) end |
#to_s ⇒ Object
output both lists of chunks, edited and original, with chunks and numbers but strings only up to 70 chars (for readability)
264 265 266 267 268 269 270 271 272 273 |
# File 'lib/merge3.rb', line 264 def to_s ret = "" @chunks.each_with_index do | chunk , index | ret += index.to_s + " - " + chunk.to_s + "\n" end @org_chunks.each_with_index do | chunk , index | ret += index.to_s + " - " + chunk.to_s + "\n" end return ret end |