Class: Sequence

Inherits:
Object
  • Object
show all
Defined in:
lib/merge3.rb

Overview

the sequence keeps the chunks as lists, one in edited file order and one in original file order

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(too_chunk, start_chunk, start_matches) ⇒ Sequence

find the difference from (file, or byte array) too to start (matches are precalculated matches for start ) start. Collect the differences as chunks (copies or adds) and mark deletes The algorithm is “greedy”, meaning if it finds a match (using the matches) it tries to extend in both directions



280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
# File 'lib/merge3.rb', line 280

def initialize too_chunk , start_chunk , start_matches
  @chunks = []
  @org_chunks = []
  # run by length to get big (sure) hits first (-1 == newlines)
  [ -1 , 48 , 32, 16 ].each  do |c_size|
    two_matches = Merge3::matches(too_chunk , c_size).each do |key , two|  
      next if   (start = start_matches[two.str]).nil? 
      raise "Keys collide :#{start}"   if  start.str != two.str
      next if done?( two , start ) 
      # here comes the greedy part, first left then right
      two_start , start_start , len  = two.start , start.start , two.length
      puts two , "MINUS" if len <=  0 and DEBUG
      start_rim , edit_rim = find_left_rim( start_start , two_start )
      while ( start_chunk.str[start_start - 1] == too_chunk.str[two_start - 1] ) and 
          (  edit_rim < two_start  )  and (  start_rim < start_start )
        two_start -= 1
        start_start -= 1
        len += 1 # to keep the right end where it was
      end
      start_rim , edit_rim = find_right_rim( start_start + len , two_start + len , start_chunk.length , too_chunk.length )
      while ( start_chunk.str[start_start + len  ] == too_chunk.str[two_start + len ] ) and 
          ((len + start_start) < start_rim)  and ((two_start + len) < edit_rim) 
        len += 1
      end
      chunk =  too_chunk.subchunk(two_start,len , start_start )
      puts "Matched #{chunk}" if DEBUG 
      add_chunk( chunk )
    end
  end
  # now find the parts that were deleted
  #  (gaps in the matching of the original file)
  each_org_pair do | org , next_org |
    if org.org_stop < next_org.from  
      del_str = start_chunk.str[org.org_stop , next_org.from  - org.org_stop ]
      puts "DELETED #{org.org_stop} --#{del_str}--" if DEBUG
      del = Deleted.new( org.stop , org.org_stop , del_str)
      add_chunk( del )
    end
  end
  # now find the parts that were added (gaps in the matching of the edited file)
  adds = {}
  each_pair do |chunk , next_chunk |
    if chunk.stop < next_chunk.start 
      add = too_chunk.subchunk( chunk.stop , next_chunk.start - chunk.stop , -1 )
      puts "ADDING  #{add} "  if DEBUG
      chunk.added =  add   # hang the add onto the chunk
      # this following logic has fixed some cases, where the matching had 
      # been somewhat unintuitive. Though correct it produced
      # unneccessary conflicts, in conjunction with deletes
      while add.str[0] == next_chunk.str[0]  and
          add.str[0] != 32 and    # not spaces, avoid whitespace headaches
          chunk.org_stop == next_chunk.from  
        puts "before: #{chunk} \nNext:#{next_chunk}"  if DEBUG
        add.rotate  #put the first to the end
        remove_chunk( next_chunk )
        # move the first from the next to the end of the last
        chunk.push( next_chunk.pop )  
        puts "after: #{chunk} \nNext:#{next_chunk}"  if DEBUG
        add_chunk( next_chunk )
      end unless chunk.kind_of?(Deleted) or next_chunk.kind_of?(Deleted)
    end
  end
end

Instance Attribute Details

#chunksObject (readonly)

the list of chunks, sorted in order of the edited file



259
260
261
# File 'lib/merge3.rb', line 259

def chunks
  @chunks
end

#org_chunksObject (readonly)

the list of chunks, sorted in order of the original file



260
261
262
# File 'lib/merge3.rb', line 260

def org_chunks
  @org_chunks
end

Instance Method Details

#add_chunk(chunk) ⇒ Object

add this chunk. This implementation keeps the order of both arrays correct all the time



381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
# File 'lib/merge3.rb', line 381

def add_chunk( chunk )
  raise "Nil added " if not chunk 
  raise "False added " if  chunk == false 
  @chunks.push( chunk )
  @chunks.sort! {  |a , b | 
    ret = a.start <=> b.start
    # so this adds the Deleted chunks into the edited file too 
    if ret == 0 
      ret =  -1 if a.kind_of? Deleted 
      ret =  1 if b.kind_of? Deleted 
    end
    ret
  }
  @org_chunks.push(chunk)
  @org_chunks.sort! do  |a , b | a.from <=> b.from end
end

#break_at(chunk, pos) ⇒ Object

split in two at pos (thus assuming pos is included) and link the



484
485
486
487
488
489
490
491
492
493
494
495
496
497
# File 'lib/merge3.rb', line 484

def break_at( chunk , pos )
  return if pos == chunk.from or pos == chunk.org_stop
  old_s = chunk.to_s
  new_chunk = chunk.split_at_org!(pos) 
  if not new_chunk.kind_of? Deleted
    new_chunk.added = chunk.added
    chunk.added = nil
  end
  #puts "SPLIT  #{pos}\nBEFORE #{old_s}\nOLD:#{chunk} \nNEW:#{new_chunk} \n" if DEBUG
  idx = @chunks.index(chunk)
  @chunks[idx+1,0] = new_chunk
  idx = @org_chunks.index(chunk)
  @org_chunks[idx+1,0] = new_chunk
end

#done?(two, org) ⇒ Boolean

that part is done if the are (start-stop) is already included in the patches (first if) or the org part overlaps any of the patches org’s the second case is then a copy

Returns:

  • (Boolean)


445
446
447
448
449
450
451
452
453
454
455
# File 'lib/merge3.rb', line 445

def done? two , org
  #puts "checking done? \n #{two} \n #{org} "
  each do |chunk|
    return true if chunk.overlaps? two
  end
  each_org do |chunk|
    return true if chunk.org_overlaps? org
  end
  #puts "NOT DONE? : \n #{self}" 
  false
end

#eachObject

yields all chunks in the sequence in edited file order



458
459
460
461
462
# File 'lib/merge3.rb', line 458

def each 
  @chunks.each do |chunk|
    yield chunk if chunk
  end
end

#each_orgObject

yields all chunks in original file order



465
466
467
468
469
# File 'lib/merge3.rb', line 465

def each_org 
  @org_chunks.each do |chunk|
    yield chunk if chunk
  end
end

#each_org_pairObject

helper to iterate over each pair in the original file order



358
359
360
361
362
363
364
365
366
367
368
# File 'lib/merge3.rb', line 358

def each_org_pair
  return if @org_chunks.length < 2
  idx = 1 
  chunk = @org_chunks[0]
  while idx < @org_chunks.length
    next_chunk = @org_chunks[idx]
    yield(chunk , next_chunk)
    chunk = next_chunk
    idx = idx + 1
  end
end

#each_pairObject

helper to iterate over each pair in the edited file order



345
346
347
348
349
350
351
352
353
354
355
# File 'lib/merge3.rb', line 345

def each_pair
  return if @chunks.length < 2
  idx = 1 
  chunk = @chunks[0]
  while idx < @chunks.length
    next_chunk = @chunks[idx]
    yield(chunk , next_chunk)
    chunk = next_chunk
    idx = idx + 1
  end
end

#find_from(pos) ⇒ Object

find the chunk that starts at the given position in the original file



404
405
406
407
408
409
410
411
412
# File 'lib/merge3.rb', line 404

def find_from pos
  each_org do | chunk |
    #puts "FOUND for #{pos} #{chunk}" 
    return chunk if chunk.from == pos
  end
  raise "Not found #{pos} in\n#{self}" 
  # hmmm this was here for some reason I don't recall
  # return chunk.next if chunk.added and chunk.added.start == pos
end

#find_left_rim(org, pos) ⇒ Object

find the chunk left to these positions (original and edited) and return the maximum bound a new chunk can expand too in both coordinates.



416
417
418
419
420
421
422
423
424
425
426
# File 'lib/merge3.rb', line 416

def find_left_rim org , pos 
  start_rim , two_rim = 0 ,0
  each_org do | two |
    start_rim = two.org_stop if start_rim  <= two.org_stop and two.org_stop <= org 
  end
  each do | one |
    two_rim = one.stop if  two_rim <= one.stop and one.stop <= pos
  end
  #puts "Left rim #{org} #{pos} is #{start_rim} #{two_rim}" if DEBUG
  return start_rim , two_rim 
end

#find_right_rim(start, two, start_start, two_start) ⇒ Object

find the chunk right to these positions (original and edited) and return the maximum bound a new chunk can expand too in both coordinates.



430
431
432
433
434
435
436
437
438
439
440
# File 'lib/merge3.rb', line 430

def find_right_rim start , two , start_start , two_start 
  start_rim , two_rim = start_start , two_start
  each_org do | tw |
    start_rim = tw.from if start <= tw.from and tw.from <= start_rim  
  end
  each do | one |
    two_rim = one.start if two <= one.start  and one.start <= two_rim 
  end
  #puts "\Right rim #{start} #{two} is #{start_rim} #{two_rim}" if DEBUG
  return start_rim , two_rim 
end

#get_next(prev) ⇒ Object

get the next chunk in edited file order



371
372
373
374
375
376
377
# File 'lib/merge3.rb', line 371

def get_next( prev )
  idx = @chunks.index prev
  return nil unless idx
  return nil if idx == @chunks.length - 1
  nekst = @chunks[idx + 1 ]
  return nekst 
end

#no_overlap(sequence) ⇒ Object

change all chunks in this sequence so that they don’t have overlaps with any of the chunks in the given sequence. done both ways results in non-overlapping sequences



474
475
476
477
478
479
480
481
# File 'lib/merge3.rb', line 474

def no_overlap sequence
  each_org do |one|
    sequence.each_org do |two|
      break_at( one , two.from )     if one.org_includes?( two.from ) 
      break_at( one , two.org_stop ) if one.org_includes?( two.org_stop )
    end
  end 
end

#remove_chunk(chunk) ⇒ Object

for rotation, will be readded



398
399
400
401
# File 'lib/merge3.rb', line 398

def remove_chunk( chunk ) # for rotation, will be readded
  @chunks.delete(chunk)
  @org_chunks.delete(chunk)
end

#to_sObject

output both lists of chunks, edited and original, with chunks and numbers but strings only up to 70 chars (for readability)



264
265
266
267
268
269
270
271
272
273
# File 'lib/merge3.rb', line 264

def to_s
  ret = ""
  @chunks.each_with_index do | chunk , index |
    ret += index.to_s + " - " + chunk.to_s + "\n"
  end 
  @org_chunks.each_with_index do | chunk , index |
    ret += index.to_s + " - " + chunk.to_s + "\n"
  end 
  return ret 
end