Method: Sequence#initialize

Defined in:
lib/merge3.rb

#initialize(too_chunk, start_chunk, start_matches) ⇒ Sequence

Find the differences between the edited input `too` (a file or byte array) and the original `start`; `start_matches` are the precalculated matches for `start`. Collect the differences as chunks (copies or adds) and mark the deletes. The algorithm is “greedy”: whenever it finds a match (using the precalculated matches) it tries to extend that match in both directions.



280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
# File 'lib/merge3.rb', line 280

# Build the sequence of differences between start_chunk (the original) and
# too_chunk (the edited version).
#
# The matching is greedy: every pre-computed match that occurs in both inputs
# is extended to the left and then to the right for as long as the elements
# agree and the rims reported by find_left_rim / find_right_rim are not
# crossed. Afterwards the gaps in the matching of the original are recorded
# as Deleted chunks, and the gaps in the matching of the edited text are hung
# onto the preceding chunk as its "added" part.
#
# too_chunk::     the edited text (responds to str, length and subchunk)
# start_chunk::   the original text (responds to str and length)
# start_matches:: matches pre-computed for start_chunk, looked up here by the
#                 matched string
#
# Raises a RuntimeError ("Keys collide") when the match found in
# start_matches carries a different string than the one used to look it up.
def initialize(too_chunk, start_chunk, start_matches)
  @chunks = []
  @org_chunks = []
  # run by length to get big (sure) hits first (-1 == newlines)
  [-1, 48, 32, 16].each do |c_size|
    Merge3.matches(too_chunk, c_size).each do |_key, two|
      start = start_matches[two.str]
      next if start.nil?
      raise "Keys collide :#{start}" if start.str != two.str
      next if done?(two, start)
      # here comes the greedy part, first left then right
      two_start, start_start, len = two.start, start.start, two.length
      puts two, "MINUS" if len <= 0 && DEBUG
      start_rim, edit_rim = find_left_rim(start_start, two_start)
      while (start_chunk.str[start_start - 1] == too_chunk.str[two_start - 1]) &&
            (edit_rim < two_start) && (start_rim < start_start)
        two_start -= 1
        start_start -= 1
        len += 1 # to keep the right end where it was
      end
      start_rim, edit_rim = find_right_rim(start_start + len, two_start + len,
                                           start_chunk.length, too_chunk.length)
      while (start_chunk.str[start_start + len] == too_chunk.str[two_start + len]) &&
            ((len + start_start) < start_rim) && ((two_start + len) < edit_rim)
        len += 1
      end
      chunk = too_chunk.subchunk(two_start, len, start_start)
      puts "Matched #{chunk}" if DEBUG
      add_chunk(chunk)
    end
  end
  # now find the parts that were deleted
  #  (gaps in the matching of the original file)
  each_org_pair do |org, next_org|
    if org.org_stop < next_org.from
      del_str = start_chunk.str[org.org_stop, next_org.from - org.org_stop]
      puts "DELETED #{org.org_stop} --#{del_str}--" if DEBUG
      del = Deleted.new(org.stop, org.org_stop, del_str)
      add_chunk(del)
    end
  end
  # now find the parts that were added (gaps in the matching of the edited file)
  each_pair do |chunk, next_chunk|
    next unless chunk.stop < next_chunk.start
    add = too_chunk.subchunk(chunk.stop, next_chunk.start - chunk.stop, -1)
    puts "ADDING  #{add} " if DEBUG
    chunk.added = add # hang the add onto the chunk
    # Deleted chunks never take part in the rotation below
    next if chunk.kind_of?(Deleted) || next_chunk.kind_of?(Deleted)
    # this following logic has fixed some cases, where the matching had
    # been somewhat unintuitive. Though correct it produced
    # unnecessary conflicts, in conjunction with deletes
    #
    # The space test checks both 32 and ' ': indexing a String with an Integer
    # yields a byte value on Ruby 1.8 but a one-character String on 1.9+, so
    # comparing against 32 alone never detects a space on modern Rubies.
    while add.str[0] == next_chunk.str[0] &&
          add.str[0] != 32 && add.str[0] != ' ' && # not spaces, avoid whitespace headaches
          chunk.org_stop == next_chunk.from
      puts "before: #{chunk} \nNext:#{next_chunk}" if DEBUG
      add.rotate # put the first to the end
      remove_chunk(next_chunk)
      # move the first from the next to the end of the last
      chunk.push(next_chunk.pop)
      puts "after: #{chunk} \nNext:#{next_chunk}" if DEBUG
      add_chunk(next_chunk)
    end
  end
end