Method: Sequence#initialize

Defined in:: lib/merge3.rb

#initialize(too_chunk, start_chunk, start_matches) ⇒ `Sequence`

Find the differences from too (a file or byte array) to start (start_matches are the precalculated matches for start). Collect the differences as chunks (copies or adds) and mark deletes. The algorithm is “greedy”, meaning that if it finds a match (using the matches) it tries to extend it in both directions.

# File 'lib/merge3.rb', line 280

# Build the list of chunks describing how too_chunk differs from start_chunk.
#
# Three passes are made:
# 1. For each match size, every match found in too_chunk is looked up in
#    start_matches. A hit that is not already done? is extended to the left
#    and to the right ("greedy"), bounded by the rims returned from
#    find_left_rim / find_right_rim, and added as a chunk.
# 2. Gaps between consecutive chunks on the original side (each_org_pair)
#    are added as Deleted chunks.
# 3. Gaps between consecutive chunks on the edited side (each_pair) are
#    hung onto the preceding chunk as its +added+ part.
#
# too_chunk::     chunk holding the edited text
# start_chunk::   chunk holding the original text
# start_matches:: precalculated matches for start_chunk, looked up by match string
#
# Raises a RuntimeError ("Keys collide") when the match found in
# start_matches carries a different string than the one it was looked up by.
def initialize(too_chunk, start_chunk, start_matches)
  @chunks = []
  @org_chunks = []
  # run by length to get big (sure) hits first (-1 == newlines)
  [ -1 , 48 , 32, 16 ].each  do |c_size|
    Merge3::matches(too_chunk , c_size).each do |_key , two|
      next if   (start = start_matches[two.str]).nil?
      raise "Keys collide :#{start}"   if  start.str != two.str
      next if done?( two , start )
      # here comes the greedy part, first left then right
      two_start , start_start , len  = two.start , start.start , two.length
      puts two , "MINUS" if len <=  0 and DEBUG
      start_rim , edit_rim = find_left_rim( start_start , two_start )
      # The explicit > 0 checks stop the extension at the very beginning of
      # either text: an index of -1 would silently wrap around to the last
      # element instead of failing the comparison.
      while start_start > 0 and two_start > 0 and
          ( start_chunk.str[start_start - 1] == too_chunk.str[two_start - 1] ) and
          (  edit_rim < two_start  )  and (  start_rim < start_start )
        two_start -= 1
        start_start -= 1
        len += 1 # to keep the right end where it was
      end
      start_rim , edit_rim = find_right_rim( start_start + len , two_start + len , start_chunk.length , too_chunk.length )
      while ( start_chunk.str[start_start + len  ] == too_chunk.str[two_start + len ] ) and
          ((len + start_start) < start_rim)  and ((two_start + len) < edit_rim)
        len += 1
      end
      chunk =  too_chunk.subchunk(two_start,len , start_start )
      puts "Matched #{chunk}" if DEBUG
      add_chunk( chunk )
    end
  end
  # now find the parts that were deleted
  #  (gaps in the matching of the original file)
  each_org_pair do | org , next_org |
    if org.org_stop < next_org.from
      del_str = start_chunk.str[org.org_stop , next_org.from  - org.org_stop ]
      puts "DELETED #{org.org_stop} --#{del_str}--" if DEBUG
      del = Deleted.new( org.stop , org.org_stop , del_str)
      add_chunk( del )
    end
  end
  # now find the parts that were added (gaps in the matching of the edited file)
  each_pair do |chunk , next_chunk |
    if chunk.stop < next_chunk.start
      add = too_chunk.subchunk( chunk.stop , next_chunk.start - chunk.stop , -1 )
      puts "ADDING  #{add} "  if DEBUG
      chunk.added =  add   # hang the add onto the chunk
      # this following logic has fixed some cases, where the matching had
      # been somewhat unintuitive. Though correct it produced
      # unnecessary conflicts, in conjunction with deletes
      #
      # The space test accepts both representations of a first element:
      # the Integer 32 (byte arrays, or String#[] on Ruby 1.8) and the
      # one-character String " " (String#[] on Ruby 1.9 and later).
      while add.str[0] == next_chunk.str[0]  and
          add.str[0] != 32 and add.str[0] != " " and   # not spaces, avoid whitespace headaches
          chunk.org_stop == next_chunk.from
        puts "before: #{chunk} \nNext:#{next_chunk}"  if DEBUG
        add.rotate  #put the first to the end
        remove_chunk( next_chunk )
        # move the first from the next to the end of the last
        chunk.push( next_chunk.pop )
        puts "after: #{chunk} \nNext:#{next_chunk}"  if DEBUG
        add_chunk( next_chunk )
      end unless chunk.kind_of?(Deleted) or next_chunk.kind_of?(Deleted)
    end
  end
end

Method: Sequence#initialize

#initialize(too_chunk, start_chunk, start_matches) ⇒ Sequence

#initialize(too_chunk, start_chunk, start_matches) ⇒ `Sequence`