Method: Sequence#initialize
- Defined in:
- lib/merge3.rb
#initialize(too_chunk, start_chunk, start_matches) ⇒ Sequence
Finds the difference from too (a file or byte array) to start; start_matches are the precalculated matches for start. Collects the differences as chunks (copies or adds) and marks deletes. The algorithm is “greedy”: when it finds a match (using the precalculated matches) it tries to extend that match in both directions.
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 |
# File 'lib/merge3.rb', line 280

# Builds the list of chunks that describes how +too_chunk+ differs from
# +start_chunk+.
#
# Three passes run in order:
# 1. For each match size in [-1, 48, 32, 16] (-1 == newlines), every match of
#    +too_chunk+ whose string is also found in +start_matches+ is extended
#    greedily, first to the left and then to the right, and added as a chunk.
# 2. Gaps in the matching of the original are added as Deleted chunks.
# 3. Gaps in the matching of the edited text are hung onto the preceding
#    chunk through +added=+.
#
# @param too_chunk the edited text; must respond to +str+, +length+ and
#   +subchunk+
# @param start_chunk the original text; must respond to +str+ and +length+
# @param start_matches precalculated matches of the original, looked up by
#   match string
# @raise [RuntimeError] if the match looked up in +start_matches+ carries a
#   different string than the one it was looked up with
def initialize(too_chunk, start_chunk, start_matches)
  @chunks = []
  @org_chunks = []

  # run by length to get big (sure) hits first (-1 == newlines)
  [-1, 48, 32, 16].each do |c_size|
    Merge3::matches(too_chunk, c_size).each do |_key, two|
      start = start_matches[two.str]
      next if start.nil?
      raise "Keys collide :#{start}" if start.str != two.str
      next if done?(two, start)

      # here comes the greedy part, first left then right
      two_start, start_start, len = two.start, start.start, two.length
      puts two, "MINUS" if len <= 0 && DEBUG

      start_rim, edit_rim = find_left_rim(start_start, two_start)
      # NOTE(review): at position 0 the index below is -1, which for a
      # String/Array reads the last element; the rim checks are expected to
      # stop the loop in that case - confirm find_left_rim never returns a
      # negative rim.
      while (start_chunk.str[start_start - 1] == too_chunk.str[two_start - 1]) &&
            (edit_rim < two_start) &&
            (start_rim < start_start)
        two_start -= 1
        start_start -= 1
        len += 1 # to keep the right end where it was
      end

      start_rim, edit_rim = find_right_rim(start_start + len, two_start + len,
                                           start_chunk.length, too_chunk.length)
      while (start_chunk.str[start_start + len] == too_chunk.str[two_start + len]) &&
            ((len + start_start) < start_rim) &&
            ((two_start + len) < edit_rim)
        len += 1
      end

      chunk = too_chunk.subchunk(two_start, len, start_start)
      puts "Matched #{chunk}" if DEBUG
      add_chunk(chunk)
    end
  end

  # now find the parts that were deleted
  # (gaps in the matching of the original file)
  each_org_pair do |org, next_org|
    if org.org_stop < next_org.from
      del_str = start_chunk.str[org.org_stop, next_org.from - org.org_stop]
      puts "DELETED #{org.org_stop} --#{del_str}--" if DEBUG
      del = Deleted.new(org.stop, org.org_stop, del_str)
      add_chunk(del)
    end
  end

  # now find the parts that were added (gaps in the matching of the edited file)
  each_pair do |chunk, next_chunk|
    if chunk.stop < next_chunk.start
      add = too_chunk.subchunk(chunk.stop, next_chunk.start - chunk.stop, -1)
      puts "ADDING #{add} " if DEBUG
      chunk.added = add # hang the add onto the chunk

      # this following logic has fixed some cases, where the matching had
      # been somewhat unintuitive. Though correct it produced
      # unnecessary conflicts, in conjunction with deletes
      #
      # The Deleted guard is evaluated once, before the loop starts.
      unless chunk.kind_of?(Deleted) || next_chunk.kind_of?(Deleted)
        # NOTE(review): "!= 32" only filters spaces when str[0] yields an
        # Integer (a byte array, or a String on Ruby 1.8); on a String under
        # Ruby 1.9+ str[0] is a one-character String and this is always true
        # - confirm the type of str.
        while add.str[0] == next_chunk.str[0] &&
              add.str[0] != 32 && # not spaces, avoid whitespace headaches
              chunk.org_stop == next_chunk.from
          puts "before: #{chunk} \nNext:#{next_chunk}" if DEBUG
          add.rotate # put the first to the end
          remove_chunk(next_chunk)
          # move the first from the next to the end of the last
          chunk.push(next_chunk.pop)
          puts "after: #{chunk} \nNext:#{next_chunk}" if DEBUG
          add_chunk(next_chunk)
        end
      end
    end
  end
end