Class: MyersDiff::WordDiff

Inherits:
Object
  • Object
show all
Defined in:
lib/myers_diff/word_diff.rb

Instance Method Summary collapse

Instance Method Details

#build_values(components, new_string, old_string, use_longest_token = true) ⇒ Object

new_string - tokenized string i.e. array of strings



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/myers_diff/word_diff.rb', line 128

def build_values(components, new_string, old_string, use_longest_token = true)
  component_pos = 0
  component_len = components.size
  new_pos = 0
  old_pos = 0

  while component_pos < component_len
    component = components[component_pos]
    if !component[:removed]
      if !component[:added] && use_longest_token
        value = new_string[new_pos, component[:count]]
        value = value.map.with_index do |val, i|
          old_val = old_string[old_pos + i]
          old_val.size > val.size ? old_val : val
        end

        component[:value] = join(value)
      else
        component[:value] = join(new_string[new_pos, component[:count]])
      end

      new_pos += component[:count]
      old_pos += component[:count] unless component[:added]
    else
      component[:value] = join(old_string[old_pos, component[:count]])
      old_pos += component[:count]

      if component_pos && 0 <= component_pos - 1 && components[component_pos - 1][:added]
        tmp = components[component_pos - 1]
        components[component_pos - 1] = components[component_pos]
        components[component_pos] = tmp
      end
    end

    component_pos += 1
  end

  last_component = components[component_len - 1]
  if component_len > 1 &&
      last_component[:value].is_a?(String) &&
      (last_component[:added] || last_component[:removed]) &&
      equals('', last_component[:value])
    components[component_len - 2][:value] += last_component[:value]
    components.pop
  end

  recount(components)

  components
end

#cast_input(str) ⇒ Object



115
116
117
# File 'lib/myers_diff/word_diff.rb', line 115

def cast_input(str)
  str
end

#clone_path(path_hash) ⇒ Object



179
180
181
# File 'lib/myers_diff/word_diff.rb', line 179

def clone_path(path_hash)
  { new_pos: path_hash[:new_pos], components: path_hash[:components].dup }
end

#diff(s1, s2, **options) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/myers_diff/word_diff.rb', line 3

def diff(s1, s2, **options)
  old_string = cast_input(s1)
  new_string = cast_input(s2)

  old_string = remove_empty(tokenize(old_string))
  new_string = remove_empty(tokenize(new_string))

  new_len = new_string.size
  old_len = old_string.size
  edit_length = 1
  max_edit_length = new_len + old_len
  best_path = { }
  best_path[0] = { new_pos: -1, components: [] }

  old_pos = extract_common(best_path[0], new_string, old_string, 0)
  if best_path[0][:new_pos] + 1 >= new_len && old_pos + 1 >= old_len
    return [ { value: join(new_string), count: new_string.size } ]
  end

  exec_edit_length = lambda do
    diagonal_path = -1 * edit_length
    while diagonal_path <= edit_length
      add_path = best_path[diagonal_path - 1]
      remove_path = best_path[diagonal_path + 1]
      old_pos = (remove_path ? remove_path[:new_pos] : 0) - diagonal_path
      best_path[diagonal_path - 1] = nil if add_path

      can_add = add_path && add_path[:new_pos] + 1 < new_len
      can_remove = remove_path && 0 <= old_pos && old_pos < old_len
      if !can_add && !can_remove
        best_path[diagonal_path] = nil
        diagonal_path += 2
        next
      end

      base_path = if !can_add || (can_remove && add_path[:new_pos] < remove_path[:new_pos])
        p = clone_path(remove_path)
        push_component(p[:components], nil, true)
        p
      else
        p = add_path
        p[:new_pos] += 1
        push_component(p[:components], true, nil)
        p
      end

      old_pos = extract_common(base_path, new_string, old_string, diagonal_path)

      if base_path[:new_pos] + 1 >= new_len && old_pos + 1 >= old_len
        return build_values(base_path[:components], new_string, old_string)
      else
        best_path[diagonal_path] = base_path
      end

      diagonal_path += 2
    end

    edit_length += 1
    nil
  end

  while edit_length <= max_edit_length
    if res = exec_edit_length.call
      return res
    end
  end

  'death'
end

#equals(l, r) ⇒ Object



105
106
107
108
109
# File 'lib/myers_diff/word_diff.rb', line 105

def equals(l, r)
  l == r
  # TODO: support custom comparator
  # TODO: support case-insensitive
end

#extract_common(base_path, new_string, old_string, diagonal_path) ⇒ Object

base_path : { new_pos: int, components: [] } diagonal_path : int



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/myers_diff/word_diff.rb', line 84

def extract_common(base_path, new_string, old_string, diagonal_path)
  new_len = new_string.size
  old_len = old_string.size
  new_pos = base_path[:new_pos]
  old_pos = new_pos - diagonal_path
  common_count = 0

  while new_pos + 1 < new_len && old_pos + 1 < old_len && equals(new_string[new_pos + 1], old_string[old_pos + 1])
    new_pos += 1
    old_pos += 1
    common_count += 1
  end

  if common_count > 0
    base_path[:components].push(count: common_count)
  end

  base_path[:new_pos] = new_pos
  old_pos
end

#join(chars) ⇒ Object



123
124
125
# File 'lib/myers_diff/word_diff.rb', line 123

def join(chars)
  chars.join(' ')
end

#push_component(components, added, removed) ⇒ Object



73
74
75
76
77
78
79
80
# File 'lib/myers_diff/word_diff.rb', line 73

def push_component(components, added, removed)
  last = components.last
  if last && last[:added] == added && last[:removed] == removed
    components[-1] = { added: last[:added], removed: last[:removed], count: last[:count] + 1 }
  else
    components.push(count: 1, added: added, removed: removed)
  end
end

#recount(components) ⇒ Object



183
184
185
# File 'lib/myers_diff/word_diff.rb', line 183

def recount(components)
  components.each { |component| component[:count] = component[:value].size }
end

#remove_empty(array) ⇒ Object



111
112
113
# File 'lib/myers_diff/word_diff.rb', line 111

def remove_empty(array)
  array.compact
end

#tokenize(str) ⇒ Object



119
120
121
# File 'lib/myers_diff/word_diff.rb', line 119

def tokenize(str)
  str.split(' ')
end