Class: MyersDiff::WordDiff

Inherits:
Object
  • Object
show all
Defined in:
lib/myers_diff/word_diff.rb

Constant Summary collapse

BOUNDARIES_REGEX =
/(\s+|[()\[\]{}'"]|\b)/.freeze
EXTENDED_WORD_CHARS_REGEX =
/^[a-zA-Z\u{C0}-\u{FF}\u{D8}-\u{F6}\u{F8}-\u{2C6}\u{2C8}-\u{2D7}\u{2DE}-\u{2FF}\u{1E00}-\u{1EFF}]+$/u.freeze

Instance Method Summary collapse

Instance Method Details

#build_values(components, new_string, old_string, use_longest_token = true) ⇒ Object

new_string - tokenized string i.e. array of strings



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/myers_diff/word_diff.rb', line 146

def build_values(components, new_string, old_string, use_longest_token = true)
  component_pos = 0
  component_len = components.size
  new_pos = 0
  old_pos = 0

  while component_pos < component_len
    component = components[component_pos]
    if !component[:removed]
      if !component[:added] && use_longest_token
        value = new_string[new_pos, component[:count]]
        value = value.map.with_index do |val, i|
          old_val = old_string[old_pos + i]
          old_val.size > val.size ? old_val : val
        end

        component[:value] = join(value)
      else
        component[:value] = join(new_string[new_pos, component[:count]])
      end

      new_pos += component[:count]
      old_pos += component[:count] unless component[:added]
    else
      component[:value] = join(old_string[old_pos, component[:count]])
      old_pos += component[:count]

      if component_pos && 0 <= component_pos - 1 && components[component_pos - 1][:added]
        tmp = components[component_pos - 1]
        components[component_pos - 1] = components[component_pos]
        components[component_pos] = tmp
      end
    end

    component_pos += 1
  end

  last_component = components[component_len - 1]
  if component_len > 1 &&
      last_component[:value].is_a?(String) &&
      (last_component[:added] || last_component[:removed]) &&
      equals('', last_component[:value])
    components[component_len - 2][:value] += last_component[:value]
    components.pop
  end

  recount(components)

  components
end

#cast_input(str) ⇒ Object



118
119
120
# File 'lib/myers_diff/word_diff.rb', line 118

def cast_input(str)
  str
end

#clone_path(path_hash) ⇒ Object



197
198
199
# File 'lib/myers_diff/word_diff.rb', line 197

def clone_path(path_hash)
  { new_pos: path_hash[:new_pos], components: path_hash[:components].dup }
end

#diff(s1, s2, **options) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/myers_diff/word_diff.rb', line 6

def diff(s1, s2, **options)
  old_string = cast_input(s1)
  new_string = cast_input(s2)

  old_string = remove_empty(tokenize(old_string))
  new_string = remove_empty(tokenize(new_string))

  new_len = new_string.size
  old_len = old_string.size
  edit_length = 1
  max_edit_length = new_len + old_len
  best_path = { }
  best_path[0] = { new_pos: -1, components: [] }

  old_pos = extract_common(best_path[0], new_string, old_string, 0)
  if best_path[0][:new_pos] + 1 >= new_len && old_pos + 1 >= old_len
    return [ { value: join(new_string), count: new_string.size } ]
  end

  exec_edit_length = lambda do
    diagonal_path = -1 * edit_length
    while diagonal_path <= edit_length
      add_path = best_path[diagonal_path - 1]
      remove_path = best_path[diagonal_path + 1]
      old_pos = (remove_path ? remove_path[:new_pos] : 0) - diagonal_path
      best_path[diagonal_path - 1] = nil if add_path

      can_add = add_path && add_path[:new_pos] + 1 < new_len
      can_remove = remove_path && 0 <= old_pos && old_pos < old_len
      if !can_add && !can_remove
        best_path[diagonal_path] = nil
        diagonal_path += 2
        next
      end

      base_path = if !can_add || (can_remove && add_path[:new_pos] < remove_path[:new_pos])
        p = clone_path(remove_path)
        push_component(p[:components], nil, true)
        p
      else
        p = add_path
        p[:new_pos] += 1
        push_component(p[:components], true, nil)
        p
      end

      old_pos = extract_common(base_path, new_string, old_string, diagonal_path)

      if base_path[:new_pos] + 1 >= new_len && old_pos + 1 >= old_len
        return build_values(base_path[:components], new_string, old_string)
      else
        best_path[diagonal_path] = base_path
      end

      diagonal_path += 2
    end

    edit_length += 1
    nil
  end

  while edit_length <= max_edit_length
    if res = exec_edit_length.call
      return res
    end
  end

  'death'
end

#equals(l, r) ⇒ Object



108
109
110
111
112
# File 'lib/myers_diff/word_diff.rb', line 108

def equals(l, r)
  l == r
  # TODO: support custom comparator
  # TODO: support case-insensitive
end

#extract_common(base_path, new_string, old_string, diagonal_path) ⇒ Object

base_path : { new_pos: int, components: [] } diagonal_path : int



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/myers_diff/word_diff.rb', line 87

def extract_common(base_path, new_string, old_string, diagonal_path)
  new_len = new_string.size
  old_len = old_string.size
  new_pos = base_path[:new_pos]
  old_pos = new_pos - diagonal_path
  common_count = 0

  while new_pos + 1 < new_len && old_pos + 1 < old_len && equals(new_string[new_pos + 1], old_string[old_pos + 1])
    new_pos += 1
    old_pos += 1
    common_count += 1
  end

  if common_count > 0
    base_path[:components].push(count: common_count)
  end

  base_path[:new_pos] = new_pos
  old_pos
end

#join(chars) ⇒ Object



141
142
143
# File 'lib/myers_diff/word_diff.rb', line 141

def join(chars)
  chars.join('')
end

#push_component(components, added, removed) ⇒ Object



76
77
78
79
80
81
82
83
# File 'lib/myers_diff/word_diff.rb', line 76

def push_component(components, added, removed)
  last = components.last
  if last && last[:added] == added && last[:removed] == removed
    components[-1] = { added: last[:added], removed: last[:removed], count: last[:count] + 1 }
  else
    components.push(count: 1, added: added, removed: removed)
  end
end

#recount(components) ⇒ Object



201
202
203
# File 'lib/myers_diff/word_diff.rb', line 201

def recount(components)
  components.each { |component| component[:count] = component[:value].size }
end

#remove_empty(array) ⇒ Object



114
115
116
# File 'lib/myers_diff/word_diff.rb', line 114

def remove_empty(array)
  array.compact
end

#tokenize(str) ⇒ Object



122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/myers_diff/word_diff.rb', line 122

def tokenize(str)
  tokens = str.split(BOUNDARIES_REGEX)

  i = 0
  while i < tokens.size - 1
    if tokens[i + 1].empty? && !tokens[i + 2].empty? &&
        EXTENDED_WORD_CHARS_REGEX.match?(tokens[i]) &&
        EXTENDED_WORD_CHARS_REGEX.match?(tokens[i + 2])
      tokens[i] += tokens[i + 2]
      tokens.delete_at(i + 1)
      tokens.delete_at(i + 1)
      i -= 1
    end
    i += 1
  end

  tokens
end