Class: MyersDiff::WordDiff
- Inherits:
-
Object
- Object
- MyersDiff::WordDiff
- Defined in:
- lib/myers_diff/word_diff.rb
Constant Summary collapse
- BOUNDARIES_REGEX =
/(\s+|[()\[\]{}'"]|\b)/.freeze
- EXTENDED_WORD_CHARS_REGEX =
/^[a-zA-Z\u{C0}-\u{FF}\u{D8}-\u{F6}\u{F8}-\u{2C6}\u{2C8}-\u{2D7}\u{2DE}-\u{2FF}\u{1E00}-\u{1EFF}]+$/u.freeze
Instance Method Summary collapse
-
#build_values(components, new_string, old_string, use_longest_token = true) ⇒ Object
new_string - tokenized string i.e.
- #cast_input(str) ⇒ Object
- #clone_path(path_hash) ⇒ Object
- #diff(s1, s2, **options) ⇒ Object
- #equals(l, r) ⇒ Object
-
#extract_common(base_path, new_string, old_string, diagonal_path) ⇒ Object
base_path : { new_pos: int, components: [] } diagonal_path : int.
- #join(chars) ⇒ Object
- #push_component(components, added, removed) ⇒ Object
- #recount(components) ⇒ Object
- #remove_empty(array) ⇒ Object
- #tokenize(str) ⇒ Object
Instance Method Details
#build_values(components, new_string, old_string, use_longest_token = true) ⇒ Object
new_string - tokenized string i.e. array of strings
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
# File 'lib/myers_diff/word_diff.rb', line 146 def build_values(components, new_string, old_string, use_longest_token = true) component_pos = 0 component_len = components.size new_pos = 0 old_pos = 0 while component_pos < component_len component = components[component_pos] if !component[:removed] if !component[:added] && use_longest_token value = new_string[new_pos, component[:count]] value = value.map.with_index do |val, i| old_val = old_string[old_pos + i] old_val.size > val.size ? old_val : val end component[:value] = join(value) else component[:value] = join(new_string[new_pos, component[:count]]) end new_pos += component[:count] old_pos += component[:count] unless component[:added] else component[:value] = join(old_string[old_pos, component[:count]]) old_pos += component[:count] if component_pos && 0 <= component_pos - 1 && components[component_pos - 1][:added] tmp = components[component_pos - 1] components[component_pos - 1] = components[component_pos] components[component_pos] = tmp end end component_pos += 1 end last_component = components[component_len - 1] if component_len > 1 && last_component[:value].is_a?(String) && (last_component[:added] || last_component[:removed]) && equals('', last_component[:value]) components[component_len - 2][:value] += last_component[:value] components.pop end recount(components) components end |
#cast_input(str) ⇒ Object
118 119 120 |
# File 'lib/myers_diff/word_diff.rb', line 118 def cast_input(str) str end |
#clone_path(path_hash) ⇒ Object
197 198 199 |
# File 'lib/myers_diff/word_diff.rb', line 197 def clone_path(path_hash) { new_pos: path_hash[:new_pos], components: path_hash[:components].dup } end |
#diff(s1, s2, **options) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/myers_diff/word_diff.rb', line 6 def diff(s1, s2, **) old_string = cast_input(s1) new_string = cast_input(s2) old_string = remove_empty(tokenize(old_string)) new_string = remove_empty(tokenize(new_string)) new_len = new_string.size old_len = old_string.size edit_length = 1 max_edit_length = new_len + old_len best_path = { } best_path[0] = { new_pos: -1, components: [] } old_pos = extract_common(best_path[0], new_string, old_string, 0) if best_path[0][:new_pos] + 1 >= new_len && old_pos + 1 >= old_len return [ { value: join(new_string), count: new_string.size } ] end exec_edit_length = lambda do diagonal_path = -1 * edit_length while diagonal_path <= edit_length add_path = best_path[diagonal_path - 1] remove_path = best_path[diagonal_path + 1] old_pos = (remove_path ? remove_path[:new_pos] : 0) - diagonal_path best_path[diagonal_path - 1] = nil if add_path can_add = add_path && add_path[:new_pos] + 1 < new_len can_remove = remove_path && 0 <= old_pos && old_pos < old_len if !can_add && !can_remove best_path[diagonal_path] = nil diagonal_path += 2 next end base_path = if !can_add || (can_remove && add_path[:new_pos] < remove_path[:new_pos]) p = clone_path(remove_path) push_component(p[:components], nil, true) p else p = add_path p[:new_pos] += 1 push_component(p[:components], true, nil) p end old_pos = extract_common(base_path, new_string, old_string, diagonal_path) if base_path[:new_pos] + 1 >= new_len && old_pos + 1 >= old_len return build_values(base_path[:components], new_string, old_string) else best_path[diagonal_path] = base_path end diagonal_path += 2 end edit_length += 1 nil end while edit_length <= max_edit_length if res = exec_edit_length.call return res end end 'death' end |
#equals(l, r) ⇒ Object
108 109 110 111 112 |
# File 'lib/myers_diff/word_diff.rb', line 108 def equals(l, r) l == r # TODO: support custom comparator # TODO: support case-insensitive end |
#extract_common(base_path, new_string, old_string, diagonal_path) ⇒ Object
base_path : { new_pos: int, components: [] } diagonal_path : int
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/myers_diff/word_diff.rb', line 87 def extract_common(base_path, new_string, old_string, diagonal_path) new_len = new_string.size old_len = old_string.size new_pos = base_path[:new_pos] old_pos = new_pos - diagonal_path common_count = 0 while new_pos + 1 < new_len && old_pos + 1 < old_len && equals(new_string[new_pos + 1], old_string[old_pos + 1]) new_pos += 1 old_pos += 1 common_count += 1 end if common_count > 0 base_path[:components].push(count: common_count) end base_path[:new_pos] = new_pos old_pos end |
#join(chars) ⇒ Object
141 142 143 |
# File 'lib/myers_diff/word_diff.rb', line 141 def join(chars) chars.join('') end |
#push_component(components, added, removed) ⇒ Object
76 77 78 79 80 81 82 83 |
# File 'lib/myers_diff/word_diff.rb', line 76 def push_component(components, added, removed) last = components.last if last && last[:added] == added && last[:removed] == removed components[-1] = { added: last[:added], removed: last[:removed], count: last[:count] + 1 } else components.push(count: 1, added: added, removed: removed) end end |
#recount(components) ⇒ Object
201 202 203 |
# File 'lib/myers_diff/word_diff.rb', line 201 def recount(components) components.each { |component| component[:count] = component[:value].size } end |
#remove_empty(array) ⇒ Object
114 115 116 |
# File 'lib/myers_diff/word_diff.rb', line 114 def remove_empty(array) array.compact end |
#tokenize(str) ⇒ Object
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/myers_diff/word_diff.rb', line 122 def tokenize(str) tokens = str.split(BOUNDARIES_REGEX) i = 0 while i < tokens.size - 1 if tokens[i + 1].empty? && !tokens[i + 2].empty? && EXTENDED_WORD_CHARS_REGEX.match?(tokens[i]) && EXTENDED_WORD_CHARS_REGEX.match?(tokens[i + 2]) tokens[i] += tokens[i + 2] tokens.delete_at(i + 1) tokens.delete_at(i + 1) i -= 1 end i += 1 end tokens end |