Module: CSVDiff::Algorithm
- Included in:
- CSVDiff
- Defined in:
- lib/csv-diff/algorithm.rb
Overview
Implements the CSV diff algorithm.
Defined Under Namespace
Classes: Diff
Instance Method Summary
-
#diff_row(left_row, right_row, fields) ⇒ Hash<String, Array>
Identifies the fields that are different between two versions of the same row.
-
#diff_sources(left, right, key_fields, diff_fields, options = {}) ⇒ Object
Diffs two CSVSource structures.
Instance Method Details
#diff_row(left_row, right_row, fields) ⇒ Hash<String, Array>
Identifies the fields that are different between two versions of the same row. Returns nil when no fields differ.
192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
# File 'lib/csv-diff/algorithm.rb', line 192

# Identifies the fields that are different between two versions of the same
# row.
#
# Blank strings ("") are normalised to nil before comparison. Each field is
# then compared using, in order of preference:
#   1. the proc registered for that field in @equality_procs, if any;
#   2. plain == when @case_sensitive is truthy;
#   3. equality of the upcased string forms otherwise.
#
# @param left_row [#[]] the left version of the row, indexed by field name
# @param right_row [#[]] the right version of the row, indexed by field name
# @param fields [Array] the names of the fields to compare
# @return [Hash, nil] a hash mapping each differing field name to
#   [left_value, right_value], or nil when no compared field differs
def diff_row(left_row, right_row, fields)
  # diff_sources assigns @equality_procs before calling this method; fall back
  # to an empty table so a direct call does not fail on an unset ivar.
  equality_procs = @equality_procs || {}
  diffs = {}

  fields.each do |attr|
    eq_proc = equality_procs[attr]
    right_val = right_row[attr]
    right_val = nil if right_val == ""
    left_val = left_row[attr]
    left_val = nil if left_val == ""

    same =
      if eq_proc
        eq_proc.call(left_val, right_val)
      elsif @case_sensitive
        left_val == right_val
      else
        left_val.to_s.upcase == right_val.to_s.upcase
      end

    diffs[attr] = [left_val, right_val] unless same
  end

  # Callers use the nil return as "no changes" (see the truthiness test in
  # diff_sources), so an empty hash must not be returned.
  diffs unless diffs.empty?
end
#diff_sources(left, right, key_fields, diff_fields, options = {}) ⇒ Object
Diffs two CSVSource structures.
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
# File 'lib/csv-diff/algorithm.rb', line 77

# Diffs two CSVSource structures.
#
# Keys are '~'-joined strings whose first left.parent_fields.length segments
# form the parent key and whose remaining segments form the child key.
#
# @param left [CSVSource] the "from" source; must respond to case_sensitive?,
#   parent_fields, index (parent key => ordered list of keys) and lines
#   (key => row)
# @param right [CSVSource] the "to" source; same interface as left, plus
#   child_fields
# @param key_fields [Array] the fields that identify a row; these are removed
#   from diff_fields and passed to id_fields when recording a change
# @param diff_fields [Array] the fields to compare for updates
# @param options [Hash] processing options
# @option options [Boolean] :ignore_adds do not report rows only in right
# @option options [Boolean] :ignore_moves do not report sibling re-ordering
# @option options [Boolean] :ignore_updates do not report field changes
# @option options [Boolean] :ignore_deletes do not report rows only in left
# @option options [Hash] :equality_procs per-field comparison procs handed to
#   diff_row (defaults to {})
# @return [Hash] a hash of key => Diff for every reported difference
# @raise [ArgumentError] if left and right disagree on case-sensitivity or on
#   the number of parent fields
# @raise [RuntimeError] if a deleted key cannot be found in its parent's list
def diff_sources(left, right, key_fields, diff_fields, options = {})
  unless left.case_sensitive? == right.case_sensitive?
    raise ArgumentError, "Left and right must have same settings for case-sensitivity"
  end
  unless left.parent_fields.length == right.parent_fields.length
    raise ArgumentError, "Left and right must have same settings for parent/child fields"
  end

  # Ensure key fields are not also in the diff_fields
  diff_fields = diff_fields - key_fields

  left_index = left.index
  left_values = left.lines
  left_keys = left_values.keys
  right_index = right.index
  right_values = right.lines
  right_keys = right_values.keys
  parent_field_count = left.parent_fields.length

  include_adds = !options[:ignore_adds]
  include_moves = !options[:ignore_moves]
  include_updates = !options[:ignore_updates]
  include_deletes = !options[:ignore_deletes]

  # State read by diff_row
  @case_sensitive = left.case_sensitive?
  @equality_procs = options.fetch(:equality_procs, {})

  diffs = {}
  # child key => keys deleted from the left; an add on the right with the same
  # child key is treated as that row moving to a new parent.
  potential_moves = Hash.new { |h, k| h[k] = [] }

  # First identify deletions
  if include_deletes
    # Row position of every left key, built once instead of a linear
    # left_keys.index(key) scan per deleted key.
    left_row_positions = left_keys.each_with_index.to_h

    (left_keys - right_keys).each do |key|
      key_vals = key.split('~', -1)
      parent = key_vals[0...parent_field_count].join('~')
      child = key_vals[parent_field_count..-1].join('~')
      left_parent = left_index[parent]
      left_value = left_values[key]
      row_idx = left_row_positions[key]
      sib_idx = left_parent.index(key)
      raise "Can't locate key #{key} in parent #{parent}" unless sib_idx

      diffs[key] = Diff.new(:delete, left_value, row_idx, sib_idx)
      potential_moves[child] << key
    end
  end

  # Now identify adds/updates
  right_keys.each_with_index do |key, right_row_id|
    key_vals = key.split('~', -1)
    parent = key_vals[0...parent_field_count].join('~')
    left_parent = left_index[parent]
    right_parent = right_index[parent]
    left_value = left_values[key]
    right_value = right_values[key]
    left_idx = left_parent && left_parent.index(key)
    right_idx = right_parent && right_parent.index(key)

    if left_idx && right_idx
      # Present on both sides under the same parent: update and/or move
      if include_updates && (changes = diff_row(left_value, right_value, diff_fields))
        id = id_fields(key_fields, right_value)
        diffs[key] = Diff.new(:update, id.merge!(changes), right_row_id, right_idx)
      end

      if include_moves
        # Compare positions among the siblings common to both sides only, so
        # that adds and deletes around a row do not register as a move.
        left_common = left_parent & right_parent
        right_common = right_parent & left_parent
        left_pos = left_common.index(key)
        right_pos = right_common.index(key)
        if left_pos != right_pos
          if (existing = diffs[key])
            # Already recorded as an update; just note the position change
            existing.sibling_position = [left_idx, right_idx]
          else
            id = id_fields(key_fields, right_value)
            diffs[key] = Diff.new(:move, id, right_row_id, [left_idx, right_idx])
          end
        end
      end
    elsif right_idx
      # Only on the right: either a re-parented row or a genuine add
      child = key_vals[parent_field_count..-1].join('~')
      if potential_moves.key?(child) && (old_key = potential_moves[child].pop)
        # Same child key was deleted from another parent: replace the delete
        # with an update that includes the parent fields.
        diffs.delete(old_key)
        if include_updates
          left_value = left_values[old_key]
          id = id_fields(right.child_fields, right_value)
          changes = diff_row(left_value, right_value, left.parent_fields + diff_fields)
          # NOTE(review): diff_row returns nil when it finds no differences, in
          # which case merge! raises TypeError -- confirm that cannot happen
          # here (e.g. keys differing only in ways diff_row treats as equal).
          diffs[key] = Diff.new(:update, id.merge!(changes), right_row_id, right_idx)
        end
      elsif include_adds
        diffs[key] = Diff.new(:add, right_value, right_row_id, right_idx)
      end
    end
  end

  diffs
end