Module: JsonDiff

Defined in:
lib/json-diff/diff.rb,
lib/json-diff/version.rb,
lib/json-diff/index-map.rb,
lib/json-diff/operation.rb

Defined Under Namespace

Classes: AdditionIndexMap, IndexMap, IndexMaps, RemovalIndexMap

Constant Summary collapse

VERSION =
'0.4.1'

Class Method Summary collapse

Class Method Details

.add(path, value) ⇒ Object



37
38
39
# File 'lib/json-diff/operation.rb', line 37

def self.add(path, value)
  {'op' => 'add', 'path' => path, 'value' => value}
end

.array_changes(pairing) ⇒ Object

Input: [[before index, after index, similarity]],

removed: [before index],
added: [after index]

Output: {removed: [before index],

pairs: [[before index, after index,
  original before index, original after index]],
added: [after index]}


253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# File 'lib/json-diff/diff.rb', line 253

def self.array_changes(pairing)
  # We perform removals starting from the highest index.
  # That way, they don't offset their own.
  pairing[:removed].sort!.reverse!
  pairing[:added].sort!

  # First, map indices from before to after removals.
  removal_map = IndexMaps.new
  pairing[:removed].each { |rm| removal_map.removal(rm) }
  # And map indices from after to before additions
  # (removals, since it is reversed).
  addition_map = IndexMaps.new
  pairing[:added].reverse.each { |ad| addition_map.removal(ad) }

  moves = {}
  orig_before = {}
  orig_after = {}
  pairing[:pairs].each do |before, after|
    mapped_before = removal_map.map(before)
    mapped_after = addition_map.map(after)
    orig_before[mapped_before] = before
    orig_after[mapped_after] = after
    moves[mapped_before] = mapped_after
  end

  # Now, detect rings within the pairs.
  # The proof is, if whatever was at position i was sent to position j,
  # whatever was at position j cannot have stayed at j.
  # By induction, there is a ring.
  # Oh, and a piece of the proof is that the arrays have the same length.
  # Trivially. Right. Hey, this is not an interview!
  rings = []
  while moves.size > 0
    # i goes to j. j goes to (…). k goes to i.
    ring = []
    pair = moves.shift
    origin, target = pair
    first_origin = origin
    while target != first_origin
      ring << origin
      origin = target
      target = moves[target]
      moves.delete(origin)
    end
    ring << origin
    rings << ring
  end
  # rings is of the form [[i,j,k], …]

  # Finally, we can register the moves.
  # The idea is, if the whole ring moves instantaneously,
  # no element outside of the ring changed position.
  pairs = []
  rings.each do |ring|
    orig_ring = ring.map { |i| [orig_before[i], orig_after[i]] }
    ring_map = IndexMaps.new
    len = ring.size
    i = 0
    while i < len
      ni = (i + 1) % len  # next i
      if ring[i] != ring[ni]
        pairs << [ring_map.map(ring[i]), ring[ni], orig_ring[i][0], orig_ring[ni][1]]
      end
      ring_map.removal(ring[i])
      ring_map.addition(ring[ni])
      i += 1
    end
  end

  pairing[:pairs] = pairs

  pairing
end

.array_pairing(before, after, options) ⇒ Object

[[before index, after index, similarity]],

removed: [before index],
added: [after index]
  • options: procedure taking (before, after) objects. Returns a probability between 0 and 1 of how likely ‘after` is a modification of `before`, or nil if it cannot determine it.



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/json-diff/diff.rb', line 118

def self.array_pairing(before, after, options)
  # Array containing the array of similarities from before to after.
  similarities = before.map do |before_item|
    after.map do |after_item|
      similarity(before_item, after_item, options)
    end
  end

  # Array containing the array of couples of indices, sorted by similarity.
  indices = before.map.with_index do |before_item, before_index|
    after.map.with_index do |after_item, after_index|
      [before_index, after_index]
    end
  end

  # Sort them in O(n^2 log(n)).
  indices.map! do |couples|
    couples.sort! do |a, b|
      a_before_index = a[0]
      b_before_index = b[0]
      a_after_index = a[1]
      b_after_index = b[1]

      similarities[b_before_index][b_after_index] <=> similarities[a_before_index][a_after_index]
    end
  end
  # Sort the toplevel.
  indices.sort! do |a, b|
    a_top_before_index = a[0][0]
    a_top_after_index = a[0][1]
    b_top_before_index = b[0][0]
    b_top_after_index = b[0][1]

    similarities[b_top_before_index][b_top_after_index] <=> similarities[a_top_before_index][a_top_after_index]
  end

  # Map from indices to boolean (true if paired).
  before_paired = {}
  after_paired = {}

  num_pairs = [before.size, after.size].min

  pairs = (0...num_pairs).map do |_|
    unpaired_before_index = indices.index { |a| !before_paired[a[0][0]] }
    unpaired_after_index = indices[unpaired_before_index].index { |a| !after_paired[a[1]] }
    unpaired_couple = indices[unpaired_before_index][unpaired_after_index]
    before_paired[unpaired_couple[0]] = true
    after_paired[unpaired_couple[1]] = true

    [unpaired_couple[0], unpaired_couple[1],
      similarities[unpaired_couple[0]][unpaired_couple[1]]]
  end

  if before.size < after.size
    added = after.map.with_index { |_, i| i} - after_paired.keys
    removed = []
  else
    removed = before.map.with_index { |_, i| i } - before_paired.keys
    added = []
  end

  {
    pairs: pairs,
    removed: removed,
    added: added,
  }
end

.diff(before, after, opts = {}) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/json-diff/diff.rb', line 3

def self.diff(before, after, opts = {})
  path = opts[:path] || ''
  include_addition = (opts[:additions] == nil) ? true : opts[:additions]
  include_moves = (opts[:moves] == nil) ? true : opts[:moves]
  include_was = (opts[:include_was] == nil) ? false : opts[:include_was]
  original_indices = (opts[:original_indices] == nil) ? false : opts[:original_indices]

  changes = []

  if before.is_a?(Hash)
    if !after.is_a?(Hash)
      changes << replace(path, include_was ? before : nil, after)
    else
      lost = before.keys - after.keys
      lost.each do |key|
        inner_path = extend_json_pointer(path, key)
        changes << remove(inner_path, include_was ? before[key] : nil)
      end

      if include_addition
        gained = after.keys - before.keys
        gained.each do |key|
          inner_path = extend_json_pointer(path, key)
          changes << add(inner_path, after[key])
        end
      end

      kept = before.keys & after.keys
      kept.each do |key|
        inner_path = extend_json_pointer(path, key)
        changes += diff(before[key], after[key], opts.merge(path: inner_path))
      end
    end
  elsif before.is_a?(Array)
    if !after.is_a?(Array)
      changes << replace(path, include_was ? before : nil, after)
    elsif before.size == 0
      if include_addition
        after.each_with_index do |item, index|
          inner_path = extend_json_pointer(path, index)
          changes << add(inner_path, item)
        end
      end
    elsif after.size == 0
      before.each do |item|
        # Delete elements from the start.
        inner_path = extend_json_pointer(path, 0)
        changes << remove(inner_path, include_was ? item : nil)
      end
    else
      pairing = array_pairing(before, after, opts)
      # FIXME: detect replacements.

      # All detected moves that do not reach the similarity limit are deleted
      # and re-added.
      pairing[:pairs].select! do |pair|
        sim = pair[2]
        kept = (sim >= 0.5)
        if !kept
          pairing[:removed] << pair[0]
          pairing[:added] << pair[1]
        end
        kept
      end

      pairing[:pairs].each do |pair|
        before_index, after_index = pair
        inner_path = extend_json_pointer(path, before_index)
        changes += diff(before[before_index], after[after_index], opts.merge(path: inner_path))
      end

      if !original_indices
        # Recompute indices to account for offsets from insertions and
        # deletions.
        pairing = array_changes(pairing)
      end

      pairing[:removed].each do |before_index|
        inner_path = extend_json_pointer(path, before_index)
        changes << remove(inner_path, include_was ? before[before_index] : nil)
      end

      pairing[:pairs].each do |pair|
        before_index, after_index = pair
        inner_before_path = extend_json_pointer(path, before_index)
        inner_after_path = extend_json_pointer(path, after_index)

        if before_index != after_index && include_moves
          changes << move(inner_before_path, inner_after_path)
        end
      end

      if include_addition
        pairing[:added].each do |after_index|
          inner_path = extend_json_pointer(path, after_index)
          changes << add(inner_path, after[after_index])
        end
      end
    end
  else
    if before != after
      changes << replace(path, include_was ? before : nil, after)
    end
  end

  changes
end

.extend_json_pointer(pointer, key) ⇒ Object

Add a key to a JSON pointer.



21
22
23
24
25
26
27
# File 'lib/json-diff/operation.rb', line 21

def self.extend_json_pointer(pointer, key)
  if pointer == ''
    json_pointer([key])
  else
    pointer + json_pointer([key])
  end
end

.json_pointer(path) ⇒ Object

Convert a list of strings or numbers to an RFC6901 JSON pointer. tools.ietf.org/html/rfc6901



5
6
7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/json-diff/operation.rb', line 5

def self.json_pointer(path)
  return "" if path == []

  escaped_path = path.map do |key|
    if key.is_a?(String)
      key.gsub('~', '~0')
         .gsub('/', '~1')
    else
      key.to_s
    end
  end.join('/')

  "/#{escaped_path}"
end

.move(source, target) ⇒ Object



49
50
51
# File 'lib/json-diff/operation.rb', line 49

def self.move(source, target)
  {'op' => 'move', 'from' => source, 'path' => target}
end

.remove(path, value) ⇒ Object



41
42
43
44
45
46
47
# File 'lib/json-diff/operation.rb', line 41

def self.remove(path, value)
  if value != nil
    {'op' => 'remove', 'path' => path, 'was' => value}
  else
    {'op' => 'remove', 'path' => path}
  end
end

.replace(path, before, after) ⇒ Object



29
30
31
32
33
34
35
# File 'lib/json-diff/operation.rb', line 29

def self.replace(path, before, after)
  if before != nil
    {'op' => 'replace', 'path' => path, 'was' => before, 'value' => after}
  else
    {'op' => 'replace', 'path' => path, 'value' => after}
  end
end

.similarity(before, after, options) ⇒ Object

Compute an arbitrary notion of how probable it is that one object is the result of modifying the other.

  • options: procedure taking (before, after) objects. Returns a probability between 0 and 1 of how likely ‘after` is a modification of `before`, or nil if it cannot determine it.



192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# File 'lib/json-diff/diff.rb', line 192

def self.similarity(before, after, options)
  return 0.0 if before.class != after.class

  # Use the custom similarity procedure if it isn't nil.
  if options[:similarity] != nil
    custom_result = options[:similarity].call(before, after)
    return custom_result if custom_result != nil
  end

  if before.is_a?(Hash)
    if before.size == 0
      if after.size == 0
        return 1.0
      else
        return 0.0
      end
    end

    # Average similarity between keys' value.
    # We don't consider key renames.
    similarities = []
    before.each do |before_key, before_item|
      similarities << similarity(before_item, after[before_key], options)
    end
    # Also consider keys' names.
    before_keys = before.keys
    after_keys = after.keys
    key_similarity = (before_keys & after_keys).size / (before_keys | after_keys).size
    similarities << key_similarity

    similarities.reduce(:+) / similarities.size
  elsif before.is_a?(Array)
    return 1.0 if before.size == 0

    # The most likely match between an element in the old and the new list is
    # presumably the right one, so we take the average of the maximum
    # similarity between each elements of the list.
    similarities = before.map do |before_item|
      after.map do |after_item|
        similarity(before_item, after_item, options)
      end.max || 0.0
    end

    similarities.reduce(:+) / similarities.size
  elsif before == after
    1.0
  else
    0.0
  end
end