Class: Moab::FileGroupDifference

Inherits:
Serializer::Serializable show all
Includes:
HappyMapper
Defined in:
lib/moab/file_group_difference.rb

Overview

Note:

Copyright © 2012 by The Board of Trustees of the Leland Stanford Junior University. All rights reserved. See LICENSE for details.

Performs analysis and reports the differences between two matching FileGroup objects. The descending elements of the report hold a detailed breakdown of file-level differences, organized by change type. This stanza is a child element of FileInventoryDifference, the documentation of which contains a full example.

In order to determine the detailed nature of the differences that are present between the two manifests, this algorithm first compares the sets of file signatures present in the groups being compared, then uses the result of that operation for subsequent analysis of filename correspondences.

For the first step, a Ruby Hash is extracted from each of the two groups, with an array of FileSignature objects used as hash keys, and the corresponding FileInstance arrays as the hash values. The set of keys from the basis hash can be compared against the keys from the other hash using Array operators:

  • matching = basis_array & other_array

  • basis_only = basis_array - other_array

  • other_only = other_array - basis_array

For the second step of the comparison, the matching and non-matching sets of hash entries are further categorized as follows:

  • identical = signature and file path is the same in both basis and other file group

  • renamed = signature is unchanged, but the path has moved

  • copyadded = duplicate copy of file was added

  • copydeleted = duplicate copy of file was deleted

  • modified = path is same in both groups, but the signature has changed

  • added = signature and path are only in the other inventory

  • deleted = signature and path are only in the basis inventory

Data Model

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from Serializer::Serializable

#array_to_hash, deep_diff, #diff, #key, #key_name, #to_hash, #to_json, #to_yaml, #variable_names, #variables

Constructor Details

#initialize(opts = {}) ⇒ FileGroupDifference



56
57
58
59
# File 'lib/moab/file_group_difference.rb', line 56

# Build a new difference report.
#
# The subset hash is given a default block, so the first lookup of any change
# type creates (and stores) a FileGroupDifferenceSubset whose :change is the
# string form of the lookup key.
#
# @param opts [Hash] options forwarded unchanged to the superclass constructor
def initialize(opts = {})
  @subset_hash = Hash.new do |subsets, change|
    subsets[change] = FileGroupDifferenceSubset.new(:change => change.to_s)
  end
  super(opts)
end

Instance Attribute Details

#addedInteger



115
# File 'lib/moab/file_group_difference.rb', line 115

# Integer attribute for the 'added' change type; presumably the number of files
# tabulated in that category (TODO confirm). The :on_save proc converts the value with to_s.
attribute :added, Integer, :on_save => Proc.new { |n| n.to_s }

#copyaddedInteger



87
# File 'lib/moab/file_group_difference.rb', line 87

# Integer attribute for the 'copyadded' change type; presumably the number of files
# tabulated in that category (TODO confirm). The :on_save proc converts the value with to_s.
attribute :copyadded, Integer, :on_save => Proc.new { |n| n.to_s }

#copydeletedInteger



94
# File 'lib/moab/file_group_difference.rb', line 94

# Integer attribute for the 'copydeleted' change type; presumably the number of files
# tabulated in that category (TODO confirm). The :on_save proc converts the value with to_s.
attribute :copydeleted, Integer, :on_save => Proc.new { |n| n.to_s }

#deletedInteger



122
# File 'lib/moab/file_group_difference.rb', line 122

# Integer attribute for the 'deleted' change type; presumably the number of files
# tabulated in that category (TODO confirm). The :on_save proc converts the value with to_s.
attribute :deleted, Integer, :on_save => Proc.new { |n| n.to_s }

#difference_countInteger



68
# File 'lib/moab/file_group_difference.rb', line 68

# Integer attribute mapped with the tag 'differenceCount'; presumably the total number
# of differences found for the group (TODO confirm). The :on_save proc converts the value with to_s.
attribute :difference_count, Integer, :tag => 'differenceCount', :on_save => Proc.new { |i| i.to_s }

#group_idString



63
# File 'lib/moab/file_group_difference.rb', line 63

# String attribute mapped with the tag 'groupId' and declared with :key => true.
# compare_file_groups assigns it from the basis group's group_id.
attribute :group_id, String, :tag => 'groupId', :key => true

#identicalInteger



80
# File 'lib/moab/file_group_difference.rb', line 80

# Integer attribute for the 'identical' change type; presumably the number of files
# tabulated in that category (TODO confirm). The :on_save proc converts the value with to_s.
attribute :identical, Integer, :on_save => Proc.new { |n| n.to_s }

#modifiedInteger



108
# File 'lib/moab/file_group_difference.rb', line 108

# Integer attribute for the 'modified' change type; presumably the number of files
# tabulated in that category (TODO confirm). The :on_save proc converts the value with to_s.
attribute :modified, Integer, :on_save => Proc.new { |n| n.to_s }

#renamedInteger



101
# File 'lib/moab/file_group_difference.rb', line 101

# Integer attribute for the 'renamed' change type; presumably the number of files
# tabulated in that category (TODO confirm). The :on_save proc converts the value with to_s.
attribute :renamed, Integer, :on_save => Proc.new { |n| n.to_s }

#subset_hashHash<Symbol,FileGroupDifferenceSubset>



47
48
49
# File 'lib/moab/file_group_difference.rb', line 47

# Reader for the hash created in the constructor. That hash has a default block
# which builds a FileGroupDifferenceSubset the first time a change-type key is looked up.
#
# @return [Hash<Symbol,FileGroupDifferenceSubset>] subsets keyed by change type
def subset_hash
  @subset_hash
end

#subsetsArray<FileGroupDifferenceSubset>



130
# File 'lib/moab/file_group_difference.rb', line 130

# Collection of FileGroupDifferenceSubset children, mapped with the tag 'subset'.
has_many :subsets, FileGroupDifferenceSubset, :tag => 'subset'

Instance Method Details

#basis_only_keys(basis_hash, other_hash) ⇒ Array

Compares the keys of two hashes and returns the keys unique to the first hash.



174
175
176
# File 'lib/moab/file_group_difference.rb', line 174

# Compare the keys of two hashes and return those found only in the first.
#
# @param basis_hash [Hash] hash whose unique keys are wanted
# @param other_hash [Hash] hash whose keys are removed from the result
# @return [Array] keys of basis_hash that are not keys of other_hash, in basis order
def basis_only_keys(basis_hash, other_hash)
  candidate_keys = basis_hash.keys
  excluded_keys = other_hash.keys
  candidate_keys - excluded_keys
end

#compare_file_groups(basis_group, other_group) ⇒ FileGroupDifference

Compares two file groups and returns a differences report.



189
190
191
192
193
194
# File 'lib/moab/file_group_difference.rb', line 189

# Compare two file groups and populate this report with the differences.
#
# The report's group id is taken from the basis group; the matching-signature
# comparison runs first, followed by the non-matching-signature comparison.
#
# @param basis_group [FileGroup] the group used as the basis of the comparison
# @param other_group [FileGroup] the group compared against the basis
# @return [FileGroupDifference] self
def compare_file_groups(basis_group, other_group)
  @group_id = basis_group.group_id
  [:compare_matching_signatures, :compare_non_matching_signatures].each do |comparison|
    send(comparison, basis_group, other_group)
  end
  self
end

#compare_matching_signatures(basis_group, other_group) ⇒ FileGroupDifference

For signatures that are present in both groups, reports which file instances are identical or renamed.



200
201
202
203
204
205
# File 'lib/moab/file_group_difference.rb', line 200

# For signatures present in both groups, tabulate the unchanged files and then
# the renamed (or copy-added / copy-deleted) files.
#
# @param basis_group [FileGroup] the group used as the basis of the comparison
# @param other_group [FileGroup] the group compared against the basis
# @return [FileGroupDifference] self
def compare_matching_signatures(basis_group, other_group)
  basis_signatures = basis_group.signature_hash
  other_signatures = other_group.signature_hash
  shared_signatures = matching_keys(basis_signatures, other_signatures)
  tabulate_unchanged_files(shared_signatures, basis_signatures, other_signatures)
  tabulate_renamed_files(shared_signatures, basis_signatures, other_signatures)
  self
end

#compare_non_matching_signatures(basis_group, other_group) ⇒ FileGroupDifference

For signatures that are present in only one or the other group, reports which file instances are modified, deleted, or added.



211
212
213
214
215
216
217
218
219
220
# File 'lib/moab/file_group_difference.rb', line 211

# For signatures present in only one of the two groups, tabulate the modified,
# added and deleted files.
#
# Each group is asked (via path_hash_subset) for the path hash restricted to its
# own unique signatures; those two path hashes drive the three tabulations.
#
# @param basis_group [FileGroup] the group used as the basis of the comparison
# @param other_group [FileGroup] the group compared against the basis
# @return [FileGroupDifference] self
def compare_non_matching_signatures(basis_group, other_group)
  basis_signatures = basis_group.signature_hash
  other_signatures = other_group.signature_hash
  basis_unique = basis_only_keys(basis_signatures, other_signatures)
  other_unique = other_only_keys(basis_signatures, other_signatures)
  basis_paths = basis_group.path_hash_subset(basis_unique)
  other_paths = other_group.path_hash_subset(other_unique)
  tabulate_modified_files(basis_paths, other_paths)
  tabulate_added_files(basis_paths, other_paths)
  tabulate_deleted_files(basis_paths, other_paths)
  self
end

#file_deltasHash<Symbol,Array>



338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
# File 'lib/moab/file_group_difference.rb', line 338

# Collect the paths recorded in each change-type subset into a hash keyed by
# change type.
#
# * :identical, :modified, :deleted, :copydeleted => array of basis_path values
# * :copyadded, :renamed => array of [basis_path, other_path] pairs
# * :added => array of other_path values
#
# The returned hash has a default block that stores an empty array for any
# key that is looked up but not yet present.
#
# @return [Hash<Symbol,Array>] paths (or path pairs) per change type
def file_deltas
  deltas = Hash.new { |hash, key| hash[key] = [] }
  # Each row pairs a list of change types with the way a file entry is reduced
  extraction_plan = [
    [[:identical, :modified, :deleted, :copydeleted], lambda { |file| file.basis_path }],
    [[:copyadded, :renamed], lambda { |file| [file.basis_path, file.other_path] }],
    [[:added], lambda { |file| file.other_path }]
  ]
  extraction_plan.each do |changes, extract|
    changes.each do |change|
      deltas[change].concat(@subset_hash[change].files.map(&extract))
    end
  end
  deltas
end

#matching_keys(basis_hash, other_hash) ⇒ Array

Compares the keys of two hashes and returns the intersection.



167
168
169
# File 'lib/moab/file_group_difference.rb', line 167

# Compare the keys of two hashes and return the keys they have in common.
#
# @param basis_hash [Hash] first hash
# @param other_hash [Hash] second hash
# @return [Array] keys present in both hashes, in basis order
def matching_keys(basis_hash, other_hash)
  basis_keys = basis_hash.keys
  other_keys = other_hash.keys
  basis_keys & other_keys
end

#other_only_keys(basis_hash, other_hash) ⇒ Array

Compares the keys of two hashes and returns the keys unique to the second hash.



181
182
183
# File 'lib/moab/file_group_difference.rb', line 181

# Compare the keys of two hashes and return those found only in the second.
#
# @param basis_hash [Hash] hash whose keys are removed from the result
# @param other_hash [Hash] hash whose unique keys are wanted
# @return [Array] keys of other_hash that are not keys of basis_hash, in other order
def other_only_keys(basis_hash, other_hash)
  candidate_keys = other_hash.keys
  excluded_keys = basis_hash.keys
  candidate_keys - excluded_keys
end

#rename_require_temp_files(filepairs) ⇒ Boolean



361
362
363
364
365
366
367
368
369
370
371
372
# File 'lib/moab/file_group_difference.rb', line 361

# Report whether any name appears both as an old name and as a new name in the
# given rename pairs.
#
# @param filepairs [Array<Array<String>>] [old_name, new_name] pairs
# @return [Boolean] true if the set of old names and the set of new names overlap
def rename_require_temp_files(filepairs)
  oldnames = filepairs.map { |old, _new| old }
  newnames = filepairs.map { |_old, new| new }
  !(oldnames & newnames).empty?
end

#rename_tempfile_triplets(filepairs) ⇒ Array<Array<String>>



376
377
378
# File 'lib/moab/file_group_difference.rb', line 376

# Extend each rename pair with a temporary file name derived from the new name.
#
# The temporary name is "<new>-<YYYYMMDDHHMMSS>-tmp". The timestamp is taken
# once per call so every triplet produced by the same call shares one suffix.
#
# @param filepairs [Array<Array<String>>] [old_name, new_name] pairs
# @return [Array<Array<String>>] [old_name, new_name, temp_name] triplets
def rename_tempfile_triplets(filepairs)
  # %M (minute) replaces the second %H of the original format, which repeated the hour
  timestamp = Time.now.strftime('%Y%m%d%H%M%S')
  filepairs.collect { |old, new| [old, new, "#{new}-#{timestamp}-tmp"] }
end

#subset(change) ⇒ FileGroupDifferenceSubset



51
52
53
# File 'lib/moab/file_group_difference.rb', line 51

# Look up the subset stored for a change type.
#
# @param change [String, Symbol] change type; converted with to_sym before the lookup
# @return [FileGroupDifferenceSubset] the value stored in the subset hash under that symbol
def subset(change)
  change_key = change.to_sym
  @subset_hash[change_key]
end

#summaryFileGroupDifference

Returns a clone of just this element for inclusion in a versionMetadata structure.



150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/moab/file_group_difference.rb', line 150

# Build a new FileGroupDifference carrying only this report's group id and its
# seven change-type values (identical, copyadded, copydeleted, renamed,
# modified, added, deleted). Nothing else is passed to the new object.
#
# @return [FileGroupDifference] a fresh object initialized from those values
def summary
  copied_fields = [:group_id, :identical, :copyadded, :copydeleted, :renamed, :modified, :added, :deleted]
  opts = {}
  copied_fields.each { |field| opts[field] = send(field) }
  FileGroupDifference.new(opts)
end

#summary_fieldsArray<String>



143
144
145
# File 'lib/moab/file_group_difference.rb', line 143

# Names of the fields that make up a summary of this report.
#
# @return [Array<String>] field names, in reporting order
def summary_fields
  %w[
    group_id difference_count
    identical copyadded copydeleted renamed modified deleted added
  ]
end

#tabulate_added_files(basis_path_hash, other_path_hash) ⇒ FileGroupDifference

Returns Container for reporting the set of file-level differences of type ‘added’.



308
309
310
311
312
313
314
315
316
317
# File 'lib/moab/file_group_difference.rb', line 308

# Record an 'added' difference for every path that is a key of the other path
# hash but not of the basis path hash.
#
# Each entry has an empty basis_path, the added path as other_path, and the
# other hash's value for that path appended to its signatures.
#
# @param basis_path_hash [Hash] path => signature entries unique to the basis group
# @param other_path_hash [Hash] path => signature entries unique to the other group
# @return [FileGroupDifference] self
def tabulate_added_files(basis_path_hash, other_path_hash)
  added_paths = other_only_keys(basis_path_hash, other_path_hash)
  added_paths.each do |added_path|
    difference = FileInstanceDifference.new(:change => 'added')
    difference.basis_path = ""
    difference.other_path = added_path
    difference.signatures << other_path_hash[added_path]
    @subset_hash[:added].files << difference
  end
  self
end

#tabulate_deleted_files(basis_path_hash, other_path_hash) ⇒ FileGroupDifference

Returns Container for reporting the set of file-level differences of type ‘deleted’.



326
327
328
329
330
331
332
333
334
335
# File 'lib/moab/file_group_difference.rb', line 326

# Record a 'deleted' difference for every path that is a key of the basis path
# hash but not of the other path hash.
#
# Each entry has the deleted path as basis_path, an empty other_path, and the
# basis hash's value for that path appended to its signatures.
#
# @param basis_path_hash [Hash] path => signature entries unique to the basis group
# @param other_path_hash [Hash] path => signature entries unique to the other group
# @return [FileGroupDifference] self
def tabulate_deleted_files(basis_path_hash, other_path_hash)
  deleted_paths = basis_only_keys(basis_path_hash, other_path_hash)
  deleted_paths.each do |deleted_path|
    difference = FileInstanceDifference.new(:change => 'deleted')
    difference.basis_path = deleted_path
    difference.other_path = ""
    difference.signatures << basis_path_hash[deleted_path]
    @subset_hash[:deleted].files << difference
  end
  self
end

#tabulate_modified_files(basis_path_hash, other_path_hash) ⇒ FileGroupDifference

Returns Container for reporting the set of file-level differences of type ‘modified’.



289
290
291
292
293
294
295
296
297
298
299
# File 'lib/moab/file_group_difference.rb', line 289

# Record a 'modified' difference for every path that is a key of both path hashes.
#
# Each entry has the shared path as basis_path, the literal "same" as
# other_path, and two signatures: the basis hash's value followed by the other
# hash's value for that path.
#
# @param basis_path_hash [Hash] path => signature entries unique to the basis group
# @param other_path_hash [Hash] path => signature entries unique to the other group
# @return [FileGroupDifference] self
def tabulate_modified_files(basis_path_hash, other_path_hash)
  shared_paths = matching_keys(basis_path_hash, other_path_hash)
  shared_paths.each do |shared_path|
    difference = FileInstanceDifference.new(:change => 'modified')
    difference.basis_path = shared_path
    difference.other_path = "same"
    difference.signatures << basis_path_hash[shared_path]
    difference.signatures << other_path_hash[shared_path]
    @subset_hash[:modified].files << difference
  end
  self
end

#tabulate_renamed_files(matching_signatures, basis_signature_hash, other_signature_hash) ⇒ FileGroupDifference

Returns Container for reporting the set of file-level differences of type ‘renamed’,‘copyadded’, or ‘copydeleted’.



255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/moab/file_group_difference.rb', line 255

# For each signature present in both groups, record 'renamed', 'copyadded' or
# 'copydeleted' differences for the paths that are not shared.
#
# Paths found only in the basis are paired by index with paths found only in
# the other group:
# * both sides present   => 'renamed'
# * no basis-only path   => 'copyadded'; basis_path is set to the first of all basis paths
# * no other-only path   => 'copydeleted'; other_path is left nil
#
# @param matching_signatures [Array] signatures that are keys of both hashes
# @param basis_signature_hash [Hash] signature => object responding to #paths (basis group)
# @param other_signature_hash [Hash] signature => object responding to #paths (other group)
# @return [FileGroupDifference] self
def tabulate_renamed_files(matching_signatures, basis_signature_hash, other_signature_hash)
  matching_signatures.each do |signature|
    basis_paths = basis_signature_hash[signature].paths
    other_paths = other_signature_hash[signature].paths
    vanished_paths = basis_paths - other_paths
    appeared_paths = other_paths - basis_paths
    pair_count = [vanished_paths.size, appeared_paths.size].max
    pair_count.times do |index|
      difference = FileInstanceDifference.new()
      difference.basis_path = vanished_paths[index]
      difference.other_path = appeared_paths[index]
      difference.signatures << signature
      difference.change =
        if difference.basis_path.nil?
          'copyadded'
        elsif difference.other_path.nil?
          'copydeleted'
        else
          'renamed'
        end
      # A copy has no vanished basis path of its own, so point at an existing one
      difference.basis_path = basis_paths[0] if difference.change == 'copyadded'
      @subset_hash[difference.change.to_sym].files << difference
    end
  end
  self
end

#tabulate_unchanged_files(matching_signatures, basis_signature_hash, other_signature_hash) ⇒ FileGroupDifference

Returns Container for reporting the set of file-level differences of type ‘identical’.



230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/moab/file_group_difference.rb', line 230

# For each signature present in both groups, record an 'identical' difference
# for every path that the basis and the other group share for that signature.
#
# Each entry has the shared path as basis_path, the literal "same" as
# other_path, and the signature appended to its signatures.
#
# @param matching_signatures [Array] signatures that are keys of both hashes
# @param basis_signature_hash [Hash] signature => object responding to #paths (basis group)
# @param other_signature_hash [Hash] signature => object responding to #paths (other group)
# @return [FileGroupDifference] self
def tabulate_unchanged_files(matching_signatures, basis_signature_hash, other_signature_hash)
  matching_signatures.each do |signature|
    basis_paths = basis_signature_hash[signature].paths
    other_paths = other_signature_hash[signature].paths
    (basis_paths & other_paths).each do |shared_path|
      difference = FileInstanceDifference.new(:change => 'identical')
      difference.basis_path = shared_path
      difference.other_path = "same"
      difference.signatures << signature
      @subset_hash[:identical].files << difference
    end
  end
  self
end