Class: NauktisUtils::Duplicate

Inherits:
Object
  • Object
show all
Includes:
Logging
Defined in:
lib/nauktis_utils/duplicate.rb

Overview

Class to find and handle duplicate files.

Defined Under Namespace

Classes: DeletingStrategy, HandlingStrategy

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Logging

#logger, logger, logger=

Constructor Details

#initialize(handling_strategy) ⇒ Duplicate

Returns a new instance of Duplicate.



132
133
134
# File 'lib/nauktis_utils/duplicate.rb', line 132

# Builds a duplicate finder that uses the given strategy.
#
# The strategy is stored as-is in @handling_strategy; nothing here validates it.
#
# @param handling_strategy [Object] presumably a HandlingStrategy (e.g.
#   DeletingStrategy) instance — TODO confirm against callers
def initialize(handling_strategy)
  @handling_strategy = handling_strategy
end

Instance Attribute Details

#handling_strategy ⇒ Object



130
131
132
# File 'lib/nauktis_utils/duplicate.rb', line 130

# Reader for the handling strategy held in @handling_strategy.
#
# @return [Object] the current value of @handling_strategy
def handling_strategy
  @handling_strategy
end

Class Method Details

.algorithm(name) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
# File 'lib/nauktis_utils/duplicate.rb', line 6

# Looks up one of the built-in file-keying algorithms by name.
#
# Every algorithm is a proc that takes a file path and returns the value
# computed for that file:
#
# * :name - lower-cased base name of the path
# * :size - File.size of the path
# * :md5, :sha1, :sha3 - result of FileDigester.digest for that digest type
#
# @param name [#to_sym] algorithm name, as a Symbol or String
# @return [Proc] the proc registered under +name+
# @raise [KeyError] if +name+ is not one of the built-in algorithms
def self.algorithm(name)
  # The table is built once and memoized in a class-level instance variable
  # rather than a class variable: class variables are shared across the whole
  # inheritance tree, and the previous guard rebuilt every proc each time an
  # unknown name was requested.
  @algorithms ||= {
    name: proc { |file| File.basename(file).downcase },
    size: proc { |file| File.size(file) },
    md5: proc { |file| FileDigester.digest(file, :md5) },
    sha1: proc { |file| FileDigester.digest(file, :sha1) },
    sha3: proc { |file| FileDigester.digest(file, :sha3) }
  }
  @algorithms.fetch(name.to_sym)
end

Instance Method Details

#clean(directories) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/nauktis_utils/duplicate.rb', line 136

# Searches the given directories for duplicate files and processes them,
# logging the file count and the total size before and after.
#
# Files are grouped successively by the :size, :md5 and :sha3 algorithms
# (stored in @groupings) and handed to multi_group_by, starting at
# grouping index 0.
#
# @param directories [Array<String>, String] directory path(s) to scan; the
#   argument itself is never modified
# @return [Object] whatever the final logger.info call returns
def clean(directories)
  logger.info "Searching duplicates in #{directories}"
  # Expand into a fresh array so the caller's (possibly frozen) argument is
  # left untouched; Array() also lets a single path be passed directly.
  expanded = Array(directories).map { |d| File.expand_path(d) }
  files = files_in(expanded)
  logger.info "Number of files: #{files.size.to_s(:delimited)}"
  size_before = size_of(expanded)
  logger.info "Total size: #{size_before.to_s(:human_size)}"

  @groupings = %i[size md5 sha3].map { |name| self.class.algorithm(name) }
  multi_group_by(files, 0)

  size_after = size_of(expanded)
  logger.info "Total size: #{size_after.to_s(:human_size)}"
  # With nothing on disk to begin with, 0 / 0.0 would be NaN; report 0.0%.
  reduction_ratio =
    if size_before.zero?
      0.0
    else
      (100 * (size_before - size_after) / size_before.to_f).round(2)
    end
  logger.info "Size reduced by #{reduction_ratio}% (#{size_after.to_s(:delimited)}/#{size_before.to_s(:delimited)})"
end