Class: FileDigests

Inherits:
Object
  • Object
show all
Defined in:
lib/file-digests.rb

Constant Summary collapse

DIGEST_ALGORITHMS =
["BLAKE2b512", "SHA3-256", "SHA512-256"]
LEGACY_DIGEST_ALGORITHMS =
["SHA512", "SHA256"]

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(files_path, digest_database_path, options = {}) ⇒ FileDigests

Returns a new instance of FileDigests.



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/file-digests.rb', line 97

# Sets up paths and the digest database, then decides which digest algorithm
# this run works with.
#
# The algorithm recorded under the "digest_algorithm" metadata key wins. If
# options[:digest_algorithm] names a different one, it is only remembered in
# @new_digest_algorithm (perform_check handles the actual transition). When
# nothing is recorded yet, the requested algorithm (default "BLAKE2b512") is
# stored as the database's algorithm.
#
# NOTE(review): get_metadata / set_metadata are not defined in this part of
# the file; they are assumed to be the key/value accessors backed by the
# prepared get_metadata_query / set_metadata_query statements - confirm.
#
# @param files_path [String, nil] forwarded to initialize_paths
# @param digest_database_path [String, nil] forwarded to initialize_paths
# @param options [Hash] :digest_algorithm and :verbose are read here
# @raise [RuntimeError] when the database records an algorithm that
#   canonical_digest_algorithm_name does not recognise
def initialize files_path, digest_database_path, options = {}
  @options = options

  initialize_paths files_path, digest_database_path
  initialize_database

  @db.transaction(:exclusive) do
    if db_digest_algorithm = get_metadata("digest_algorithm")
      if @digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm)
        # A differing request is deferred, not applied: the stored digests
        # were produced with the algorithm already recorded in the database.
        if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
          @new_digest_algorithm = @options[:digest_algorithm]
        end
      else
        raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
      end
    else
      # First run against this database: persist the choice.
      @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
      set_metadata "digest_algorithm", @digest_algorithm
    end
  end
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
end

Class Method Details

.canonical_digest_algorithm_name(string) ⇒ Object



14
15
16
17
18
19
20
# File 'lib/file-digests.rb', line 14

# Maps a case-insensitive algorithm name onto its canonical spelling.
#
# Both current (DIGEST_ALGORITHMS) and legacy (LEGACY_DIGEST_ALGORITHMS)
# names are recognised.
#
# @param string [String, nil] name as typed by the user or read from storage
# @return [String, nil] the canonical name, or nil when the input is
#   nil/false or matches no known algorithm
def self.canonical_digest_algorithm_name(string)
  return unless string

  wanted = string.downcase
  (DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS).find do |candidate|
    candidate.downcase == wanted
  end
end

.digest_algorithms_list_text ⇒ Object



26
27
28
# File 'lib/file-digests.rb', line 26

# Builds the human-readable sentence listing the selectable digest
# algorithms (DIGEST_ALGORITHMS only; legacy names are not listed).
#
# @return [String]
def self.digest_algorithms_list_text
  supported = DIGEST_ALGORITHMS.join(", ")
  "Digest algorithm should be one of the following: #{supported}"
end

.parse_cli_options ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/file-digests.rb', line 30

# Parses the command-line switches out of ARGV.
#
# OptionParser#parse! is destructive: recognised switches are removed from
# ARGV, so only the positional arguments (directory, database file) remain
# there afterwards.
#
# Side effects: "-h" prints the help and exits; an algorithm name passed to
# "-d" that is not in DIGEST_ALGORITHMS prints an error to STDERR and exits
# with status 1.
#
# @return [Hash] any of :auto, :digest_algorithm, :accept_fate, :action,
#   :quiet, :test_only, :verbose, depending on the switches given
def self.parse_cli_options
  options = {}

  OptionParser.new do |opts|
    opts.banner = [
      "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
      "       By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
      "       Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
    ].join "\n"

    opts.on("-a", "--auto", "Do not ask for any confirmation.") do
      options[:auto] = true
    end

    opts.on(
      "-d", "--digest DIGEST",
      'Select a digest algorithm to use. Default is "BLAKE2b512".',
      'You might also consider using the slower "SHA512-256" or the even slower "SHA3-256".',
      "#{digest_algorithms_list_text}.",
      "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
      "Any time later you could specify a new algorithm to change the current one.",
      "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
    ) do |value|
      # canonical_digest_algorithm_name also resolves legacy names; the
      # include? check below rejects those as well as unknown input.
      digest_algorithm = canonical_digest_algorithm_name(value)
      unless DIGEST_ALGORITHMS.include?(digest_algorithm)
        STDERR.puts "ERROR: #{digest_algorithms_list_text}"
        exit 1
      end
      options[:digest_algorithm] = digest_algorithm
    end

    opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
      options[:accept_fate] = true
    end

    opts.on("-h", "--help", "Prints this help.") do
      puts opts
      exit
    end

    opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
      options[:action] = :show_duplicates
    end

    opts.on("-q", "--quiet", "Less verbose output, still report any found issues.") do
      options[:quiet] = true
    end

    opts.on("-t", "--test", "Perform only the test, do not modify the digest database.") do
      options[:test_only] = true
    end

    opts.on("-v", "--verbose", "More verbose output.") do
      options[:verbose] = true
    end

  end.parse!
  options
end

.run_cli_utility ⇒ Object



90
91
92
93
94
95
# File 'lib/file-digests.rb', line 90

# Command-line entry point: parses the switches, builds an instance from the
# first two remaining ARGV entries and runs the requested action
# (:perform_check unless the options select another one).
def self.run_cli_utility
  cli_options = parse_cli_options

  # ARGV is read only after parsing, as the original did.
  files_path, database_path = ARGV.values_at(0, 1)
  action = cli_options[:action] || :perform_check

  new(files_path, database_path, cli_options).send(action)
end

Instance Method Details

#canonical_digest_algorithm_name(string) ⇒ Object



22
23
24
# File 'lib/file-digests.rb', line 22

# Instance-level shortcut that forwards to the class method of the same name.
#
# @param string [String, nil] algorithm name in any letter case
# @return [Object] whatever the class-level method returns
def canonical_digest_algorithm_name string
  owner = self.class
  owner.canonical_digest_algorithm_name(string)
end

#initialize_database ⇒ Object



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/file-digests.rb', line 137

# Opens the SQLite database at @digest_database_path, creates the "metadata"
# and "digests" tables when they are missing, registers the prepared
# statements used by the rest of the class, and brings the stored
# "database_version" to 2 (converting from version 1 where detected).
#
# NOTE(review): execute, integrity_check, table_exist?, prepare_method,
# get_metadata and set_metadata are defined outside this view. get_metadata /
# set_metadata are assumed to be the key/value accessors backed by the
# get_metadata_query / set_metadata_query statements prepared below - confirm.
#
# @raise [RuntimeError] "Incompatible database version" when the stored
#   version is not "2" after the conversion steps
def initialize_database
  @db = SQLite3::Database.new @digest_database_path.to_s
  @db.results_as_hash = true
  @db.busy_timeout = 5000

  # nil when the code is not running from the installed "file-digests" gem.
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s

  execute "PRAGMA encoding = 'UTF-8'"
  execute "PRAGMA locking_mode = 'EXCLUSIVE'"
  execute "PRAGMA journal_mode = 'WAL'"
  execute "PRAGMA synchronous = 'NORMAL'"
  execute "PRAGMA cache_size = '5000'"

  integrity_check

  @db.transaction(:exclusive) do
    metadata_table_was_created = false
    unless table_exist?("metadata")
      execute "CREATE TABLE metadata (
        key TEXT NOT NULL PRIMARY KEY,
        value TEXT)"
      execute "CREATE UNIQUE INDEX metadata_key ON metadata(key)"
      metadata_table_was_created = true
    end

    prepare_method :set_metadata_query, "INSERT INTO metadata (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value=excluded.value"
    prepare_method :get_metadata_query, "SELECT value FROM metadata WHERE key = ?"

    # Record the creating gem version only for a table created just now.
    set_metadata("metadata_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version && metadata_table_was_created

    # Heuristic to detect database version 1 (metadata was not stored back then)
    unless get_metadata("database_version")
      if table_exist?("digests")
        set_metadata "database_version", "1"
      end
    end

    unless table_exist?("digests")
      execute "CREATE TABLE digests (
        id INTEGER NOT NULL PRIMARY KEY,
        filename TEXT NOT NULL,
        mtime TEXT,
        digest TEXT NOT NULL,
        digest_check_time TEXT NOT NULL)"
      execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
      set_metadata("digests_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version
    end

    prepare_method :insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
    prepare_method :find_by_filename_query, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
    prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
    prepare_method :query_duplicates, "SELECT digest, filename FROM digests WHERE digest IN (SELECT digest FROM digests GROUP BY digest HAVING count(*) > 1) ORDER BY digest, filename;"
    prepare_method :update_digest_to_new_digest, "UPDATE digests SET digest = ? WHERE digest = ?"

    # Still no version at this point means the database is brand new.
    unless get_metadata("database_version")
      set_metadata "database_version", "2"
    end

    # Convert database from 1st to 2nd version
    unless get_metadata("digest_algorithm")
      if get_metadata("database_version") == "1"
        # The marker file next to the database decides which legacy
        # algorithm the existing digests are labelled with.
        if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
          set_metadata("digest_algorithm", "SHA512")
        else
          set_metadata("digest_algorithm", "SHA256")
        end
        set_metadata "database_version", "2"
      end
    end

    if get_metadata("database_version") != "2"
      STDERR.puts "This version of file-digests (#{file_digests_gem_version || "unknown"}) is only compatible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install the appropriate version of file-digests."
      raise "Incompatible database version"
    end
  end
end

#initialize_paths(files_path, digest_database_path) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/file-digests.rb', line 120

# Resolves the directory to scan and the location of the database file.
#
# files_path falls back to "."; the database path falls back to the files
# path. When the database path is a directory, ".file-digests.sqlite" is
# appended to it. Also records the database file together with its "-wal"
# and "-shm" companions in @digest_database_files.
#
# NOTE(review): cleanup_path and ensure_dir_exist are defined outside this
# view; cleanup_path presumably returns a Pathname, since #dirname is called
# on its result - confirm.
#
# @param files_path [String, nil]
# @param digest_database_path [String, nil]
# @raise [RuntimeError] when the files path is not a readable directory
def initialize_paths files_path, digest_database_path
  @files_path = cleanup_path(files_path || ".")

  unless File.directory?(@files_path) && File.readable?(@files_path)
    raise "Files path must be a readable directory"
  end

  @digest_database_path =
    if digest_database_path
      cleanup_path(digest_database_path)
    else
      @files_path
    end
  if File.directory?(@digest_database_path)
    @digest_database_path += ".file-digests.sqlite"
  end
  ensure_dir_exist @digest_database_path.dirname

  @digest_database_files = ["", "-wal", "-shm"].map do |suffix|
    "#{@digest_database_path}#{suffix}"
  end

  return unless @options[:verbose]

  puts "Target directory: #{@files_path}"
  puts "Database location: #{@digest_database_path}"
end

#perform_check ⇒ Object



217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/file-digests.rb', line 217

# Runs a full check: processes every file yielded by walk_files, tracks
# renames, offers to drop database entries for missing files, optionally
# completes a transition to @new_digest_algorithm, records the check time,
# prints the counters, and finally runs database maintenance.
#
# NOTE(review): perhaps_transaction, nested_transaction, measure_time,
# walk_files, process_file, track_renames, confirm, time_to_database and the
# any_* / print_* / remove_* helpers are defined outside this view.
# perhaps_transaction presumably opens a transaction only when its first
# argument is truthy - confirm. set_metadata is assumed to be the key/value
# writer backed by the prepared set_metadata_query statement - confirm.
def perform_check
  perhaps_transaction(@new_digest_algorithm, :exclusive) do
    @counters = {good: 0, updated: 0, new: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
    @new_files = {}
    @new_digests = {}

    # Starts as every filename => digest pair known to the database.
    @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]

    measure_time do
      walk_files do |filename|
        process_file filename
      end
    end

    nested_transaction do
      puts "Tracking renames..." if @options[:verbose]
      track_renames
    end

    if any_missing_files?
      if any_exceptions?
        STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
      else
        print_missing_files
        if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
          nested_transaction do
            puts "Removing missing files..." if @options[:verbose]
            remove_missing_files
          end
        end
      end
    end

    if @new_digest_algorithm && !@options[:test_only]
      # The switch is made only after a completely clean run.
      if any_missing_files? || any_likely_damaged? || any_exceptions?
        STDERR.puts "ERROR: New digest algorithm will not be in effect while there are files that are missing, likely damaged, or processed with an exception."
      else
        puts "Updating database to a new digest algorithm..." if @options[:verbose]
        @new_digests.each do |old_digest, new_digest|
          update_digest_to_new_digest new_digest, old_digest
        end
        set_metadata "digest_algorithm", @new_digest_algorithm
        puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
      end
    end

    if any_likely_damaged? || any_exceptions?
      STDERR.puts "PLEASE REVIEW ERRORS THAT HAVE OCCURRED!"
    end

    set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))

    print_counters
  end

  puts "Performing database maintenance..." if @options[:verbose]
  execute "PRAGMA optimize"
  execute "VACUUM"
  execute "PRAGMA wal_checkpoint(TRUNCATE)"

  hide_database_files
end

#show_duplicates ⇒ Object



280
281
282
283
284
285
286
287
288
289
290
# File 'lib/file-digests.rb', line 280

# Prints the rows yielded by query_duplicates grouped by digest: one
# "<digest>:" header per run of equal digests, the filenames indented
# beneath it, and a blank line between groups.
#
# Relies on the rows arriving ordered by digest; a digest that reappears
# later simply starts another group.
def show_duplicates
  previous_digest = nil
  query_duplicates.each do |row|
    digest = row["digest"]
    unless digest == previous_digest
      puts "" if previous_digest
      previous_digest = digest
      puts "#{digest}:"
    end
    puts "  #{row["filename"]}"
  end
end