Class: FileDigests

Inherits:
Object
  • Object
show all
Defined in:
lib/file-digests.rb

Constant Summary collapse

# Version string of the loaded "file-digests" gem, or nil when no such gem
# specification is loaded (for example when the file is run outside the gem).
VERSION = Gem.loaded_specs["file-digests"]&.version&.to_s

# Digest algorithms a user may select; the first entry is also the default
# used when a new database is created.
# Frozen so the shared list cannot be modified by accident at runtime.
DIGEST_ALGORITHMS = ["BLAKE2b512", "SHA3-256", "SHA512-256"].freeze

# Algorithms that are still recognised when found in an existing database but
# are not offered for selection.
LEGACY_DIGEST_ALGORITHMS = ["SHA512", "SHA256"].freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(files_path, digest_database_path, options = {}) ⇒ FileDigests

Returns a new instance of FileDigests.



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/file-digests.rb', line 117

# Prepares paths and the database, then settles which digest algorithm this
# instance works with.
#
# When the database already records an algorithm, that one stays in use and a
# different algorithm requested through options is only remembered in
# @new_digest_algorithm. Otherwise the requested algorithm (default
# "BLAKE2b512") is stored in the database.
#
# @param files_path [String, nil] directory to operate on (handed to initialize_paths)
# @param digest_database_path [String, nil] database location (handed to initialize_paths)
# @param options [Hash] options; this method itself reads :digest_algorithm and :verbose
# @raise [RuntimeError] when the database records an algorithm name that
#   canonical_digest_algorithm_name does not recognise
def initialize files_path, digest_database_path, options = {}
  @options = options
  @user_input_wait_time = 0

  initialize_paths files_path, digest_database_path
  initialize_database

  @db.transaction(:exclusive) do
    # NOTE(review): get_metadata / set_metadata are assumed to be the helpers
    # backed by the get_metadata_query / set_metadata_query prepared
    # statements — confirm against the rest of the file.
    db_digest_algorithm = get_metadata("digest_algorithm")

    if db_digest_algorithm
      @digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm)
      unless @digest_algorithm
        raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
      end

      # A differing request is only recorded here; the stored algorithm stays active.
      if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
        @new_digest_algorithm = @options[:digest_algorithm]
      end
    else
      @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
      set_metadata "digest_algorithm", @digest_algorithm
    end
  end
  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
end

Class Method Details

.canonical_digest_algorithm_name(string) ⇒ Object



29
30
31
32
33
34
35
# File 'lib/file-digests.rb', line 29

# Maps a digest algorithm name written in any letter case to its canonical
# spelling.
#
# @param string [String, nil] name to look up
# @return [String, nil] the matching entry of DIGEST_ALGORITHMS or
#   LEGACY_DIGEST_ALGORITHMS, or nil when string is nil or nothing matches
def self.canonical_digest_algorithm_name(string)
  return unless string

  wanted = string.downcase
  (DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS).find { |name| name.downcase == wanted }
end

.digest_algorithms_list_text ⇒ Object



41
42
43
# File 'lib/file-digests.rb', line 41

# Builds the sentence that lists every selectable digest algorithm.
#
# @return [String] the sentence, with DIGEST_ALGORITHMS joined by ", "
def self.digest_algorithms_list_text
  supported = DIGEST_ALGORITHMS.join(", ")
  "Digest algorithm should be one of the following: #{supported}"
end

.parse_cli_options ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/file-digests.rb', line 45

# Parses the command-line switches found in ARGV.
#
# Recognised switches are removed from ARGV (parse! is destructive), so the
# positional arguments are what remains afterwards.
# "--help" prints the usage text and exits; an unknown "--digest" value prints
# an error to STDERR and exits with status 1.
#
# @return [Hash] collected options; possible keys are :auto, :digest_algorithm,
#   :accept_fate, :action, :quiet, :test_only and :verbose
def self.parse_cli_options
  options = {}

  OptionParser.new do |opts|
    opts.banner = [
      "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
      "       By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
      "       Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
    ].join "\n"

    opts.on("-a", "--auto", "Do not ask for any confirmation.") do
      options[:auto] = true
    end

    opts.on(
      "-d", "--digest DIGEST",
      'Select a digest algorithm to use. Default is "BLAKE2b512".',
      'You might also consider to use slower "SHA512-256" or even slower "SHA3-256".',
      "#{digest_algorithms_list_text}.",
      "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
      "Any time later you could specify a new algorithm to change the current one.",
      "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
    ) do |value|
      digest_algorithm = canonical_digest_algorithm_name(value)
      # Legacy algorithms canonicalise successfully but may not be selected,
      # hence the explicit membership check.
      unless DIGEST_ALGORITHMS.include?(digest_algorithm)
        STDERR.puts "ERROR: #{digest_algorithms_list_text}"
        exit 1
      end
      options[:digest_algorithm] = digest_algorithm
    end

    opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
      options[:accept_fate] = true
    end

    opts.on("-h", "--help", "Prints this help.") do
      puts opts
      exit
    end

    opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
      options[:action] = :show_duplicates
    end

    opts.on("-q", "--quiet", "Less verbose output, still report any found issues.") do
      options[:quiet] = true
    end

    opts.on(
      "-t", "--test",
      "Perform a test to verify directory contents.",
      "Compare actual files with the stored digests, check if any files are missing.",
      "Digest database will not be modified."
    ) do
      options[:test_only] = true
    end

    opts.on("-v", "--verbose", "More verbose output.") do
      options[:verbose] = true
    end

  end.parse!
  options
end

.run_cli_utility ⇒ Object



110
111
112
113
114
115
# File 'lib/file-digests.rb', line 110

# Command-line entry point: parses the switches, builds an instance from the
# first two entries of ARGV and runs the requested action on it.
#
# @return [Object] whatever the dispatched action returns
def self.run_cli_utility
  cli_options = parse_cli_options
  instance = new(ARGV[0], ARGV[1], cli_options)

  # Without an explicit action the regular check is performed.
  instance.send(cli_options[:action] || :perform_check)
end

Instance Method Details

#canonical_digest_algorithm_name(string) ⇒ Object



37
38
39
# File 'lib/file-digests.rb', line 37

# Instance-level shortcut that forwards to the class method of the same name.
#
# @param string [String, nil] algorithm name to look up
# @return [Object] whatever the class-level method returns
def canonical_digest_algorithm_name(string)
  self.class.canonical_digest_algorithm_name(string)
end

#create_temporary_tables ⇒ Object



238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
# File 'lib/file-digests.rb', line 238

# Creates the temporary tables new_files, missing_files and new_digests and
# registers the prepared statements that operate on them.
# missing_files is filled with every row of digests at creation time.
def create_temporary_tables
  # The three temporary tables share one (filename, digest) layout.
  create_scratch_table = lambda do |table|
    execute("CREATE TEMPORARY TABLE #{table} (\n    filename TEXT NOT NULL PRIMARY KEY,\n    digest TEXT NOT NULL)")
  end

  # new_files
  create_scratch_table.call("new_files")
  execute("CREATE INDEX new_files_digest ON new_files(digest)")

  prepare_method(:new_files_insert, "INSERT INTO new_files (filename, digest) VALUES (?, ?)")
  prepare_method(:new_files_count_query, "SELECT count(*) FROM new_files")

  # missing_files starts out as a copy of everything currently in digests
  create_scratch_table.call("missing_files")
  execute("CREATE INDEX missing_files_digest ON missing_files(digest)")

  execute("INSERT INTO missing_files (filename, digest) SELECT filename, digest FROM digests")

  prepare_method(:missing_files_delete, "DELETE FROM missing_files WHERE filename = ?")
  prepare_method(:missing_files_delete_renamed_files, "DELETE FROM missing_files WHERE digest IN (SELECT digest FROM new_files)")
  prepare_method(:missing_files_select_all_filenames, "SELECT filename FROM missing_files ORDER BY filename")
  prepare_method(:missing_files_delete_all, "DELETE FROM missing_files")
  prepare_method(:missing_files_count_query, "SELECT count(*) FROM missing_files")

  prepare_method(:digests_delete_renamed_files, "DELETE FROM digests WHERE filename IN (SELECT filename FROM missing_files WHERE digest IN (SELECT digest FROM new_files))")
  prepare_method(:digests_delete_all_missing_files, "DELETE FROM digests WHERE filename IN (SELECT filename FROM missing_files)")

  # new_digests
  create_scratch_table.call("new_digests")

  prepare_method(:new_digests_insert, "INSERT INTO new_digests (filename, digest) VALUES (?, ?)")
  prepare_method(:digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest")
end

#initialize_database ⇒ Object



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/file-digests.rb', line 158

# Opens the SQLite database at @digest_database_path, applies connection
# pragmas, creates missing tables, registers prepared statements and brings
# the stored schema up to database version "3".
#
# NOTE(review): get_metadata / set_metadata are assumed to be the helpers
# backed by the get_metadata_query / set_metadata_query statements prepared
# below — confirm against the rest of the file.
def initialize_database
  @db = SQLite3::Database.new @digest_database_path.to_s
  @db.results_as_hash = true
  @db.busy_timeout = 5000

  execute "PRAGMA encoding = 'UTF-8'"
  execute "PRAGMA locking_mode = 'EXCLUSIVE'"
  execute "PRAGMA journal_mode = 'WAL'"
  execute "PRAGMA synchronous = 'NORMAL'"
  execute "PRAGMA cache_size = '5000'"

  integrity_check

  @db.transaction(:exclusive) do
    # Remember whether this run created the metadata table, so the creating
    # gem version is recorded only once.
    metadata_table_was_created = false
    unless table_exist?("metadata")
      execute "CREATE TABLE metadata (
        key TEXT NOT NULL PRIMARY KEY,
        value TEXT)"
      metadata_table_was_created = true
    end

    prepare_method :set_metadata_query, "INSERT INTO metadata (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value=excluded.value"
    prepare_method :get_metadata_query, "SELECT value FROM metadata WHERE key = ?"

    set_metadata("metadata_table_created_by_gem_version", FileDigests::VERSION) if FileDigests::VERSION && metadata_table_was_created

    # Heuristic to detect database version 1 (metadata was not stored back then)
    unless get_metadata("database_version")
      if table_exist?("digests")
        set_metadata "database_version", "1"
      end
    end

    unless table_exist?("digests")
      execute "CREATE TABLE digests (
        id INTEGER NOT NULL PRIMARY KEY,
        filename TEXT NOT NULL,
        mtime TEXT,
        digest TEXT NOT NULL,
        digest_check_time TEXT NOT NULL)"
      execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
      execute "CREATE INDEX digests_digest ON digests(digest)"
      set_metadata("digests_table_created_by_gem_version", FileDigests::VERSION) if FileDigests::VERSION
    end

    prepare_method :digests_insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
    prepare_method :digests_find_by_filename_query, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
    prepare_method :digests_touch_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :digests_update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :digests_update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :digests_select_duplicates, "SELECT digest, filename FROM digests WHERE digest IN (SELECT digest FROM digests GROUP BY digest HAVING count(*) > 1) ORDER BY digest, filename;"

    # Still no version at this point means the digests table was created just
    # above, i.e. the database is brand new and already in the current layout.
    unless get_metadata("database_version")
      set_metadata "database_version", "3"
    end

    # Convert database from 1st to 2nd version
    unless get_metadata("digest_algorithm")
      if get_metadata("database_version") == "1"
        # A ".file-digests.sha512" file next to the database selects SHA512;
        # otherwise SHA256 is recorded.
        if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
          set_metadata("digest_algorithm", "SHA512")
        else
          set_metadata("digest_algorithm", "SHA256")
        end
        set_metadata "database_version", "2"
      end
    end

    # Convert database from 2nd to 3rd version: add the index on digests(digest)
    if get_metadata("database_version") == "2"
      execute "CREATE INDEX digests_digest ON digests(digest)"
      set_metadata "database_version", "3"
    end

    check_if_database_is_at_certain_version "3"

    create_temporary_tables
  end
end

#initialize_paths(files_path, digest_database_path) ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/file-digests.rb', line 141

# Resolves the directory to check and the location of the digest database.
#
# @param files_path [String, nil] directory to operate on; "." when nil
# @param digest_database_path [String, nil] database file or directory; the
#   files directory is used when nil, and ".file-digests.sqlite" is appended
#   when the resolved path is a directory
# @raise [RuntimeError] when the files path is not a readable directory
def initialize_paths(files_path, digest_database_path)
  @files_path = cleanup_path(files_path || ".")

  unless File.directory?(@files_path) && File.readable?(@files_path)
    raise "Files path must be a readable directory"
  end

  @digest_database_path =
    if digest_database_path
      cleanup_path(digest_database_path)
    else
      @files_path
    end
  if File.directory?(@digest_database_path)
    @digest_database_path += ".file-digests.sqlite"
  end
  ensure_dir_exist(@digest_database_path.dirname)

  # The database file itself plus its "-wal" and "-shm" companions.
  @digest_database_files = ["", "-wal", "-shm"].map { |suffix| "#{@digest_database_path}#{suffix}" }

  return unless @options[:verbose]

  puts "Target directory: #{@files_path}"
  puts "Database location: #{@digest_database_path}"
end

#perform_check ⇒ Object



271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# File 'lib/file-digests.rb', line 271

# Runs a complete check: processes every file, tracks renames, optionally
# removes missing files from the database, optionally switches the database to
# a new digest algorithm, records the check time and finally performs database
# maintenance.
#
# Options read here: :verbose, :test_only and :auto.
#
# NOTE(review): set_metadata is assumed to be the helper backed by the
# set_metadata_query prepared statement — confirm against the rest of the file.
def perform_check
  measure_time do
    perhaps_transaction(@new_digest_algorithm, :exclusive) do
      @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}

      walk_files do |filename|
        process_file filename
      end

      nested_transaction do
        puts "Tracking renames..." if @options[:verbose]
        track_renames
      end

      if any_missing_files?
        if any_exceptions?
          STDERR.puts "Due to previously occurred errors, missing files will not be removed from the database."
        else
          print_missing_files
          # Removal never happens in test mode; otherwise it needs --auto or an
          # interactive confirmation.
          if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
            nested_transaction do
              puts "Removing missing files..." if @options[:verbose]
              remove_missing_files
            end
          end
        end
      end

      if @new_digest_algorithm && !@options[:test_only]
        # The switch is only made after a completely clean run.
        if any_missing_files? || any_likely_damaged? || any_exceptions?
          STDERR.puts "ERROR: New digest algorithm will not be in effect while there are files that are missing, likely damaged, or processed with an exception."
        else
          puts "Updating database to a new digest algorithm..." if @options[:verbose]
          digests_update_digests_to_new_digests
          set_metadata "digest_algorithm", @new_digest_algorithm
          puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
        end
      end

      if any_likely_damaged? || any_exceptions?
        STDERR.puts "PLEASE REVIEW ERRORS THAT HAVE OCCURRED!"
      end

      # Test-only runs and complete runs are timestamped under separate keys.
      set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))

      print_counters
    end

    puts "Performing database maintenance..." if @options[:verbose]
    execute "PRAGMA optimize"
    execute "VACUUM"
    execute "PRAGMA wal_checkpoint(TRUNCATE)"

    hide_database_files
  end
end

#show_duplicates ⇒ Object



328
329
330
331
332
333
334
335
336
337
338
# File 'lib/file-digests.rb', line 328

# Prints the rows yielded by digests_select_duplicates grouped by digest: a
# "<digest>:" header line, the indented filenames beneath it, and a blank line
# between consecutive groups.
def show_duplicates
  previous_digest = nil

  digests_select_duplicates.each do |row|
    digest = row["digest"]

    # A change of digest starts a new group.
    unless previous_digest == digest
      puts "" if previous_digest
      previous_digest = digest
      puts "#{digest}:"
    end

    puts "  #{row["filename"]}"
  end
end