Class: FileDigests
- Inherits:
-
Object
- Object
- FileDigests
- Defined in:
- lib/file-digests.rb
Constant Summary collapse
- VERSION =
Gem.loaded_specs["file-digests"]&.version&.to_s
- DIGEST_ALGORITHMS =
["BLAKE2b512", "SHA3-256", "SHA512-256"]
- LEGACY_DIGEST_ALGORITHMS =
["SHA512", "SHA256"]
Class Method Summary collapse
- .canonical_digest_algorithm_name(string) ⇒ Object
- .digest_algorithms_list_text ⇒ Object
- .parse_cli_options ⇒ Object
- .run_cli_utility ⇒ Object
Instance Method Summary collapse
- #canonical_digest_algorithm_name(string) ⇒ Object
- #create_temporary_tables ⇒ Object
-
#initialize(files_path, digest_database_path, options = {}) ⇒ FileDigests
constructor
A new instance of FileDigests.
- #initialize_database ⇒ Object
- #initialize_paths(files_path, digest_database_path) ⇒ Object
- #perform_check ⇒ Object
- #show_duplicates ⇒ Object
Constructor Details
#initialize(files_path, digest_database_path, options = {}) ⇒ FileDigests
Returns a new instance of FileDigests.
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/file-digests.rb', line 117
#
# Sets up a FileDigests instance: stores the options, resolves the paths,
# opens/prepares the database, and decides which digest algorithm is in effect.
#
# NOTE(review): this listing was extracted with several identifiers dropped.
# `options` is restored from the documented signature; `get_metadata` and
# `set_metadata` are restored from the call shape and from the prepared
# statements named :get_metadata_query / :set_metadata_query — confirm against
# the real lib/file-digests.rb.
#
# @param files_path [String, nil] directory to check (handed to #initialize_paths)
# @param digest_database_path [String, nil] database location (handed to #initialize_paths)
# @param options [Hash] this method reads :digest_algorithm and :verbose
# @raise [RuntimeError] if the database records an algorithm name that
#   canonical_digest_algorithm_name does not recognise
def initialize(files_path, digest_database_path, options = {})
  @options = options
  @user_input_wait_time = 0

  initialize_paths files_path, digest_database_path
  initialize_database

  @db.transaction(:exclusive) do
    if (db_digest_algorithm = get_metadata("digest_algorithm"))
      if (@digest_algorithm = canonical_digest_algorithm_name(db_digest_algorithm))
        # A different algorithm was requested than the one stored: remember it
        # in @new_digest_algorithm instead of switching immediately.
        if @options[:digest_algorithm] && @options[:digest_algorithm] != @digest_algorithm
          @new_digest_algorithm = @options[:digest_algorithm]
        end
      else
        raise "Database contains data for unsupported digest algorithm: #{db_digest_algorithm}"
      end
    else
      # Nothing stored yet: take the requested algorithm (or the default) and persist it.
      @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
      set_metadata "digest_algorithm", @digest_algorithm
    end
  end

  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
end
Class Method Details
.canonical_digest_algorithm_name(string) ⇒ Object
29 30 31 32 33 34 35 |
# File 'lib/file-digests.rb', line 29
#
# Maps an algorithm name, compared case-insensitively, to the exact spelling
# listed in DIGEST_ALGORITHMS or LEGACY_DIGEST_ALGORITHMS.
#
# @param string [String, nil] algorithm name in any letter case
# @return [String, nil] the canonical name, or nil when +string+ is nil/false
#   or matches no known algorithm
def self.canonical_digest_algorithm_name(string)
  return unless string

  wanted = string.downcase
  (DIGEST_ALGORITHMS + LEGACY_DIGEST_ALGORITHMS).find { |name| name.downcase == wanted }
end
.digest_algorithms_list_text ⇒ Object
41 42 43 |
# File 'lib/file-digests.rb', line 41
#
# Builds the sentence that lists the names in DIGEST_ALGORITHMS.
# LEGACY_DIGEST_ALGORITHMS is not included.
#
# @return [String] the sentence, without a trailing period
def self.digest_algorithms_list_text
  "Digest algorithm should be one of the following: #{DIGEST_ALGORITHMS.join(", ")}"
end
.parse_cli_options ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/file-digests.rb', line 45
#
# Parses the command-line switches out of ARGV (destructively, via
# OptionParser#parse!) and collects them into a hash. Positional arguments
# are left in ARGV.
#
# NOTE(review): the extracted listing had dropped the method name, the
# `options` local, `opts.banner` and the trailing return value. They are
# restored here from the documented method name, from the "Usage:" text and
# from how run_cli_utility consumes the result — confirm against the real file.
#
# Keys that may be set: :auto, :digest_algorithm, :accept_fate, :action,
# :quiet, :test_only, :verbose.
#
# Exits the process with status 1 when --digest names an algorithm that is not
# in DIGEST_ALGORITHMS, and exits after printing the help text for --help.
#
# @return [Hash] the parsed options
def self.parse_cli_options
  options = {}

  OptionParser.new do |opts|
    opts.banner = [
      "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
      " By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
      " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
    ].join "\n"

    opts.on("-a", "--auto", "Do not ask for any confirmation.") do
      options[:auto] = true
    end

    opts.on(
      "-d", "--digest DIGEST",
      'Select a digest algorithm to use. Default is "BLAKE2b512".',
      'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
      "#{digest_algorithms_list_text}.",
      "You only need to specify an algorithm on the first run, your choice will be saved to a database.",
      "Any time later you could specify a new algorithm to change the current one.",
      "Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one."
    ) do |value|
      digest_algorithm = canonical_digest_algorithm_name(value)
      # Only names in DIGEST_ALGORITHMS pass this check; anything else aborts.
      unless DIGEST_ALGORITHMS.include?(digest_algorithm)
        STDERR.puts "ERROR: #{digest_algorithms_list_text}"
        exit 1
      end
      options[:digest_algorithm] = digest_algorithm
    end

    opts.on("-f", "--accept-fate", "Accept the current state of files that are likely damaged and update their digest data.") do
      options[:accept_fate] = true
    end

    opts.on("-h", "--help", "Prints this help.") do
      puts opts
      exit
    end

    opts.on("-p", "--duplicates", "Show the list of duplicate files, based on the information out of the database.") do
      options[:action] = :show_duplicates
    end

    opts.on("-q", "--quiet", "Less verbose output, stil report any found issues.") do
      options[:quiet] = true
    end

    opts.on(
      "-t", "--test",
      "Perform a test to verify directory contents.",
      "Compare actual files with the stored digests, check if any files are missing.",
      "Digest database will not be modified."
    ) do
      options[:test_only] = true
    end

    opts.on("-v", "--verbose", "More verbose output.") do
      options[:verbose] = true
    end
  end.parse!

  options
end
.run_cli_utility ⇒ Object
110 111 112 113 114 115 |
# File 'lib/file-digests.rb', line 110
#
# Command-line entry point: parses the switches, builds an instance from the
# first two positional arguments, and runs the requested action.
#
# NOTE(review): the extracted listing had dropped `options = parse_cli_options`
# and both later uses of `options`; they are restored here from the
# three-argument constructor signature and the `[:action]` lookup — confirm
# against the real file.
#
# @return [Object] whatever the dispatched action returns
def self.run_cli_utility
  options = parse_cli_options

  file_digests = new(ARGV[0], ARGV[1], options)
  # :perform_check is used when no action was selected.
  file_digests.send(options[:action] || :perform_check)
end
Instance Method Details
#canonical_digest_algorithm_name(string) ⇒ Object
37 38 39 |
# File 'lib/file-digests.rb', line 37
#
# Instance-level convenience wrapper: forwards to the class method of the
# same name.
#
# @param string [String, nil] algorithm name, passed through unchanged
# @return [Object] whatever the class-level method returns
def canonical_digest_algorithm_name(string)
  self.class.canonical_digest_algorithm_name(string)
end
#create_temporary_tables ⇒ Object
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 |
# File 'lib/file-digests.rb', line 238
#
# Creates the three temporary tables (new_files, missing_files, new_digests)
# and registers the prepared statements that work on them.
#
# Statement order is kept exactly as in the original: each table is created
# before the statements that refer to it are prepared.
# NOTE(review): presumably prepare_method compiles the SQL right away, which
# would make this order mandatory — verify against prepare_method.
def create_temporary_tables
  # Files found on disk that the digests table does not know about.
  execute "CREATE TEMPORARY TABLE new_files ( filename TEXT NOT NULL PRIMARY KEY, digest TEXT NOT NULL)"
  execute "CREATE INDEX new_files_digest ON new_files(digest)"

  prepare_method :new_files_insert, "INSERT INTO new_files (filename, digest) VALUES (?, ?)"
  prepare_method :new_files_count_query, "SELECT count(*) FROM new_files"

  # Starts as a full copy of digests; :missing_files_delete removes rows by filename.
  execute "CREATE TEMPORARY TABLE missing_files ( filename TEXT NOT NULL PRIMARY KEY, digest TEXT NOT NULL)"
  execute "CREATE INDEX missing_files_digest ON missing_files(digest)"
  execute "INSERT INTO missing_files (filename, digest) SELECT filename, digest FROM digests"

  prepare_method :missing_files_delete, "DELETE FROM missing_files WHERE filename = ?"
  prepare_method :missing_files_delete_renamed_files, "DELETE FROM missing_files WHERE digest IN (SELECT digest FROM new_files)"
  prepare_method :missing_files_select_all_filenames, "SELECT filename FROM missing_files ORDER BY filename"
  prepare_method :missing_files_delete_all, "DELETE FROM missing_files"
  prepare_method :missing_files_count_query, "SELECT count(*) FROM missing_files"

  prepare_method :digests_delete_renamed_files, "DELETE FROM digests WHERE filename IN (SELECT filename FROM missing_files WHERE digest IN (SELECT digest FROM new_files))"
  prepare_method :digests_delete_all_missing_files, "DELETE FROM digests WHERE filename IN (SELECT filename FROM missing_files)"

  # Staging area for digests; :digests_update_digests_to_new_digests upserts it into digests.
  execute "CREATE TEMPORARY TABLE new_digests ( filename TEXT NOT NULL PRIMARY KEY, digest TEXT NOT NULL)"

  prepare_method :new_digests_insert, "INSERT INTO new_digests (filename, digest) VALUES (?, ?)"
  prepare_method :digests_update_digests_to_new_digests, "INSERT INTO digests (filename, digest, digest_check_time) SELECT filename, digest, false FROM new_digests WHERE true ON CONFLICT (filename) DO UPDATE SET digest=excluded.digest"
end
#initialize_database ⇒ Object
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 |
# File 'lib/file-digests.rb', line 158
#
# Opens the SQLite database at @digest_database_path, applies connection
# PRAGMAs, runs the integrity check, and then, inside one exclusive
# transaction, creates any missing tables, registers the prepared statements,
# migrates older schema versions up to version "3", and creates the
# temporary tables.
#
# NOTE(review): the extracted listing had dropped every `get_metadata` /
# `set_metadata` call name and the local flag that records whether the
# metadata table was just created. They are restored here from the call
# shapes and from the prepared statements :get_metadata_query /
# :set_metadata_query — confirm against the real lib/file-digests.rb.
def initialize_database
  @db = SQLite3::Database.new @digest_database_path.to_s
  @db.results_as_hash = true
  @db.busy_timeout = 5000

  execute "PRAGMA encoding = 'UTF-8'"
  execute "PRAGMA locking_mode = 'EXCLUSIVE'"
  execute "PRAGMA journal_mode = 'WAL'"
  execute "PRAGMA synchronous = 'NORMAL'"
  execute "PRAGMA cache_size = '5000'"

  integrity_check

  @db.transaction(:exclusive) do
    metadata_table_was_created = false
    unless table_exist?("metadata")
      execute "CREATE TABLE metadata ( key TEXT NOT NULL PRIMARY KEY, value TEXT)"
      metadata_table_was_created = true
    end

    prepare_method :set_metadata_query, "INSERT INTO metadata (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value=excluded.value"
    prepare_method :get_metadata_query, "SELECT value FROM metadata WHERE key = ?"

    # Record the creating gem version only when the table was created by this run.
    set_metadata("metadata_table_created_by_gem_version", FileDigests::VERSION) if FileDigests::VERSION && metadata_table_was_created

    # Heuristic to detect database version 1 (metadata was not stored back then)
    unless get_metadata("database_version")
      if table_exist?("digests")
        set_metadata "database_version", "1"
      end
    end

    unless table_exist?("digests")
      execute "CREATE TABLE digests ( id INTEGER NOT NULL PRIMARY KEY, filename TEXT NOT NULL, mtime TEXT, digest TEXT NOT NULL, digest_check_time TEXT NOT NULL)"
      execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
      execute "CREATE INDEX digests_digest ON digests(digest)"
      set_metadata("digests_table_created_by_gem_version", FileDigests::VERSION) if FileDigests::VERSION
    end

    prepare_method :digests_insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
    prepare_method :digests_find_by_filename_query, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
    prepare_method :digests_touch_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :digests_update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :digests_update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :digests_select_duplicates, "SELECT digest, filename FROM digests WHERE digest IN (SELECT digest FROM digests GROUP BY digest HAVING count(*) > 1) ORDER BY digest, filename;"

    # Still no version after the heuristic above: mark the database as version 3.
    unless get_metadata("database_version")
      set_metadata "database_version", "3"
    end

    # Convert database from 1st to 2nd version
    unless get_metadata("digest_algorithm")
      if get_metadata("database_version") == "1"
        # A ".file-digests.sha512" file next to the database selects SHA512; otherwise SHA256.
        if File.exist?(@digest_database_path.dirname + ".file-digests.sha512")
          set_metadata("digest_algorithm", "SHA512")
        else
          set_metadata("digest_algorithm", "SHA256")
        end
        set_metadata "database_version", "2"
      end
    end

    # Convert database from 2nd to 3rd version: add the index on digests(digest).
    if get_metadata("database_version") == "2"
      execute "CREATE INDEX digests_digest ON digests(digest)"
      set_metadata "database_version", "3"
    end

    check_if_database_is_at_certain_version "3"

    create_temporary_tables
  end
end
#initialize_paths(files_path, digest_database_path) ⇒ Object
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/file-digests.rb', line 141
#
# Resolves and validates the directory to scan and the database file location.
#
# Sets @files_path, @digest_database_path and @digest_database_files (the
# database path plus its "-wal" and "-shm" companions). Reads
# @options[:verbose], so @options must already be assigned.
#
# @param files_path [String, nil] directory to scan; "." when nil
# @param digest_database_path [String, nil] database file or directory; when
#   nil the scanned directory is used. If the resolved path is a directory,
#   ".file-digests.sqlite" is appended to it.
# @raise [RuntimeError] if the files path is not a readable directory
def initialize_paths(files_path, digest_database_path)
  @files_path = cleanup_path(files_path || ".")

  unless File.directory?(@files_path) && File.readable?(@files_path)
    raise "Files path must be a readable directory"
  end

  @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
  @digest_database_path += ".file-digests.sqlite" if File.directory?(@digest_database_path)
  ensure_dir_exist @digest_database_path.dirname

  @digest_database_files = ["", "-wal", "-shm"].map { |suffix| "#{@digest_database_path}#{suffix}" }

  if @options[:verbose]
    puts "Target directory: #{@files_path}"
    puts "Database location: #{@digest_database_path}"
  end
end
#perform_check ⇒ Object
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 |
# File 'lib/file-digests.rb', line 271
#
# Runs a full check: processes every file, tracks renames, optionally removes
# missing files, optionally switches the database to a new digest algorithm,
# records the check time, prints the counters, and finally runs database
# maintenance.
#
# NOTE(review): the extracted listing had dropped the `set_metadata` call name
# in two places (a bare `"digest_algorithm", @new_digest_algorithm` argument
# list and a bare parenthesized expression before print_counters). Both are
# restored here from the call shape — confirm against the real file.
def perform_check
  measure_time do
    perhaps_transaction(@new_digest_algorithm, :exclusive) do
      @counters = {good: 0, updated: 0, renamed: 0, likely_damaged: 0, exceptions: 0}

      walk_files do |filename|
        process_file filename
      end

      nested_transaction do
        puts "Tracking renames..." if @options[:verbose]
        track_renames
      end

      if any_missing_files?
        if any_exceptions?
          # With errors present, nothing is removed; only the notice is printed.
          STDERR.puts "Due to previously occurred errors, missing files will not removed from the database."
        else
          print_missing_files
          # Never remove in test-only mode; otherwise require --auto or a confirmation.
          if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
            nested_transaction do
              puts "Removing missing files..." if @options[:verbose]
              remove_missing_files
            end
          end
        end
      end

      if @new_digest_algorithm && !@options[:test_only]
        # The switch happens only when nothing is missing, damaged or failed.
        if any_missing_files? || any_likely_damaged? || any_exceptions?
          STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
        else
          puts "Updating database to a new digest algorithm..." if @options[:verbose]
          digests_update_digests_to_new_digests
          set_metadata "digest_algorithm", @new_digest_algorithm
          puts "Transition to a new digest algorithm complete: #{@new_digest_algorithm}"
        end
      end

      if any_likely_damaged? || any_exceptions?
        STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
      end

      # Test-only and complete runs are recorded under different metadata keys.
      check_time_key = @options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time"
      set_metadata(check_time_key, time_to_database(Time.now))

      print_counters
    end

    puts "Performing database maintenance..." if @options[:verbose]
    execute "PRAGMA optimize"
    execute "VACUUM"
    execute "PRAGMA wal_checkpoint(TRUNCATE)"

    hide_database_files
  end
end
#show_duplicates ⇒ Object
328 329 330 331 332 333 334 335 336 337 338 |
# File 'lib/file-digests.rb', line 328
#
# Prints the rows returned by digests_select_duplicates grouped by digest:
# a "<digest>:" header line, then one line per filename, with a blank line
# between groups.
#
# Grouping works by noticing when the digest changes from one row to the
# next, so it depends on the rows arriving ordered by digest (the
# :digests_select_duplicates query orders by digest, filename).
def show_duplicates
  current_digest = nil

  digests_select_duplicates.each do |row|
    digest = row["digest"]

    if current_digest != digest
      # Separate groups with an empty line, but not before the first one.
      puts "" if current_digest
      current_digest = digest
      puts "#{digest}:"
    end

    puts " #{row["filename"]}"
  end
end