Class: FileDigests
- Inherits: Object
- Inheritance chain: Object → FileDigests
- Defined in:
- lib/file-digests.rb
Constant Summary
- DIGEST_ALGORITHMS = ["BLAKE2b512", "SHA3-256", "SHA512-256"]
Class Method Summary collapse
- .canonical_digest_algorithm_name(string) ⇒ Object
- .digest_algorithms_list_text ⇒ Object
- .parse_cli_options ⇒ Object
- .run_cli_utility ⇒ Object
Instance Method Summary collapse
- #canonical_digest_algorithm_name(string) ⇒ Object
- #initialize(files_path, digest_database_path, options = {}) ⇒ FileDigests (constructor) — returns a new instance of FileDigests.
- #initialize_database ⇒ Object
- #initialize_paths(files_path, digest_database_path) ⇒ Object
- #perform_check ⇒ Object
- #show_duplicates ⇒ Object
Constructor Details
#initialize(files_path, digest_database_path, options = {}) ⇒ FileDigests
Returns a new instance of FileDigests.
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/file-digests.rb', line 90

# Prepares paths and the digest database, then decides which digest
# algorithm is in effect for this run.
#
# If the database already records an algorithm, it stays in effect; a
# different algorithm requested through +options+ is only remembered as
# @new_digest_algorithm. Otherwise the requested algorithm (or "BLAKE2b512")
# is adopted and written to the database.
#
# NOTE(review): the published listing had lost the identifiers `options`,
# `@options`, `get_metadata` and `set_metadata`; they are restored here from
# the documented signature and from how the same values are used elsewhere in
# the class — confirm against lib/file-digests.rb.
#
# @param files_path [String, nil] passed on to #initialize_paths
# @param digest_database_path [String, nil] passed on to #initialize_paths
# @param options [Hash] run options; :digest_algorithm and :verbose are read here
def initialize files_path, digest_database_path, options = {}
  @options = options

  initialize_paths files_path, digest_database_path
  initialize_database

  @digest_algorithm = canonical_digest_algorithm_name(get_metadata("digest_algorithm"))

  if @digest_algorithm
    # The stored algorithm wins for now; a differing request is kept aside.
    requested = @options[:digest_algorithm]
    @new_digest_algorithm = requested if requested && requested != @digest_algorithm
  else
    @digest_algorithm = (@options[:digest_algorithm] || "BLAKE2b512")
    set_metadata "digest_algorithm", @digest_algorithm
  end

  puts "Using #{@digest_algorithm} digest algorithm" if @options[:verbose]
end
Class Method Details
.canonical_digest_algorithm_name(string) ⇒ Object
13 14 15 16 17 18 |
# File 'lib/file-digests.rb', line 13

# Maps a digest algorithm name to its canonical spelling.
#
# The lookup against DIGEST_ALGORITHMS compares downcased names, so the
# caller's capitalisation does not matter.
#
# @param string [String, nil] algorithm name to look up
# @return [String, nil] the matching DIGEST_ALGORITHMS entry, or nil when
#   +string+ is nil/false or does not name a listed algorithm
def self.canonical_digest_algorithm_name(string)
  return unless string

  wanted = string.downcase
  DIGEST_ALGORITHMS.find { |name| name.downcase == wanted }
end
.digest_algorithms_list_text ⇒ Object
24 25 26 |
# File 'lib/file-digests.rb', line 24

# Builds the human-readable sentence listing every supported digest algorithm.
#
# @return [String] the sentence, with DIGEST_ALGORITHMS joined by ", "
def self.digest_algorithms_list_text
  "Digest algorithm should be one of the following: #{DIGEST_ALGORITHMS.join(', ')}"
end
.parse_cli_options ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/file-digests.rb', line 28

# Parses the command line (ARGV) into an options hash.
#
# Recognised switches are removed from ARGV by OptionParser#parse!, leaving
# only the positional arguments behind. An unknown --digest value prints an
# error to STDERR and exits with status 1; -h/--help prints the usage text
# and exits.
#
# NOTE(review): the published listing had lost the method name, the `options`
# local, `opts.banner` and the final `options` return expression; they are
# restored here — confirm against lib/file-digests.rb.
#
# @return [Hash] any of :auto, :digest_algorithm, :action, :test_only,
#   :quiet, :verbose, depending on the switches given
def self.parse_cli_options
  options = {}

  OptionParser.new do |opts|
    opts.banner = [
      "Usage: file-digests [options] [path/to/directory] [path/to/database_file]",
      " By default the current directory will be operated upon, and the database file will be placed to the current directory as well.",
      " Should you wish to check current directory but place the database elsewhere, you could provide \".\" as a first argument, and the path to a database_file as a second."
    ].join "\n"

    opts.on("-a", "--auto", "Do not ask for any confirmation") do
      options[:auto] = true
    end

    opts.on(
      '--digest=DIGEST',
      'Select a digest algorithm to use. Default is "BLAKE2b512".',
      'You might also consider to use slower "SHA512-256" or even more slower "SHA3-256".',
      "#{digest_algorithms_list_text}.",
      'You only need to specify an algorithm on the first run, your choice will be saved to a database.',
      'Any time later you could specify a new algorithm to change the current one.',
      'Transition to a new algorithm will only occur if all files pass the check by digests which were stored using the old one.'
    ) do |value|
      digest_algorithm = canonical_digest_algorithm_name(value)
      unless digest_algorithm
        STDERR.puts "ERROR: #{digest_algorithms_list_text}"
        exit 1
      end
      options[:digest_algorithm] = digest_algorithm
    end

    opts.on("-d", "--duplicates", "Show the list of duplicate files, based on the information out of the database") do
      options[:action] = :show_duplicates
    end

    opts.on("-t", "--test", "Perform only the test, do not modify the digest database") do
      options[:test_only] = true
    end

    opts.on("-q", "--quiet", "Less verbose output, stil report any found issues") do
      options[:quiet] = true
    end

    opts.on("-v", "--verbose", "More verbose output") do
      options[:verbose] = true
    end

    opts.on("-h", "--help", "Prints this help") do
      puts opts
      exit
    end
  end.parse!

  options
end
.run_cli_utility ⇒ Object
83 84 85 86 87 88 |
# File 'lib/file-digests.rb', line 83

# Command-line entry point: parses the options, builds an instance from the
# first two positional arguments, and runs the selected action.
#
# The action defaults to :perform_check when options[:action] is not set.
#
# NOTE(review): the published listing had lost the `options` and
# `parse_cli_options` identifiers; they are restored here — confirm against
# lib/file-digests.rb.
def self.run_cli_utility
  options = parse_cli_options

  file_digests = self.new ARGV[0], ARGV[1], options
  file_digests.send(options[:action] || :perform_check)
end
Instance Method Details
#canonical_digest_algorithm_name(string) ⇒ Object
20 21 22 |
# File 'lib/file-digests.rb', line 20

# Instance-level convenience wrapper that forwards to the class method of the
# same name.
#
# @param string [String, nil] algorithm name to look up
# @return [Object] whatever the class-level method returns
def canonical_digest_algorithm_name(string)
  self.class.canonical_digest_algorithm_name(string)
end
#initialize_database ⇒ Object
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
# File 'lib/file-digests.rb', line 123

# Opens the SQLite database at @digest_database_path, creates the `metadata`
# and `digests` tables when absent, registers the prepared statements used by
# the rest of the class, and brings the stored database version up to "2".
#
# NOTE(review): the published listing had lost every identifier containing
# "metadata" (`get_metadata`, `set_metadata`, and a local flag whose exact
# name is a guess: `metadata_table_was_created`). They are restored here from
# context — confirm against lib/file-digests.rb.
#
# @raise [RuntimeError] "Incompatible database version" when the stored
#   version is still not "2" after the upgrade steps
def initialize_database
  @db = SQLite3::Database.new @digest_database_path.to_s
  @db.results_as_hash = true

  # nil when the code is not running from an installed "file-digests" gem.
  file_digests_gem_version = Gem.loaded_specs["file-digests"]&.version&.to_s

  execute 'PRAGMA encoding = "UTF-8"'
  execute 'PRAGMA journal_mode = "WAL"'
  execute 'PRAGMA synchronous = "NORMAL"'
  execute 'PRAGMA locking_mode = "EXCLUSIVE"'
  execute 'PRAGMA cache_size = "5000"'

  @db.transaction(:exclusive) do
    metadata_table_was_created = false
    unless table_exist?("metadata")
      execute "CREATE TABLE metadata ( key TEXT NOT NULL PRIMARY KEY, value TEXT)"
      execute "CREATE UNIQUE INDEX metadata_key ON metadata(key)"
      metadata_table_was_created = true
    end

    prepare_method :set_metadata_query, "INSERT INTO metadata (key, value) VALUES (?, ?) ON CONFLICT (key) DO UPDATE SET value=excluded.value"
    prepare_method :get_metadata_query, "SELECT value FROM metadata WHERE key = ?"

    # Record the creating gem version only when this run created the table.
    set_metadata("metadata_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version && metadata_table_was_created

    # Heuristic to detect database version 1 (metadata was not stored back then)
    unless get_metadata("database_version")
      if table_exist?("digests")
        set_metadata "database_version", "1"
      end
    end

    unless table_exist?("digests")
      execute "CREATE TABLE digests ( id INTEGER NOT NULL PRIMARY KEY, filename TEXT NOT NULL, mtime TEXT, digest TEXT NOT NULL, digest_check_time TEXT NOT NULL)"
      execute "CREATE UNIQUE INDEX digests_filename ON digests(filename)"
      set_metadata("digests_table_created_by_gem_version", file_digests_gem_version) if file_digests_gem_version
    end

    prepare_method :insert, "INSERT INTO digests (filename, mtime, digest, digest_check_time) VALUES (?, ?, ?, datetime('now'))"
    prepare_method :find_by_filename_query, "SELECT id, mtime, digest FROM digests WHERE filename = ?"
    prepare_method :touch_digest_check_time, "UPDATE digests SET digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :update_mtime_and_digest, "UPDATE digests SET mtime = ?, digest = ?, digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :update_mtime, "UPDATE digests SET mtime = ?, digest_check_time = datetime('now') WHERE id = ?"
    prepare_method :delete_by_filename, "DELETE FROM digests WHERE filename = ?"
    prepare_method :query_duplicates, "SELECT digest, filename FROM digests WHERE digest IN (SELECT digest FROM digests GROUP BY digest HAVING count(*) > 1) ORDER BY digest, filename;"
    prepare_method :update_digest_to_new_digest, "UPDATE digests SET digest = ? WHERE digest = ?"

    # Still no version at this point means neither table pre-existed.
    unless get_metadata("database_version")
      set_metadata "database_version", "2"
    end

    # Convert database from 1st to 2nd version
    unless get_metadata("digest_algorithm")
      if get_metadata("database_version") == "1"
        # A '.file-digests.sha512' marker next to the database selects SHA512.
        if File.exist?(@digest_database_path.dirname + '.file-digests.sha512')
          set_metadata("digest_algorithm", "SHA512")
        else
          set_metadata("digest_algorithm", "SHA256")
        end
        set_metadata "database_version", "2"
      end
    end

    if get_metadata("database_version") != "2"
      STDERR.puts "This version of file-digests is only compartible with the database version 2. Current database version is #{get_metadata("database_version")}. To use this database, please install appropriate version if file-digest."
      raise "Incompatible database version"
    end
  end
end
#initialize_paths(files_path, digest_database_path) ⇒ Object
108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/file-digests.rb', line 108

# Resolves the directory to check and the location of the digest database.
#
# +files_path+ defaults to "."; the database path defaults to the files
# directory. When the database path is a directory, the file name
# '.file-digests.sqlite' is appended to it.
#
# NOTE(review): the published listing had lost the receiver of `[:verbose]`;
# it is restored as `@options` — confirm against lib/file-digests.rb.
#
# @param files_path [String, nil] directory whose files will be checked
# @param digest_database_path [String, nil] database file or a directory to hold it
# @raise [RuntimeError] when the files path is not a readable directory
def initialize_paths files_path, digest_database_path
  @files_path = cleanup_path(files_path || ".")

  raise "Files path must be a readable directory" unless (File.directory?(@files_path) && File.readable?(@files_path))

  @digest_database_path = digest_database_path ? cleanup_path(digest_database_path) : @files_path
  @digest_database_path += '.file-digests.sqlite' if File.directory?(@digest_database_path)
  ensure_dir_exists @digest_database_path.dirname

  if @options[:verbose]
    puts "Target directory: #{@files_path}"
    puts "Database location: #{@digest_database_path}"
  end
end
#perform_check ⇒ Object
200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 |
# File 'lib/file-digests.rb', line 200

# Runs a full check: walks the files, tracks renames, optionally removes
# missing files from the database, optionally switches to a new digest
# algorithm, records the check time and prints the counters.
#
# NOTE(review): the published listing had lost the `@options` receivers and
# the `set_metadata` calls; they are restored here from context — confirm
# against lib/file-digests.rb.
def perform_check
  perhaps_transaction(@new_digest_algorithm, :exclusive) do
    @counters = {good: 0, updated: 0, new: 0, renamed: 0, likely_damaged: 0, exceptions: 0}
    @new_files = {}
    @new_digests = {}

    # Starts as every filename/digest pair stored in the database.
    @missing_files = Hash[@db.prepare("SELECT filename, digest FROM digests").execute!]

    measure_time do
      walk_files do |filename|
        process_file filename
      end
    end

    track_renames

    if any_missing_files?
      if any_exceptions?
        STDERR.puts "Due to previously occurred errors, database cleanup from missing files will be skipped this time."
      else
        print_missing_files
        if !@options[:test_only] && (@options[:auto] || confirm("Remove missing files from the database"))
          remove_missing_files
        end
      end
    end

    if @new_digest_algorithm && !@options[:test_only]
      # Switch algorithms only when no file is missing, damaged or failed.
      if any_missing_files? || any_likely_damaged? || any_exceptions?
        STDERR.puts "ERROR: New digest algorithm will not be in effect until there are files that are missing, likely damaged, or processed with an exception."
      else
        @new_digests.each do |old_digest, new_digest|
          update_digest_to_new_digest new_digest, old_digest
        end
        set_metadata "digest_algorithm", @new_digest_algorithm
      end
    end

    if any_likely_damaged? || any_exceptions?
      STDERR.puts "PLEASE REVIEW ERRORS THAT WERE OCCURRED!"
    end

    set_metadata(@options[:test_only] ? "latest_test_only_check_time" : "latest_complete_check_time", time_to_database(Time.now))

    print_counters
  end
end
#show_duplicates ⇒ Object
248 249 250 251 252 253 254 255 256 257 258 |
# File 'lib/file-digests.rb', line 248

# Prints the rows yielded by query_duplicates grouped by digest: a
# "<digest>:" heading, then one line per filename, with a blank line between
# consecutive groups.
#
# Grouping relies on rows with the same digest arriving next to each other.
def show_duplicates
  previous_digest = nil

  query_duplicates.each do |row|
    digest = row['digest']

    unless digest == previous_digest
      puts "" if previous_digest # blank separator before every group but the first
      puts "#{digest}:"
      previous_digest = digest
    end

    puts " #{row['filename']}"
  end
end