Module: OcflTools::Utils::Files

Defined in:
lib/ocfl_tools/utils_file.rb

Class Method Summary collapse

Class Method Details

.create_digests(files, digestAlgorithm) ⇒ Object

Given an array of files and a digestAlgorithm, create digests and return results in a [Hash]



151
152
153
154
155
156
157
158
# File 'lib/ocfl_tools/utils_file.rb', line 151

# Build a filepath => digest lookup for the supplied file(s).
#
# @param files [Array, Object] a list of filepaths, or a single filepath.
# @param digestAlgorithm the algorithm identifier, handed unchanged to
#   OcflTools::Utils.generate_file_digest.
# @return [Hash] each input filepath mapped to whatever generate_file_digest returned for it.
def self.create_digests(files, digestAlgorithm)
  # Array() wraps a lone filepath so single files and lists share one code path.
  Array(files).each_with_object({}) do |path, digests|
    digests[path] = OcflTools::Utils.generate_file_digest(path, digestAlgorithm)
  end
end

.expand_filepaths(files, object_root_dir) ⇒ Object

Given an array of (relative to object root) filepaths, expand to fully-resolvable filesystem paths. If the object_root_dir is already at the front of the filepath, don’t add it again.



162
163
164
165
166
167
168
169
170
171
172
# File 'lib/ocfl_tools/utils_file.rb', line 162

# Expand filepaths that are relative to the object root into full filesystem paths.
#
# A path that already begins with object_root_dir is returned unchanged rather
# than being prefixed a second time (previously such paths were dropped from the
# result altogether).
#
# @param files [Array<String>, String] one filepath or a list of filepaths.
# @param object_root_dir [String] the prefix to put in front of each relative path.
# @return [Array<String>] one expanded path per input path, in input order.
def self.expand_filepaths(files, object_root_dir)
  root = object_root_dir.to_s
  # Array() lets callers pass a single file as well as a list.
  Array(files).map do |file|
    path = file.to_s
    # Plain prefix comparison: no regex, so metacharacters in the root are harmless.
    path.start_with?(root) ? path : "#{root}/#{path}"
  end
end

.get_dir_files(directory) ⇒ Array

Given a directory, return a list of all files (no dirs or special files) found beneath it.



9
10
11
12
13
14
15
16
17
18
19
# File 'lib/ocfl_tools/utils_file.rb', line 9

# List every regular file (no directories or special files) beneath a directory.
#
# The working directory is only switched for the duration of the scan; the
# block form of Dir.chdir restores it afterwards, so callers using relative
# paths are not affected by this call.
#
# @param directory [String] the directory to scan recursively.
# @return [Array<String>] filepaths relative to +directory+; empty if the
#   directory does not exist.
def self.get_dir_files(directory)
  # A missing directory is not an error here: it simply contains no files.
  return [] unless Dir.exist?(directory)

  Dir.chdir(directory) do
    # Glob relative to the directory so results are relative paths;
    # '**/*' does not match dotfiles.
    Dir.glob('**/*').select { |entry| File.file?(entry) }
  end
end

.get_latest_inventory(object_root_dir) ⇒ Object

Given an object root dir, get the most recent inventory file.



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/ocfl_tools/utils_file.rb', line 175

# Locate the most recent inventory.json for an object root.
#
# Search order: the highest version directory, then the object root itself,
# then the remaining version directories from highest to lowest. Finding an
# inventory only in a lower version directory is technically a non-compliant
# object root, but returning it is useful when attempting object recovery.
#
# @param object_root_dir [String] path to the object root.
# @return [String] path of the first inventory.json found in the search order.
# @raise [OcflTools::Errors::ValidationError] E215 when no inventory file exists
#   in the object root or any discovered version directory.
def self.get_latest_inventory(object_root_dir)
  # get_version_directories returns a sorted array; reverse it so the highest version leads.
  newest_first = OcflTools::Utils::Files.get_version_directories(object_root_dir).reverse
  newest, *older = newest_first

  candidates = [
    "#{object_root_dir}/#{newest}/inventory.json",
    "#{object_root_dir}/inventory.json"
  ]
  candidates.concat(older.map { |v| "#{object_root_dir}/#{v}/inventory.json" })

  located = candidates.find { |path| File.exist?(path) }
  return located if located

  # Nothing in the object root or in any version directory: validation error.
  raise OcflTools::Errors::ValidationError, details: { "E215" => ["OCFL 3.1 Expected inventory file not found in #{object_root_dir} or discovered version directories."] }
end

.get_version_dir_files(object_root_dir, version) ⇒ Array

Given an object root and a version, return the files on disk in the appropriate content dir.



23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/ocfl_tools/utils_file.rb', line 23

# Return the files on disk in the content directory of one version.
#
# @param object_root_dir [String] path to the object root.
# @param version [#to_i] the version number; converted with to_i and rendered
#   through the format string reported by get_version_format.
# @return [Array] the result of expand_filepaths applied to the files found in
#   "<object_root_dir>/<version dir>/<content directory>".
def self.get_version_dir_files(object_root_dir, version)
  files_util = OcflTools::Utils::Files
  # Render the integer version into the on-disk directory name.
  version_dir = files_util.get_version_format(object_root_dir) % version.to_i
  # The content directory name is read from the latest inventory.
  inventory_path = files_util.get_latest_inventory(object_root_dir)
  content_dir_name = OcflTools::Utils::Inventory.get_contentDirectory(inventory_path)
  content_path = "#{object_root_dir}/#{version_dir}/#{content_dir_name}"
  # List the files, then expand them against the same content path.
  relative_files = files_util.get_dir_files(content_path)
  files_util.expand_filepaths(relative_files, content_path)
end

.get_version_directories(object_root_dir) ⇒ Object

Given an object root directory, deduce and return the version directories by inspecting disk.



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/ocfl_tools/utils_file.rb', line 53

# Deduce the version directories of an object root by inspecting the disk.
#
# A version directory is a directory directly beneath the object root whose
# name is 'v' followed only by digits (e.g. 'v1', 'v0003'). The result is
# ordered by version number, so 'v10' sorts after 'v2'.
#
# The working directory is only switched for the duration of the scan.
#
# @param object_root_dir [String] path to the object root.
# @return [Array<String>] version directory names in ascending version order.
# @raise [OcflTools::Errors::RequestedDirectoryNotFound] if object_root_dir does not exist.
# @raise [OcflTools::Errors::ValidationError] E100 if the object root contains no
#   directories at all; E013 if none of them is a version directory.
def self.get_version_directories(object_root_dir)
  unless Dir.exist?(object_root_dir)
    # This is a client error.
    raise OcflTools::Errors::RequestedDirectoryNotFound, "#{object_root_dir} does not exist!"
  end

  # Block form restores the previous working directory when the scan is done.
  object_root_dirs = Dir.chdir(object_root_dir) do
    Dir.glob('*').select { |entry| File.directory?(entry) }
  end
  if object_root_dirs.empty?
    # This is a validation error.
    raise OcflTools::Errors::ValidationError, details: { "E100" => ["#{object_root_dir} is empty!"] }
  end

  # Anchored match on the whole name, so directories such as 'extensions' are not picked up.
  version_directories = object_root_dirs.grep(/\Av\d+\z/)
  # This is a validation error; we expect to find matching version directories.
  if version_directories.empty?
    raise OcflTools::Errors::ValidationError, details: { "E013" => ["#{object_root_dir} must contain at least one identifiable version directory."] }
  end

  # Sort on the numeric part; the name breaks ties between e.g. 'v1' and 'v01'.
  version_directories.sort_by { |dir| [dir[1..-1].to_i, dir] }
end

.get_version_format(object_root_dir) ⇒ Object

Given an object_root_directory, deduce the format used to describe version directories.



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/ocfl_tools/utils_file.rb', line 84

# Deduce the format string used to name version directories in an object root.
#
# Looks at the directories named 'v' followed only by digits, takes the one
# that sorts first, and requires it to be version 1. A one-digit name ('v1')
# yields 'v%d'; a zero-padded name ('v0001') yields a format of the same width
# ('v%04d'). A bare 'v' directory and other non-version names starting with
# 'v' are ignored.
#
# The working directory is only switched for the duration of the scan.
#
# @param object_root_dir [String] path to the object root.
# @return [String] a format string suitable for Kernel#format / String#%.
# @raise [OcflTools::Errors::RequestedDirectoryNotFound] if object_root_dir does not exist.
# @raise [OcflTools::Errors::ValidationError] E013 if no version directory is
#   present; E015 if the first version directory is not version 1.
def self.get_version_format(object_root_dir)
  unless Dir.exist?(object_root_dir)
    # This is a client error.
    raise OcflTools::Errors::RequestedDirectoryNotFound, "#{object_root_dir} does not exist!"
  end

  # Block form restores the previous working directory when the scan is done.
  candidates = Dir.chdir(object_root_dir) do
    Dir.glob('v*').select { |entry| File.directory?(entry) }
  end
  version_dirs = candidates.grep(/\Av\d+\z/)
  if version_dirs.empty?
    # Without this guard the code below would fail on nil instead of reporting a validation error.
    raise OcflTools::Errors::ValidationError, details: { "E013" => ["#{object_root_dir} must contain at least one identifiable version directory."] }
  end

  # Lowest name in string order, with the leading 'v' removed (non-mutating).
  first_version = version_dirs.min[1..-1]
  unless first_version.to_i == 1
    # This is a validation error; there must be a version 1 directory.
    raise OcflTools::Errors::ValidationError, details: { "E015" => ["Expected version 1 not found. Found version #{first_version.to_i} instead."] }
  end

  # A single digit implies unpadded 'v1'; otherwise the width gives the zero padding.
  first_version.length == 1 ? 'v%d' : "v%0#{first_version.length}d"
end

.get_versions_dir_files(object_root_dir, version1, version2) ⇒ Object

Given an object root and two versions, get the files on disk for that range of versions (inclusive)



38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/ocfl_tools/utils_file.rb', line 38

# Collect the files on disk for an inclusive range of versions.
#
# The two version arguments may be given in either order.
#
# @param object_root_dir [String] path to the object root.
# @param version1 one end of the version range.
# @param version2 the other end of the version range.
# @return the flattened list of per-version results from get_version_dir_files
#   (the return value of Array#flatten!).
# @raise [RuntimeError] if nothing was collected.
def self.get_versions_dir_files(object_root_dir, version1, version2)
  bounds = [version1, version2]
  highest = bounds.max
  lowest = bounds.min
  per_version = []
  current = lowest # walk upwards from the lower bound
  until current > highest
    per_version.push(OcflTools::Utils::Files.get_version_dir_files(object_root_dir, current))
    current += 1
  end
  # NOTE(review): one entry is pushed per version visited, so this guard looks
  # unlikely to fire even when every version is empty - confirm intent.
  raise 'No files found in version directories!' if per_version.empty?

  per_version.flatten!
end

.invert_and_expand(digest_hash) ⇒ Object

Given a [Hash] of digests and [ filepaths ], flip & expand to unique Filepath => digest.



124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/ocfl_tools/utils_file.rb', line 124

# Flip a digest => [filepaths] hash into a flat filepath => digest hash.
#
# Works on a copy made with OcflTools::Utils.deep_copy, so the input hash is
# not iterated directly.
#
# @param digest_hash [Hash] digest => enumerable of filepaths.
# @return [Hash] every filepath mapped to the digest it was listed under; if a
#   filepath appears under several digests, the last one visited wins.
# @raise [RuntimeError] if digest_hash is not a Hash.
def self.invert_and_expand(digest_hash)
  raise 'This only works on Hashes, buck-o' unless digest_hash.is_a?(Hash)

  OcflTools::Utils.deep_copy(digest_hash).each_with_object({}) do |(digest, filepaths), inverted|
    filepaths.each { |filepath| inverted[filepath] = digest }
  end
end

.invert_and_expand_and_prepend(digest_hash, prepend_string) ⇒ Object

Given a hash of digest => [ Filepaths ], invert and expand, then prepend a string to all filepaths.



138
139
140
141
142
143
144
145
146
147
148
# File 'lib/ocfl_tools/utils_file.rb', line 138

# Flip a digest => [filepaths] hash into filepath => digest, prefixing each filepath.
#
# Inversion is delegated to invert_and_expand and prefixing to expand_filepaths;
# the first element of each expand_filepaths result becomes the key.
#
# @param digest_hash [Hash] digest => enumerable of filepaths.
# @param prepend_string [String] prefix passed to expand_filepaths as the root.
# @return [Hash] prefixed filepath => digest.
# @raise [RuntimeError] if digest_hash is not a Hash.
def self.invert_and_expand_and_prepend(digest_hash, prepend_string)
  raise 'This only works on Hashes, buck-o' unless digest_hash.is_a?(Hash)

  by_filepath = OcflTools::Utils::Files.invert_and_expand(digest_hash)
  by_filepath.each_with_object({}) do |(filepath, digest), prefixed|
    expanded = OcflTools::Utils::Files.expand_filepaths(filepath, prepend_string)
    # expand_filepaths returns an array; a single file in means the first entry is the one wanted.
    prefixed[expanded[0]] = digest
  end
end