Module: FilePool

Defined in:
lib/file_pool.rb,
lib/file_pool/version.rb

Defined Under Namespace

Classes: InvalidFileId

Constant Summary collapse

# Version string of the FilePool library documented on this page
# (presumably defined in lib/file_pool/version.rb -- confirm).
VERSION =
"0.3.7"

Class Method Summary collapse

Class Method Details

.add(path, options = {}) ⇒ Object

Add a file to the file pool.

Same as FilePool.add!, but doesn’t throw exceptions.

Parameters:

path (String)

path of the file to add.

options (Hash)

:background (true,false) adding large files can take long (esp. with encryption), true won’t block, default is false

Return Value:

String containing a new unique ID for the file added.

false when the file could not be stored.



134
135
136
137
138
139
# File 'lib/file_pool.rb', line 134

# Add a file to the pool without raising on failure.
#
# Delegates to add! and converts any StandardError it raises into a
# +false+ return value.
#
# @param path [String] path of the file to add
# @param options [Hash] passed through unchanged to add!
# @return [String, false] the value returned by add! (the new file ID),
#   or false when add! raised an error
def self.add path, options = {}
  add!(path, options)
rescue StandardError
  # Deliberately not `rescue Exception`: Interrupt, SystemExit and other
  # non-StandardError conditions must keep propagating instead of being
  # silently turned into `false`.
  false
end

.add!(orig_path, options = {}) ⇒ Object

Add a file to the file pool.

Creates hard-links (ln) when file at path is on same file system as pool, otherwise copies it. When dealing with large files the latter should be avoided, because it takes more time and space.

Throws standard file exceptions when unable to store the file. See also FilePool.add to avoid it.

Parameters:

path (String)

path of the file to add.

options (Hash)

:background (true,false) adding large files can take long (esp. with encryption), true won’t block, default is false

Return Value:

String containing a new unique ID for the file added.



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/file_pool.rb', line 79

# Add a file to the pool under a freshly generated ID.
#
# The actual work (optional encryption, hard-link or copy, chmod/chown)
# runs in a forked child process. The parent either waits for the child
# or detaches from it, depending on options[:background].
#
# NOTE(review): errors raised inside the forked block happen in the
# child process, and the child's exit status is not inspected here, so
# they do not appear to reach the caller -- confirm whether that is
# intended.
#
# @param orig_path [String] path of the file to add
# @param options [Hash]
#   :background (true, false) when truthy the call does not wait for the
#   child process to finish
# @return [String] the new ID generated for the file
def self.add! orig_path, options = {}
  newid = uuid

  child = fork do
    target = path newid

    # `source` is what actually gets linked/copied into the pool: the
    # encrypted temp file in crypted mode, the original file otherwise.
    if @@crypted_mode
      FileUtils.mkpath(id2dir_secured newid)
      source = crypt(orig_path)
    else
      source = orig_path
      FileUtils.mkpath(id2dir newid)
    end

    # Hard-link only when allowed and both sides are on the same device.
    if !@@copy_source && (File.stat(source).dev == File.stat(File.dirname(target)).dev)
      FileUtils.link(source, target)
    else
      FileUtils.copy(source, target)
    end

    # Don't chmod/chown if the original file is the same file as the
    # target (hard-linked); that would change the caller's own file.
    unless File.identical?(orig_path, target)
      FileUtils.chmod(@@mode, target) if @@mode
      FileUtils.chown(@@owner, @@group, target)
    end
  end

  if options[:background]
    # don't wait, avoid zombies
    Process.detach(child)
  else
    # block until done
    Process.waitpid(child)
  end

  newid
end

.configure(config_file) ⇒ Object

Retrieves configuration from config file or creates a new one in case there’s none available.



391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
# File 'lib/file_pool.rb', line 391

# Load the encryption secrets from +config_file+, or create the file
# with a fresh random key and IV when it does not exist yet.
#
# Does nothing when +config_file+ is nil; otherwise switches the pool to
# crypted mode.
#
# @param config_file [String, nil] path of the YAML secrets file
# @raise [RuntimeError] when the file exists but cannot be loaded
def self.configure config_file
  return if config_file.nil?

  @@crypted_mode = true
  begin
    config = YAML.load_file(config_file)
    @@iv  = config[:iv]
    @@key = config[:key]
  rescue Errno::ENOENT
    # No secrets file yet: generate a new key/IV pair and persist it.
    cipher = OpenSSL::Cipher::AES.new(256, :CBC)
    @@iv  = cipher.random_iv
    @@key = cipher.random_key
    # Create the file owner-read-only from the start so the secrets are
    # never readable by others, and use the block form so the handle is
    # closed even if the write fails.
    File.open(config_file, 'w', 0400) do |cfg|
      cfg.write({:iv => @@iv, :key => @@key}.to_yaml)
    end
    File.chmod(0400, config_file)
  rescue => other_error
    raise "FilePool: Could not load secrets from #{config_file}: #{other_error}"
  end
end

.create_cipher ⇒ Object

Creates a cipher to encrypt data.

Returns the cipher.

Return Value:

OpenSSL::Cipher object.



363
364
365
366
367
368
369
# File 'lib/file_pool.rb', line 363

# Build an AES-256-CBC cipher in encryption mode, loaded with the
# pool's key and IV.
#
# NOTE(review): the same @@iv is applied to every cipher created here;
# confirm that reusing one IV for all files is acceptable.
#
# @return [OpenSSL::Cipher] cipher ready for update/final calls
def self.create_cipher
  OpenSSL::Cipher::AES.new(256, :CBC).tap do |aes|
    aes.encrypt
    aes.key = @@key
    aes.iv  = @@iv
  end
end

.create_decipher ⇒ Object

Creates a decipher to decrypt data.

Returns the decipher.

Return Value:

OpenSSL::Cipher object



379
380
381
382
383
384
385
# File 'lib/file_pool.rb', line 379

# Build an AES-256-CBC cipher in decryption mode, loaded with the
# pool's key and IV.
#
# @return [OpenSSL::Cipher] cipher ready for update/final calls
def self.create_decipher
  aes = OpenSSL::Cipher::AES.new(256, :CBC)
  aes.decrypt # select the mode first, then load key and IV
  aes.key = @@key
  aes.iv = @@iv
  aes
end

.crypt(path) ⇒ Object

Crypt a file and store the result in the temp.

Returns the path to the crypted file.

Parameters:

path (String)

path of the file to crypt.

Return Value:

String: path and name of the encrypted file.



302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# File 'lib/file_pool.rb', line 302

# Encrypt the file at +path+ into a temporary file.
#
# The input is read in chunks of @@block_size bytes; each encrypted
# chunk is written, flushed and fsynced before the next one is read.
#
# NOTE(review): the Tempfile object is not kept after this method
# returns, so the file may be removed once that object is garbage
# collected -- callers should use the returned path promptly (confirm).
#
# @param path [String] path of the file to encrypt
# @return [String] path of the temporary file holding the encrypted data
def self.crypt path
  encryptor = create_cipher
  encrypted = Tempfile.new 'FilePool-encrypt'

  # Reused read buffer; IO#read refills it on every iteration.
  chunk = ''

  File.open(path) do |plain|
    until plain.read(@@block_size, chunk).nil?
      encrypted << encryptor.update(chunk)
      encrypted.flush
      encrypted.fsync
    end
    # Emit whatever the cipher still holds (final padded block).
    encrypted << encryptor.final
  end

  encrypted.close
  encrypted.path
end

.decrypt(path) ⇒ Object

Decrypt a file and give a path to it.

Returns the path to the decrypted file.

Parameters:

path (String)

path of the file to decrypt.

Return Value:

String: path and name of the decrypted file.



335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
# File 'lib/file_pool.rb', line 335

# Decrypt the file at +path+ into a temporary file.
#
# The input is read in chunks of @@block_size bytes; each decrypted
# chunk is written, flushed and fsynced before the next one is read.
#
# @param path [String] path of the encrypted file
# @return [String] path of the temporary file holding the decrypted data
def self.decrypt path
  decryptor = create_decipher
  plain = Tempfile.new 'FilePool-decrypt'

  # Reused read buffer; IO#read refills it on every iteration.
  chunk = ''

  File.open(path) do |encrypted|
    until encrypted.read(@@block_size, chunk).nil?
      plain << decryptor.update(chunk)
      plain.flush
      plain.fsync
    end
    # Emit whatever the cipher still holds (final block).
    plain << decryptor.final
  end

  # Re-open for reading; per the original author this prevents early
  # deletion of the tempfile.
  plain.open
  plain.path
end

.encrypted?(fid) ⇒ Boolean

Tell whether a file was stored with encryption. (This only checks for presence in the secured part of the file pool; whether the file was actually encrypted cannot be determined.)

Returns:

  • (Boolean)


418
419
420
# File 'lib/file_pool.rb', line 418

# Report whether a regular file for +fid+ exists in the secured tree.
#
# @param fid [String] file ID
# @return [Boolean] true when the file is present under the secured root
def self.encrypted? fid
  secured_file = "#{id2dir_secured(fid)}/#{fid}"
  File.file?(secured_file)
end

.id2dir(fid) ⇒ Object

path from fid without file name



257
258
259
# File 'lib/file_pool.rb', line 257

# Directory (without file name) for +fid+ in the plain tree: the pool
# root followed by one directory level for each of the first three
# characters of the ID.
#
# @param fid [String] file ID
# @return [String] directory path below the pool root
def self.id2dir fid
  segments = [root, fid[0, 1], fid[1, 1], fid[2, 1]]
  segments.map(&:to_s).join('/')
end

.id2dir_secured(fid) ⇒ Object

secured path from fid without file name



262
263
264
# File 'lib/file_pool.rb', line 262

# Directory (without file name) for +fid+ in the secured tree: the pool
# root with a "_secured" suffix, followed by one directory level for
# each of the first three characters of the ID.
#
# @param fid [String] file ID
# @return [String] directory path below the secured root
def self.id2dir_secured fid
  secured_root = "#{root}_secured"
  shards = (0..2).map { |index| fid[index, 1].to_s }
  ([secured_root] + shards).join('/')
end

.median(sizes) ⇒ Object

median file size



281
282
283
284
285
286
287
# File 'lib/file_pool.rb', line 281

# Median of a list of numbers.
#
# For an odd number of values this is the middle element of the sorted
# list; for an even number it is the mean of the two middle elements.
#
# @param sizes [Array<Numeric>] values to evaluate (not modified)
# @return [Float] the median, or 0.0 when +sizes+ is empty
def self.median(sizes)
  return 0.0 if sizes.empty?

  sorted = sizes.sort
  # With zero-based indices the two middle positions are (n - 1) / 2 and
  # n / 2; they coincide when n is odd.
  lower = sorted[(sorted.length - 1) / 2]
  upper = sorted[sorted.length / 2]
  (lower + upper).to_f / 2
end

.path(fid, options = {}) ⇒ Object

Return the file’s path corresponding to the passed file ID, no matter if it exists or not. In encrypting mode the file is first decrypted and the returned path will point to a temporary location of the decrypted file.

To get the path of the encrypted file pass :decrypt => false, as an option.

Parameters:

fid (String)

File ID which was generated by a previous #add operation.

options (Hash)

:decrypt (true,false) In encryption mode don’t decrypt, but return the encrypted file’s path. Defaults to true.

Return Value:

String, absolute path of the file in the pool or to temporary location if it was decrypted.

Raises:

InvalidFileId if fid is not a valid file ID.



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/file_pool.rb', line 159

# Return the path belonging to the file ID +fid+, whether or not the
# file exists.
#
# Lookup order:
# 1. file present in the secured tree: in crypted mode it is decrypted
#    and the path of the decrypted temp file is returned, unless
#    :decrypt => false was passed; otherwise the secured path itself.
# 2. file present in the plain tree: the plain path.
# 3. file not present: the secured path in crypted mode, else the plain
#    path.
#
# The passed +options+ hash is only read, never modified.
#
# @param fid [String] file ID
# @param options [Hash]
#   :decrypt (true, false) only an explicit false disables decryption
# @return [String] path in the pool, or path of a decrypted temp file
# @raise [InvalidFileId] when valid?(fid) is false
def self.path fid, options = {}
  raise InvalidFileId unless valid?(fid)

  # Anything but an explicit `false` (including a missing key) means
  # "decrypt".
  decrypt_wanted = options[:decrypt] != false

  secured_path = "#{id2dir_secured(fid)}/#{fid}"
  plain_path = "#{id2dir(fid)}/#{fid}"

  if File.file?(secured_path)
    # present in secured tree
    (@@crypted_mode && decrypt_wanted) ? decrypt(secured_path) : secured_path
  elsif File.file?(plain_path)
    # present in plain tree
    plain_path
  else
    # not present: report where the file would be stored
    @@crypted_mode ? secured_path : plain_path
  end
end

.remove(fid) ⇒ Object

Remove a previously added file by its ID. Same as FilePool.remove!, but doesn’t throw exceptions.

Parameters:

fid (String)

File ID which was generated by a previous #add operation.

Return Value:

Boolean, true if file was removed successfully, false else



214
215
216
217
218
# File 'lib/file_pool.rb', line 214

# Remove a previously added file by its ID without raising on failure.
#
# Delegates to remove! and maps the outcome to a Boolean.
#
# @param fid [String] file ID
# @return [Boolean] true when remove! completed, false when it raised a
#   StandardError
def self.remove fid
  remove!(fid)
  true
rescue StandardError
  # Deliberately not `rescue Exception`: Interrupt, SystemExit and other
  # non-StandardError conditions must keep propagating.
  false
end

.remove!(fid) ⇒ Object

Remove a previously added file by its ID. Same as FilePool.remove, but throws exceptions on failure.

Parameters:

fid (String)

File ID which was generated by a previous #add operation.



198
199
200
# File 'lib/file_pool.rb', line 198

# Remove a previously added file by its ID, letting errors propagate.
#
# The path is looked up with :decrypt => false, so the file stored in
# the pool is removed rather than a decrypted temporary copy.
#
# @param fid [String] file ID
# @return the result of FileUtils.rm
def self.remove! fid
  stored_file = path(fid, :decrypt => false)
  FileUtils.rm(stored_file)
end

.root ⇒ Object



252
253
254
# File 'lib/file_pool.rb', line 252

# Root directory of the pool as stored in @@root.
#
# @return the configured root directory
# @raise [RuntimeError] when reading @@root fails with a StandardError
#   (e.g. it was never assigned)
def self.root
  @@root
rescue StandardError
  raise("FilePool: no root directory defined. Use FilePool#setup.")
end

.setup(root, options = {}) ⇒ Object

Setup the root directory of the file pool and configure encryption

Parameters:

root (String)

absolute path of the file pool’s root directory under which all files will be stored.

config_file_path (String)

path to the config file of the filepool.

options (Hash)
  • :secrets_file (String) path to file containing key and IV for encryption (if omitted FilePool does not encrypt/decrypt). If file is not present, the file is initialized with a new random key and IV.

  • :encryption_block_size (Integer) sets the block size for encryption/decryption in bytes. Larger blocks need more memory and less time (less IO). Defaults to 1’048’576 (1 MiB).

  • :copy_source (true,false) if false files added to the pool are hard-linked with the source if source and file pool are on the same file system (default). If set to true files are always copied into the pool.

  • :mode (Integer) File mode to set on all files added to the pool. E.g. mode: 0640 for rw-r----- or symbolic “u=wrx,go=rx” (see Ruby stdlib FileUtils#chmod). Note that the desired mode is not set if the file is hard-linked with the source. Use copy_source: true to ensure the mode is applied.

  • :owner Owner of the files added to the pool. Note that the desired owner is not set if the file is hard-linked with the source. Use copy_source: true to ensure the owner is applied.

  • :group Group of the files added to the pool. Note that the desired group is not set if the file is hard-linked with the source. Use copy_source: true to ensure the group is applied.



45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/file_pool.rb', line 45

# Configure the pool: store the root directory and the per-pool
# settings, then hand the secrets file (if any) to configure.
#
# Unknown option keys are reported with a warning printed via puts; they
# do not abort the setup.
#
# @param root [String] root directory of the pool
# @param options [Hash] recognised keys: :encryption_block_size,
#   :secrets_file, :copy_source, :mode, :owner, :group
# @return the result of configure
def self.setup root, options={}
  recognized = [:encryption_block_size, :secrets_file, :copy_source, :mode, :owner, :group]
  unknown = options.keys - recognized
  unless unknown.empty?
    puts "FilePool Warning: unknown option(s) passed to #setup: #{unknown.inspect}"
  end

  @@root         = root
  # Encryption starts disabled; configure enables it when a secrets
  # file is given.
  @@crypted_mode = false
  @@block_size   = options[:encryption_block_size] || (1024*1024)
  @@copy_source  = options[:copy_source] || false
  @@mode         = options[:mode]
  @@group        = options[:group]
  @@owner        = options[:owner]

  configure options[:secrets_file]
end

.stat ⇒ Object

Returns some statistics about the current pool. (It may be slow if the pool contains very many files as it computes them from scratch.)

Return Value

Hash with keys

:file_number (Integer)

Number of files in pool

:total_size (Integer)

Total number of bytes of all files

:median_size (Float)

Median of file sizes (the middle value of the sorted sizes)

:last_add (Time)

Time and Date of last add operation



236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/file_pool.rb', line 236

# Compute statistics over every file currently in the pool, covering
# both the secured tree and the plain tree.
#
# Files are discovered with a four-level glob below each root, and each
# one is stat'ed on every call.
#
# @return [Hash] with keys
#   :total_size  sum of all file sizes in bytes
#   :median_size median file size (0.0 for an empty pool)
#   :file_number number of files found
#   :last_add    latest ctime among the files, nil for an empty pool
def self.stat
  pool_root = root
  # Concatenate the two listings; appending with << would nest the
  # second list as a single Array element.
  all_files = Dir.glob("#{pool_root}_secured/*/*/*/*") + Dir.glob("#{pool_root}/*/*/*/*")
  all_stats = all_files.map { |file| File.stat(file) }
  sizes = all_stats.map { |file_stat| file_stat.size }

  {
    :total_size => sizes.inject(0) { |sum, size| sum + size },
    :median_size => (sizes.empty? ? 0.0 : median(sizes)),
    :file_number => all_files.length,
    :last_add => all_stats.map { |file_stat| file_stat.ctime }.max
  }
end

.uuid ⇒ Object

return a new UUID type 4 (random) as String



267
268
269
# File 'lib/file_pool.rb', line 267

# Generate a new random UUID via UUIDTools and return it as a String.
#
# @return [String] string form of the generated UUID
def self.uuid
  generated = UUIDTools::UUID.random_create
  generated.to_s
end

.valid?(uuid) ⇒ Boolean

return true if uuid is a valid UUID type 4

Returns:

  • (Boolean)


272
273
274
275
276
277
278
# File 'lib/file_pool.rb', line 272

# Check whether +uuid+ parses as a UUID that UUIDTools considers valid.
#
# @param uuid [String] candidate ID
# @return [Boolean] result of UUID#valid?; false when parsing raised a
#   TypeError or an ArgumentError
def self.valid? uuid
  parsed = UUIDTools::UUID.parse(uuid)
  parsed.valid?
rescue TypeError, ArgumentError
  false
end