Class: NdrPseudonymise::NdrEncrypt::Repository

Inherits:
Object
  • Object
show all
Defined in:
lib/ndr_pseudonymise/ndr_encrypt/repository.rb

Overview

Defines a local ndr_encrypt working copy

Constant Summary collapse

CSV_COLUMNS =

rubocop:disable Style/SlicingWithRange

%w[git_blobid path].freeze
ENCRYPTED_DIR =
'ndr_encrypted/'.freeze

Instance Method Summary collapse

Constructor Details

#initialize(repo_dir: nil) ⇒ Repository

Returns a new instance of Repository.

Raises:

  • (ArgumentError)


15
16
17
18
19
20
# File 'lib/ndr_pseudonymise/ndr_encrypt/repository.rb', line 15

# Sets up a repository handle for the given working copy directory.
#
# @param repo_dir the working copy directory; stored in @repo_dir
# @raise [ArgumentError] if repo_dir is not supplied (nil or false)
def initialize(repo_dir: nil)
  # Required keyword arguments are unavailable on ruby 2.0, so the check is done by hand
  if repo_dir
    @repo_dir = repo_dir
  else
    raise(ArgumentError, 'missing keyword: :repo_dir')
  end
end

Instance Method Details

#add(paths, key_name: nil, pub_key: nil) ⇒ Object

Add file contents to the encrypted store and index

Raises:

  • (ArgumentError)


32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/ndr_pseudonymise/ndr_encrypt/repository.rb', line 32

# Add file contents to the encrypted store and index.
#
# Each entry of +paths+ is walked with Find.find. Every regular file found that is
# not inside the encrypted store directory is written to the store via hash_object
# (with write: true) and a [git_blobid, filename] row is appended to the index file.
#
# @param paths [Enumerable] files and / or directories to add
# @param key_name passed through to hash_object
# @param pub_key passed through to hash_object
# @raise [ArgumentError] if a keyword is missing or the repository is not valid
def add(paths, key_name: nil, pub_key: nil)
  # We need to support ruby 2.0 so cannot use required keyword arguments syntax
  raise(ArgumentError, 'missing keyword: :key_name') unless key_name
  raise(ArgumentError, 'missing keyword: :pub_key') unless pub_key
  raise(ArgumentError, 'Invalid ndr_encrypted encrypted store') unless valid_repository?

  # Resolved once up front. The trailing separator (added by joining with '') ensures
  # that only files really inside the encrypted store are skipped, and not files in a
  # sibling directory whose name merely starts the same way (e.g. "ndr_encrypted_old/").
  real_encrypted_dir = File.realdirpath(File.join(@repo_dir, ENCRYPTED_DIR))
  encrypted_prefix = File.join(real_encrypted_dir, '')

  paths.each do |path|
    Find.find(path) do |fn|
      next unless File.file?(fn)
      # Never re-encrypt the contents of the encrypted store itself
      next if File.realdirpath(fn).start_with?(encrypted_prefix)

      git_blobid, _encrypted_id = hash_object(fn, key_name: key_name,
                                                  pub_key: pub_key, write: true)
      File.open(index_filename, 'ab') { |f| f << [git_blobid, fn].to_csv }
    end
  end
end

#cat_file(git_blobid, key_name: nil, private_key: nil, passin: nil) ⇒ Object

Retrieve local file(s) based on git_blobid

Raises:

  • (ArgumentError)


138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/ndr_pseudonymise/ndr_encrypt/repository.rb', line 138

# Read the stored object for a git_blobid and return its decoded contents.
#
# The encrypted object is located under object_dir, read as binary, then passed
# through NdrEncrypt::EncryptedObject.decrypt, .decompress and .unpack_blob in turn.
#
# @param git_blobid identifier used (with key_name) to derive the encrypted id
# @param key_name passed to NdrEncrypt::EncryptedObject.encrypted_id
# @param private_key passed to NdrEncrypt::EncryptedObject.decrypt
# @param passin passed to NdrEncrypt::EncryptedObject.decrypt (optional)
# @return the result of NdrEncrypt::EncryptedObject.unpack_blob
# @raise [ArgumentError] if a keyword is missing, or no such object is stored
def cat_file(git_blobid, key_name: nil, private_key: nil, passin: nil)
  # Required keyword arguments are unavailable on ruby 2.0, so the checks are done by hand
  raise(ArgumentError, 'missing keyword: :key_name') unless key_name
  raise(ArgumentError, 'missing keyword: :private_key') unless private_key

  enc_id = NdrEncrypt::EncryptedObject.encrypted_id(git_blobid, key_name: key_name)
  # First two characters of the id name the subdirectory, the remainder the file
  stored_path = File.join(object_dir, enc_id[0..1], enc_id[2..-1])
  raise(ArgumentError, 'File does not exist in encrypted storage') unless File.exist?(stored_path)

  encrypted = File.binread(stored_path)
  compressed = NdrEncrypt::EncryptedObject.decrypt(encrypted, private_key: private_key,
                                                              passin: passin)
  NdrEncrypt::EncryptedObject.unpack_blob(NdrEncrypt::EncryptedObject.decompress(compressed))
end

#gc(output_stream: StringIO.new) ⇒ Object

Cleanup unnecessary index entries and optimize the encrypted store

Raises:

  • (ArgumentError)


53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/ndr_pseudonymise/ndr_encrypt/repository.rb', line 53

# Cleanup unnecessary index entries and optimize the encrypted store.
#
# Reads the whole index, validates the header and every data row, removes duplicate
# rows (the result is sorted), and rewrites the index file. Progress messages are
# written to +output_stream+.
#
# If rewriting the index fails part way through, the original index file is put back
# so that the repository is not left without an index.
#
# @param output_stream [#print, #puts] receives progress output
# @raise [ArgumentError] if the repository, the index header or an index row is invalid
def gc(output_stream: StringIO.new)
  raise(ArgumentError, 'Invalid ndr_encrypted encrypted store') unless valid_repository?

  output_stream.print('Reading index: ')
  csv_data = CSV.read(index_filename)
  header = csv_data.shift
  raise(ArgumentError, 'Invalid header in index file') unless CSV_COLUMNS == header

  count0 = csv_data.size
  output_stream.print("#{count0} entries.\nRemoving duplicates: ")
  # Every data row must be a [lowercase hex id, path] pair
  csv_data.each.with_index do |row, i|
    unless row.size == 2 && row[0] =~ /\A[0-9a-f]+\z/
      raise(ArgumentError, "Invalid index entry on data row #{i + 1}")
    end
  end
  csv_data = csv_data.sort.uniq
  count1 = csv_data.size
  output_stream.print("#{count1} entries remaining.\nWriting objects: ")
  # Move aside index file temporarily to reduce race conditions
  # Note: should use a proper lock file for all index interactions
  orig_filename = "#{index_filename}.orig"
  temp_filename = "#{index_filename}.new"
  FileUtils.mv(index_filename, orig_filename)
  begin
    CSV.open(temp_filename, 'wb') do |csv|
      csv << header
      csv_data.each { |row| csv << row }
    end
    FileUtils.mv(temp_filename, index_filename)
  rescue StandardError
    # Writing the replacement failed: discard any partial output and restore the
    # original index, then let the caller see the underlying error.
    FileUtils.rm_f(temp_filename) if File.file?(temp_filename)
    FileUtils.mv(orig_filename, index_filename) unless File.exist?(index_filename)
    raise
  end
  FileUtils.rm(orig_filename)
  output_stream.puts("100% (#{count1}/#{count1}), done.\n")
  output_stream.puts("Total #{count1} (delta #{count0 - count1})")
end

#get(paths, key_name: nil, private_key: nil, passin: nil) ⇒ Object

Retrieve local file(s) based on CSV entry

Raises:

  • (ArgumentError)


87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/ndr_pseudonymise/ndr_encrypt/repository.rb', line 87

# Restore local file(s) from the encrypted store, using the index to find them.
#
# For each distinct requested path, the first index row with that path is used; its
# git_blobid is fetched via cat_file and the result is written to that path.
# Nothing is written unless every requested path was found in the index.
#
# @param paths [Enumerable] paths to restore, as recorded in the index "path" column
# @param key_name passed through to cat_file
# @param private_key passed through to cat_file
# @param passin passed through to cat_file (optional)
# @raise [ArgumentError] if a keyword is missing, the repository is not valid,
#   or some requested paths are not in the index
def get(paths, key_name: nil, private_key: nil, passin: nil)
  # Required keyword arguments are unavailable on ruby 2.0, so the checks are done by hand
  raise(ArgumentError, 'missing keyword: :key_name') unless key_name
  raise(ArgumentError, 'missing keyword: :private_key') unless private_key
  raise(ArgumentError, 'Invalid ndr_encrypted encrypted store') unless valid_repository?

  wanted = Set.new(paths) # collapses repeated requests for the same path
  wanted_count = wanted.size
  matches = Set.new
  CSV.foreach(index_filename, headers: true) do |row|
    # The index may list a path more than once (until garbage collected); removing
    # the path from the wanted set means only its first row is ever taken.
    next unless wanted.delete?(row['path'])

    matches << row
    break if wanted.empty?
  end
  raise(ArgumentError, 'Cannot find some files') unless matches.size == wanted_count

  matches.each do |row|
    contents = cat_file(row['git_blobid'], key_name: key_name, private_key: private_key,
                                           passin: passin)
    File.open(row['path'], 'wb') { |f| f << contents }
  end
end

#hash_object(path, key_name: nil, pub_key: nil, write: nil) ⇒ Object

Computes object IDs and optionally creates an encrypted object from a file. Returns [git_blobid, encrypted_id].

Raises:

  • (ArgumentError)


115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/ndr_pseudonymise/ndr_encrypt/repository.rb', line 115

# Computes the object IDs for a file, optionally storing it as an encrypted object.
#
# The file is read as binary and turned into a blob; the git_blobid is the digest of
# that blob and the encrypted_id is derived from the git_blobid and key_name. When
# +write+ is truthy and no object with that encrypted_id is stored yet, the blob is
# compressed, encrypted with +pub_key+ and written beneath object_dir.
#
# @param path file to read
# @param key_name passed to NdrEncrypt::EncryptedObject.encrypted_id
# @param pub_key passed to NdrEncrypt::EncryptedObject.encrypt
# @param write whether to create the encrypted object
# @return [Array] [git_blobid, encrypted_id]
# @raise [ArgumentError] if key_name or pub_key is missing
def hash_object(path, key_name: nil, pub_key: nil, write: nil)
  # Required keyword arguments are unavailable on ruby 2.0, so the checks are done by hand
  raise(ArgumentError, 'missing keyword: :key_name') unless key_name
  raise(ArgumentError, 'missing keyword: :pub_key') unless pub_key

  blob = NdrEncrypt::EncryptedObject.blob(File.binread(path))
  git_blobid = NdrEncrypt::EncryptedObject.digest(blob)
  encrypted_id = NdrEncrypt::EncryptedObject.encrypted_id(git_blobid, key_name: key_name)
  ids = [git_blobid, encrypted_id]
  return ids unless write

  # First two characters of the id name the subdirectory, the remainder the file
  shard_dir = File.join(object_dir, encrypted_id[0..1])
  target = File.join(shard_dir, encrypted_id[2..-1])
  return ids if File.exist?(target) # Don't override existing file

  compressed = NdrEncrypt::EncryptedObject.compress(blob)
  encrypted = NdrEncrypt::EncryptedObject.encrypt(compressed, pub_key: pub_key)
  FileUtils.mkdir_p(shard_dir)
  File.open(target, 'wb') { |f| f << encrypted }
  ids
end

#init ⇒ Object

Create directory structure



23
24
25
26
27
28
29
# File 'lib/ndr_pseudonymise/ndr_encrypt/repository.rb', line 23

# Creates the directory structure and, if needed, a fresh index file.
#
# The object directory is always created. If the repository is then already valid,
# nothing else is done; otherwise a new index containing only the header row is written.
#
# @return [Boolean] true if a new index was written, false if the repository was already valid
def init
  FileUtils.mkdir_p(object_dir)
  if valid_repository?
    false
  else
    CSV.open(index_filename, 'wb') { |csv| csv << CSV_COLUMNS }
    true
  end
end