Class: NdrPseudonymise::DemographicsOnlyPseudonymiser

Inherits:
PseudonymisationSpecification show all
Defined in:
lib/ndr_pseudonymise/demographics_only_pseudonymiser.rb

Overview

Pseudonymise prescription data

Constant Summary collapse

PREAMBLE_V2_DEMOG_ONLY =
'Pseudonymised matching data v2.0-demog-only'.freeze

Constants inherited from PseudonymisationSpecification

PseudonymisationSpecification::HEADER_ROW_PREFIX, PseudonymisationSpecification::KEY_BYTES, PseudonymisationSpecification::PREAMBLE_V1_STRIPED

Instance Method Summary collapse

Methods inherited from PseudonymisationSpecification

#all_demographics, #clinical_data, #core_demographics, #data_hash, #decrypt_data, #decrypt_to_csv, #encrypt_data, factory, get_key_bundle, #header_row?, #initialize, #pseudo_id, #pseudonymise_csv, #random_key, #real_ids, #row_errors, #safe_json

Constructor Details

This class inherits a constructor from NdrPseudonymise::PseudonymisationSpecification

Instance Method Details

#csv_header_row ⇒ Object

Header row for CSV data



79
80
81
# File 'lib/ndr_pseudonymise/demographics_only_pseudonymiser.rb', line 79

# Header row for CSV output: a single-cell row holding the
# demographics-only (v2.0) preamble string.
def csv_header_row
  preamble = PREAMBLE_V2_DEMOG_ONLY
  [preamble]
end

#emit_csv_rows(out_csv, pseudonymised_row) ⇒ Object

Append the output of pseudonymise_row to a CSV file



84
85
86
# File 'lib/ndr_pseudonymise/demographics_only_pseudonymiser.rb', line 84

# Append the output of pseudonymise_row to a CSV file.
#
# Only the first element of pseudonymised_row is written; the value
# returned is whatever out_csv's << operator returns.
def emit_csv_rows(out_csv, pseudonymised_row)
  first_output_row = pseudonymised_row[0]
  out_csv << first_output_row
end

#pseudonymise_row(row) ⇒ Object

Returns a single-element array of output rows: [[packed_pseudoid_and_demographics, clinical_data1, …]]

Where packed_pseudoid_and_demographics consists of “pseudo_id1 (key_bundle) encrypted_demographics”



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/ndr_pseudonymise/demographics_only_pseudonymiser.rb', line 20

# Pseudonymise a single input row.
#
# Builds a hash of the row's demographics in two passes (the order matters,
# because the hash is serialised to JSON and that JSON is both the cache key
# and the encrypted payload):
#   1. columns whose index is listed in @format_spec[:demographics], keyed by
#      the column's :title;
#   2. every column that declares a :canonical_title, keyed by that title,
#      with date values reformatted when the column has both :strptime and
#      :strftime.
#
# Returns a one-element array of output rows:
#   [[packed_pseudoid_and_demographics, clinical_data1, ...]]
# where packed_pseudoid_and_demographics is
#   "pseudo_id1 (key_bundle) encrypted_demographics"
#
# Raises ArgumentError ('Invalid date') if a non-blank value in a date column
# matches none of that column's :strptime formats.
def pseudonymise_row(row)
  @key_cache ||= {} # Cache pseudonymisation keys for more compact import
  columns = @format_spec[:columns]
  demographics_cols = @format_spec[:demographics]

  # Pass 1: raw demographics, keyed by column title
  # e.g. all_demographics_hash = { 'nhsnumber' => row[0], 'birthdate' => row[1] }
  all_demographics_hash = {}
  row.each_with_index do |value, i|
    all_demographics_hash[columns[i][:title]] = value if demographics_cols.include?(i)
  end

  # Pass 2: canonical fields (added after, or overwriting, the pass 1 entries)
  # TODO: Refactor date handling into parent class's all_demographics_hash method
  row.each_with_index do |value, i|
    row_spec = columns[i]
    canonical_title = row_spec[:canonical_title]
    next unless canonical_title

    all_demographics_hash[canonical_title] = canonical_demographic_value(value, row_spec)
  end

  # Ensure NHS number is empty string (expected by SimplePseudonymisation), not nil
  nhsnumber = all_demographics_hash['nhsnumber'].to_s
  all_demographics_hash['nhsnumber'] = nhsnumber
  birthdate = all_demographics_hash['birthdate']

  # Serialise once: the same JSON is the cache key and the data to encrypt
  demographics_json = all_demographics_hash.to_json
  simple = NdrPseudonymise::SimplePseudonymisation

  if @key_cache.key?(demographics_json)
    pseudo_id1, key_bundle, demog_key = @key_cache[demographics_json]
  else
    pseudo_id1, key_bundle, demog_key =
      simple.generate_keys_nhsnumber_demog_only(@salt1, @salt2, nhsnumber)
    # Only cache rows that have both an NHS number and a birthdate
    unless nhsnumber.empty? || birthdate.to_s.empty?
      @key_cache = {} if @key_cache.size > 1000 # Limit cache size
      @key_cache[demographics_json] = [pseudo_id1, key_bundle, demog_key]
    end
  end

  encrypted_demographics = simple.encrypt_data64(demog_key, demographics_json)
  packed_pseudoid_and_demographics = format('%s (%s) %s', pseudo_id1, key_bundle,
                                            encrypted_demographics)
  [[packed_pseudoid_and_demographics] + clinical_data(row)]
end

# Value to store under a column's :canonical_title.
#
# Returns +value+ unchanged unless it is present and the column spec has both
# :strptime and :strftime. Otherwise the value is parsed with the first
# matching :strptime format and re-rendered with :strftime.
# :strptime can contain a String (single format) or Array (a list of formats);
# :strftime contains a single string format.
#
# Raises ArgumentError ('Invalid date') if no :strptime format matches.
def canonical_demographic_value(value, row_spec)
  input_formats = row_spec[:strptime]
  output_format = row_spec[:strftime]
  return value unless value.present? && input_formats && output_format

  datetime = nil
  Array(input_formats).each do |input_format|
    begin
      datetime = DateTime.strptime(value, input_format)
      break
    rescue ArgumentError
      next # Keep trying after invalid date formats
    end
  end
  raise ArgumentError, 'Invalid date' if datetime.nil? # No formats matched

  datetime.strftime(output_format)
end
private :canonical_demographic_value