Class: PseudonymisedFileWrapper
- Inherits:
-
Object
- Object
- PseudonymisedFileWrapper
- Defined in:
- lib/ndr_pseudonymise/pseudonymised_file_wrapper.rb
Overview
Provides the ability to extract field names and create CSV output from .pseudo files.
Instance Method Summary
- #available_fields ⇒ Object
-
#initialize(filename) ⇒ PseudonymisedFileWrapper
constructor
A new instance of PseudonymisedFileWrapper.
- #pretty_data ⇒ Object
-
#pretty_write ⇒ Object
Create an Excel-readable CSV file, in the same location as the original.
-
#process ⇒ Object
Read in the source file, accumulating all the field names used in any row.
Constructor Details
#initialize(filename) ⇒ PseudonymisedFileWrapper
Returns a new instance of PseudonymisedFileWrapper.
24 25 26 27 |
# File 'lib/ndr_pseudonymise/pseudonymised_file_wrapper.rb', line 24
# Set up a wrapper for a single source file. Nothing is read here; the
# filename is only remembered for the other methods to use.
#
# @param filename path of the source file (stored unchanged in @filename)
def initialize(filename)
  # Diagnostics from the other methods are sent to standard output.
  @logger = Logger.new(STDOUT)
  @filename = filename
end
Instance Method Details
#available_fields ⇒ Object
29 30 31 |
# File 'lib/ndr_pseudonymise/pseudonymised_file_wrapper.rb', line 29
# List every field name collected in @all_fields1 and @all_fields2,
# without duplicates and in sorted order.
#
# @return [Array] sorted, de-duplicated field names
def available_fields
  # Array#| is a set union, so names present in both lists appear once.
  combined = @all_fields1 | @all_fields2
  combined.sort
end
#pretty_data ⇒ Object
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/ndr_pseudonymise/pseudonymised_file_wrapper.rb', line 81
# Render the rows held in @processed_lines as a single CSV string.
#
# The header row is each @all_fields1 name prefixed with "mapped:", then each
# @all_fields2 name prefixed with "raw:", then the three fixed columns
# pseudo_id1, pseudo_id2 and key_bundle. Every data row follows that column
# order; a field missing from a row's map is emitted as an empty cell.
#
# @return [String] the generated CSV text
def pretty_data
  header_row = @all_fields1.map { |name| "mapped:#{name}" }
  header_row.concat(@all_fields2.map { |name| "raw:#{name}" })
  header_row.concat(%w(pseudo_id1 pseudo_id2 key_bundle))

  CSV.generate do |out|
    out << header_row
    @processed_lines.each do |record|
      # values_at returns nil for absent keys, keeping the columns aligned.
      mapped_cells = record[:map1].values_at(*@all_fields1)
      raw_cells = record[:map2].values_at(*@all_fields2)
      out << [*mapped_cells, *raw_cells, record[:id1], record[:id2], record[:keys]]
    end
  end
end
#pretty_write ⇒ Object
Create an Excel-readable CSV file, in the same location as the original.
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/ndr_pseudonymise/pseudonymised_file_wrapper.rb', line 63
# Create an Excel-readable CSV file, in the same location as the original.
#
# The output path is the text of @filename that precedes the last ".csv",
# ".zip.pseudo", ".xls.pseudo" or ".xlsx.pseudo" (case-insensitive), with
# "_pretty.csv" appended. The columns are each @all_fields1 name prefixed
# with "mapped:", each @all_fields2 name prefixed with "raw:", then
# pseudo_id1, pseudo_id2 and key_bundle; one row is written per entry in
# @processed_lines.
#
# @raise [ArgumentError] if @filename contains none of the recognised suffixes
def pretty_write
  # Use the MatchData returned by #match instead of $LAST_MATCH_INFO: that
  # global is only aliased once the 'English' library has been required, and
  # it is nil when the pattern does not match.
  match = /(?<base_name>.*)\.(?:csv|(?:zip|xlsx?)\.pseudo)/i.match(@filename)
  raise ArgumentError, "Cannot derive an output filename from #{@filename.inspect}" unless match

  target_filename = "#{match[:base_name]}_pretty.csv"
  @logger.debug "Writing output to #{target_filename}"

  CSV.open(target_filename, 'w') do |file|
    headers = (@all_fields1.map { |name| "mapped:#{name}" } +
               @all_fields2.map { |name| "raw:#{name}" } +
               %w(pseudo_id1 pseudo_id2 key_bundle))
    file << headers
    @processed_lines.each do |line|
      # A field absent from this row's map yields nil, i.e. an empty cell.
      output_fields = @all_fields1.map { |field| line[:map1][field] } +
                      @all_fields2.map { |field| line[:map2][field] }
      output_fields.push(line[:id1], line[:id2], line[:keys])
      file << output_fields
    end
  end
end
#process ⇒ Object
Read in the source file, accumulating all the field names used in any row
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/ndr_pseudonymise/pseudonymised_file_wrapper.rb', line 34
# Read in the source file, accumulating all the field names used in any row.
#
# Rows are classified by their number of CSV fields:
# * 1 field  - treated as a header and skipped;
# * 7 fields - columns 4 and 6 are parsed as JSON (stored as :map1 and :map2),
#   columns 0, 1 and 2 are stored as :id1, :id2 and :keys;
# * anything else - reported through the logger at debug level and skipped.
#
# On completion the results are stored in @processed_lines, @all_fields1
# (keys seen in any :map1), @all_fields2 (keys seen in any :map2) and @lines.
#
# @return [Array<Hash>] the parsed rows (the same array as @processed_lines)
def process
  parsed_rows = []
  mapped_names = []
  raw_names = []
  next_line_no = 1

  CSV.foreach(@filename) do |fields|
    case fields.size
    when 1
      # Header; do nothing
    when 7
      record = {
        map1: JSON.parse(fields[4]),
        map2: JSON.parse(fields[6]),
        id1: fields[0],
        id2: fields[1],
        keys: fields[2]
      }
      parsed_rows << record
      # Set union keeps first-seen order and ignores names already collected.
      mapped_names |= record[:map1].keys
      raw_names |= record[:map2].keys
    else
      @logger.debug "Line #{next_line_no} contained unexpected number of fields: #{fields.size}"
    end
    next_line_no += 1
  end

  # NOTE(review): the counter starts at 1 and is bumped after every row, so
  # @lines ends up one greater than the number of rows read - confirm intent.
  @lines = next_line_no
  @all_fields1 = mapped_names
  @all_fields2 = raw_names
  @processed_lines = parsed_rows
end