Class: GeocodeRecords::UpdateTableFromCsv
- Inherits:
-
Object
- Object
- GeocodeRecords::UpdateTableFromCsv
- Defined in:
- lib/geocode_records/update_table_from_csv.rb
Constant Summary collapse
- CREATE_TABLE_SQL =
(<<-SQL).gsub(' ', '').freeze CREATE TABLE $TMP_TABLE_NAME ( id uuid primary key, ss_street_predirection text, ss_street_name text, ss_street_suffix text, ss_street_postdirection text, ss_delivery_line_1 text, ss_primary_number text, ss_secondary_number text, ss_city_name text, ss_state_abbreviation text, ss_zipcode text, ss_plus4_code text, ss_latitude float, ss_longitude float, ss_default_city_name text, ss_rdi text ) SQL
- DESIRED_COLUMNS =
%w{ id ss_street_predirection ss_street_name ss_street_suffix ss_street_postdirection ss_delivery_line_1 ss_primary_number ss_secondary_number ss_city_name ss_state_abbreviation ss_zipcode ss_plus4_code ss_latitude ss_longitude ss_default_city_name ss_rdi }
- COPY_SQL =
"\\copy $TMP_TABLE_NAME (#{DESIRED_COLUMNS.join(',')}) FROM '$PATH' DELIMITER ',' CSV HEADER"
- UPDATE_TABLE_SQL =
(<<-SQL).gsub(' ', '').freeze UPDATE $TABLE_NAME AS target SET street$NUM_SUFFIX = concat_ws(' ', src.ss_street_predirection, src.ss_street_name, src.ss_street_suffix, src.ss_street_postdirection), house_number_and_street$NUM_SUFFIX = src.ss_delivery_line_1, house_number$NUM_SUFFIX = CASE WHEN src.ss_primary_number IS NULL THEN NULL WHEN LENGTH(src.ss_primary_number) > 8 THEN NULL WHEN src.ss_primary_number ~ '\\A\\d+\\Z' THEN src.ss_primary_number::int WHEN src.ss_primary_number ~ '/' THEN (SELECT regexp_matches(src.ss_primary_number, '(\\d+)'))[1]::int WHEN src.ss_primary_number ~ '-' THEN (SELECT ROUND(AVG(v)) FROM unnest(array_remove(regexp_split_to_array(src.ss_primary_number, '\\D+'), '')::int[]) v) ELSE (SELECT regexp_matches(src.ss_primary_number, '(\\d+)'))[1]::int END, unit_number$NUM_SUFFIX = src.ss_secondary_number, city$NUM_SUFFIX = COALESCE(src.ss_default_city_name, src.ss_city_name), state$NUM_SUFFIX = src.ss_state_abbreviation, postcode$NUM_SUFFIX = src.ss_zipcode, postcode_zip4$NUM_SUFFIX = src.ss_plus4_code, latitude$NUM_SUFFIX = src.ss_latitude, longitude$NUM_SUFFIX = src.ss_longitude, rdi$NUM_SUFFIX = src.ss_rdi FROM $TMP_TABLE_NAME AS src WHERE target.id = src.id AND src.ss_zipcode IS NOT NULL SQL
Instance Attribute Summary collapse
-
#database_url ⇒ Object
readonly
Returns the value of attribute database_url.
-
#num ⇒ Object
readonly
Returns the value of attribute num.
-
#path ⇒ Object
readonly
Returns the value of attribute path.
-
#table_name ⇒ Object
readonly
Returns the value of attribute table_name.
Instance Method Summary collapse
- #create_tmp_table ⇒ Object
- #delete_tmp_table(tmp_table_name) ⇒ Object
-
#initialize(database_url:, table_name:, path:, num:) ⇒ UpdateTableFromCsv
constructor
A new instance of UpdateTableFromCsv.
- #load_csv_into_tmp_table(path:, table_name:) ⇒ Object
- #perform ⇒ Object
- #strip_csv ⇒ Object
- #update_original_table(tmp_table_name) ⇒ Object
Constructor Details
#initialize(database_url:, table_name:, path:, num:) ⇒ UpdateTableFromCsv
Returns a new instance of UpdateTableFromCsv.
77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/geocode_records/update_table_from_csv.rb', line 77 def initialize( database_url:, table_name:, path:, num: ) @database_url = database_url @table_name = table_name @path = path @num = num end |
Instance Attribute Details
#database_url ⇒ Object (readonly)
Returns the value of attribute database_url.
72 73 74 |
# File 'lib/geocode_records/update_table_from_csv.rb', line 72 def database_url @database_url end |
#num ⇒ Object (readonly)
Returns the value of attribute num.
75 76 77 |
# File 'lib/geocode_records/update_table_from_csv.rb', line 75 def num @num end |
#path ⇒ Object (readonly)
Returns the value of attribute path.
74 75 76 |
# File 'lib/geocode_records/update_table_from_csv.rb', line 74 def path @path end |
#table_name ⇒ Object (readonly)
Returns the value of attribute table_name.
73 74 75 |
# File 'lib/geocode_records/update_table_from_csv.rb', line 73 def table_name @table_name end |
Instance Method Details
#create_tmp_table ⇒ Object
101 102 103 104 105 106 107 108 |
# File 'lib/geocode_records/update_table_from_csv.rb', line 101 def create_tmp_table memo = "geocode_records_#{table_name}_#{rand(999999)}".gsub(/[^a-z0-9_]/i, '') GeocodeRecords.psql( database_url, CREATE_TABLE_SQL.gsub('$TMP_TABLE_NAME', memo) ) memo end |
#delete_tmp_table(tmp_table_name) ⇒ Object
136 137 138 139 140 141 |
# File 'lib/geocode_records/update_table_from_csv.rb', line 136 def delete_tmp_table(tmp_table_name) GeocodeRecords.psql( database_url, "DROP TABLE IF EXISTS #{tmp_table_name}" ) end |
#load_csv_into_tmp_table(path:, table_name:) ⇒ Object
121 122 123 124 125 126 |
# File 'lib/geocode_records/update_table_from_csv.rb', line 121 def load_csv_into_tmp_table(path:, table_name:) GeocodeRecords.psql( database_url, COPY_SQL.gsub('$TMP_TABLE_NAME', table_name).gsub('$PATH', path) ) end |
#perform ⇒ Object
89 90 91 92 93 94 95 96 97 98 99 |
# File 'lib/geocode_records/update_table_from_csv.rb', line 89 def perform return unless File.size(path) > 32 tmp_table_name = create_tmp_table begin tmp_csv_path = strip_csv load_csv_into_tmp_table path: tmp_csv_path, table_name: tmp_table_name update_original_table tmp_table_name ensure delete_tmp_table tmp_table_name end end |
#strip_csv ⇒ Object
110 111 112 113 114 115 116 117 118 119 |
# File 'lib/geocode_records/update_table_from_csv.rb', line 110 def strip_csv memo = GeocodeRecords.new_tmp_path('stripped') system( 'xsv', 'select', DESIRED_COLUMNS.join(','), path, out: memo ) or raise("xsv failed") memo end |
#update_original_table(tmp_table_name) ⇒ Object
128 129 130 131 132 133 134 |
# File 'lib/geocode_records/update_table_from_csv.rb', line 128 def update_original_table(tmp_table_name) num_suffix = (num == 1 ? '' : num.to_s) GeocodeRecords.psql( database_url, UPDATE_TABLE_SQL.gsub('$TMP_TABLE_NAME', tmp_table_name).gsub('$TABLE_NAME', table_name).gsub('$NUM_SUFFIX', num_suffix) ) end |