Class: GeocodeRecords::UpdateTableFromCsv

Inherits:
Object
  • Object
show all
Defined in:
lib/geocode_records/update_table_from_csv.rb

Constant Summary collapse

CREATE_TABLE_SQL =
(<<-SQL).gsub('      ', '').freeze
  CREATE TABLE $TMP_TABLE_NAME (
    id uuid primary key,
    ss_street_predirection text,
    ss_street_name text,
    ss_street_suffix text,
    ss_street_postdirection text,
    ss_delivery_line_1 text,
    ss_primary_number text,
    ss_secondary_number text,
    ss_city_name text,
    ss_state_abbreviation text,
    ss_zipcode text,
    ss_plus4_code text,
    ss_latitude float,
    ss_longitude float,
    ss_default_city_name text,
    ss_rdi text
  )
SQL
DESIRED_COLUMNS =
%w{
  id
  ss_street_predirection
  ss_street_name
  ss_street_suffix
  ss_street_postdirection
  ss_delivery_line_1
  ss_primary_number
  ss_secondary_number
  ss_city_name
  ss_state_abbreviation
  ss_zipcode
  ss_plus4_code
  ss_latitude
  ss_longitude
  ss_default_city_name
  ss_rdi
}
COPY_SQL =
"\\copy $TMP_TABLE_NAME (#{DESIRED_COLUMNS.join(',')}) FROM '$PATH' DELIMITER ',' CSV HEADER"
UPDATE_TABLE_SQL =
(<<-SQL).gsub('      ', '').freeze
  UPDATE $TABLE_NAME AS target
  SET
    street$NUM_SUFFIX = concat_ws(' ', src.ss_street_predirection, src.ss_street_name, src.ss_street_suffix, src.ss_street_postdirection),
    house_number_and_street$NUM_SUFFIX = src.ss_delivery_line_1,
    house_number$NUM_SUFFIX = CASE
      WHEN src.ss_primary_number IS NULL THEN NULL
      WHEN LENGTH(src.ss_primary_number) > 8 THEN NULL
      WHEN src.ss_primary_number ~ '\\A\\d+\\Z' THEN src.ss_primary_number::int
      WHEN src.ss_primary_number ~ '/' THEN (SELECT regexp_matches(src.ss_primary_number, '(\\d+)'))[1]::int
      WHEN src.ss_primary_number ~ '-' THEN (SELECT ROUND(AVG(v)) FROM unnest(array_remove(regexp_split_to_array(src.ss_primary_number, '\\D+'), '')::int[]) v)
      ELSE (SELECT regexp_matches(src.ss_primary_number, '(\\d+)'))[1]::int
    END,
    unit_number$NUM_SUFFIX = src.ss_secondary_number,
    city$NUM_SUFFIX = COALESCE(src.ss_default_city_name, src.ss_city_name),
    state$NUM_SUFFIX = src.ss_state_abbreviation,
    postcode$NUM_SUFFIX = src.ss_zipcode,
    postcode_zip4$NUM_SUFFIX = src.ss_plus4_code,
    latitude$NUM_SUFFIX = src.ss_latitude,
    longitude$NUM_SUFFIX = src.ss_longitude,
    rdi$NUM_SUFFIX = src.ss_rdi
  FROM $TMP_TABLE_NAME AS src
  WHERE
        target.id = src.id
    AND src.ss_zipcode IS NOT NULL
SQL

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(database_url:, table_name:, path:, num:) ⇒ UpdateTableFromCsv

Returns a new instance of UpdateTableFromCsv.



77
78
79
80
81
82
83
84
85
86
87
# File 'lib/geocode_records/update_table_from_csv.rb', line 77

def initialize(
  database_url:,
  table_name:,
  path:,
  num:
)
  @database_url = database_url
  @table_name = table_name
  @path = path
  @num = num
end

Instance Attribute Details

#database_urlObject (readonly)

Returns the value of attribute database_url.



72
73
74
# File 'lib/geocode_records/update_table_from_csv.rb', line 72

def database_url
  @database_url
end

#numObject (readonly)

Returns the value of attribute num.



75
76
77
# File 'lib/geocode_records/update_table_from_csv.rb', line 75

def num
  @num
end

#pathObject (readonly)

Returns the value of attribute path.



74
75
76
# File 'lib/geocode_records/update_table_from_csv.rb', line 74

def path
  @path
end

#table_nameObject (readonly)

Returns the value of attribute table_name.



73
74
75
# File 'lib/geocode_records/update_table_from_csv.rb', line 73

def table_name
  @table_name
end

Instance Method Details

#create_tmp_tableObject



101
102
103
104
105
106
107
108
# File 'lib/geocode_records/update_table_from_csv.rb', line 101

def create_tmp_table
  memo = "geocode_records_#{table_name}_#{rand(999999)}".gsub(/[^a-z0-9_]/i, '')
  GeocodeRecords.psql(
    database_url,
    CREATE_TABLE_SQL.gsub('$TMP_TABLE_NAME', memo)
  )
  memo
end

#delete_tmp_table(tmp_table_name) ⇒ Object



136
137
138
139
140
141
# File 'lib/geocode_records/update_table_from_csv.rb', line 136

def delete_tmp_table(tmp_table_name)
  GeocodeRecords.psql(
    database_url,
    "DROP TABLE IF EXISTS #{tmp_table_name}"
  )
end

#load_csv_into_tmp_table(path:, table_name:) ⇒ Object



121
122
123
124
125
126
# File 'lib/geocode_records/update_table_from_csv.rb', line 121

def load_csv_into_tmp_table(path:, table_name:)
  GeocodeRecords.psql(
    database_url,
    COPY_SQL.gsub('$TMP_TABLE_NAME', table_name).gsub('$PATH', path)
  )
end

#performObject



89
90
91
92
93
94
95
96
97
98
99
# File 'lib/geocode_records/update_table_from_csv.rb', line 89

def perform
  return unless File.size(path) > 32
  tmp_table_name = create_tmp_table
  begin
    tmp_csv_path = strip_csv
    load_csv_into_tmp_table path: tmp_csv_path, table_name: tmp_table_name
    update_original_table tmp_table_name
  ensure
    delete_tmp_table tmp_table_name
  end
end

#strip_csvObject



110
111
112
113
114
115
116
117
118
119
# File 'lib/geocode_records/update_table_from_csv.rb', line 110

def strip_csv
  memo = GeocodeRecords.new_tmp_path('stripped')
  system(
    'xsv',
    'select', DESIRED_COLUMNS.join(','),
    path,
    out: memo
  ) or raise("xsv failed")
  memo
end

#update_original_table(tmp_table_name) ⇒ Object



128
129
130
131
132
133
134
# File 'lib/geocode_records/update_table_from_csv.rb', line 128

def update_original_table(tmp_table_name)
  num_suffix = (num == 1 ? '' : num.to_s)
  GeocodeRecords.psql(
    database_url,
    UPDATE_TABLE_SQL.gsub('$TMP_TABLE_NAME', tmp_table_name).gsub('$TABLE_NAME', table_name).gsub('$NUM_SUFFIX', num_suffix)
  )
end