Class: BridgeCache::Plugins::CSVDump

Inherits: Object

Defined in: app/lib/bridge_cache/plugins/csv_dump.rb

Constant Summary

MAX_ROW_INTERVAL = 5000


Class Method Details

.bulk_import(iterator, model) ⇒ Object



# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 9

def self.bulk_import(iterator, model)
  ids = []
  rows = []
  klass = "BridgeCache::#{model.camelcase}".constantize

  csv_column_names = klass.csv_mapping.keys
  database_column_names = klass.csv_mapping.values

  iterator.each_row(model.pluralize) do |row|
    row = remove_bad_columns(klass, BridgeCache::Plugins::DataTransform.set_bridge_id(row).to_h)
    row = klass.format_import_row(row)
    rows << csv_column_names.map { |column| row[column] }
    ids << row['bridge_id'] if row['bridge_id'].present? # Some CSVs do not have an ID column

    if rows.length >= BridgeCache.batch_size
      perform_bulk_import(klass, database_column_names, rows)
      rows = []
    end
  end

  perform_bulk_import(klass, database_column_names, rows)
  ids
end
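
A minimal usage sketch, assuming a hypothetical DumpIterator wrapper (any object responding to each_row and yielding hash-like rows works) and a BridgeCache::User model named by the 'user' string:

# Hypothetical stand-in for the real dump iterator; bulk_import only
# needs #each_row(table_name) yielding hash-like rows.
class DumpIterator
  def initialize(rows)
    @rows = rows
  end

  def each_row(_table_name)
    @rows.each { |row| yield row }
  end
end

rows = [{ 'id' => '42', 'name' => 'Ada' }]
ids = BridgeCache::Plugins::CSVDump.bulk_import(DumpIterator.new(rows), 'user')
# 'user' is camelcased to BridgeCache::User and pluralized to 'users'
# for each_row; ids holds every bridge_id seen in the dump.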

.condition_sql(klass, columns) ⇒ Object

This method generates SQL that looks like:

(users.sis_id, users.email) IS DISTINCT FROM (EXCLUDED.sis_id, EXCLUDED.email)

This prevents activerecord-import from setting the `updated_at` column for rows that haven't actually changed. This allows you to query for rows that have changed by doing something like:

started_at = Time.now
run_the_users_sync!
changed = User.where("updated_at >= ?", started_at)



# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 82

def self.condition_sql(klass, columns)
  columns_str = columns.map { |c| "#{klass.quoted_table_name}.#{c}" }.join(', ')
  excluded_str = columns.map { |c| "EXCLUDED.#{c}" }.join(', ')
  "(#{columns_str}) IS DISTINCT FROM (#{excluded_str})"
end
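
A quick sketch of the output, assuming a BridgeCache::User model backed by a users table (quoted_table_name may add adapter-specific quoting):

BridgeCache::Plugins::CSVDump.condition_sql(BridgeCache::User, %w[sis_id email])
# => "(users.sis_id, users.email) IS DISTINCT FROM (EXCLUDED.sis_id, EXCLUDED.email)"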

.dump_row(clazz, row) ⇒ Object



# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 56

def self.dump_row(clazz, row)
  instance = initialze_row(clazz, row)
  dump_rows([instance])
end

.dump_rows(rows) ⇒ Object



# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 95

def self.dump_rows(rows)
  rows.each do |row|
    row.save! if row.changed?
  end
end

.dump_to_table(clazz, file_path) ⇒ Object



# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 33

def self.dump_to_table(clazz, file_path)
  count = 1
  total = 0
  rows = []

  # First pass: count the rows so the final partial batch can be flushed.
  CSV.foreach(file_path, headers: true) do |_row|
    total += 1
  end

  CSV.foreach(file_path, headers: true) do |row|
    rows << initialze_row(clazz, row)

    # Flush every MAX_ROW_INTERVAL rows, and again on the final row.
    if (count % MAX_ROW_INTERVAL).zero? || count == total
      dump_rows(rows)
      rows = []
    end

    count += 1
  end
end
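
A usage sketch, assuming a BridgeCache::User model and a CSV file whose header row includes an id column (hypothetical path):

BridgeCache::Plugins::CSVDump.dump_to_table(BridgeCache::User, 'tmp/users.csv')
# Rows are upserted one batch of MAX_ROW_INTERVAL at a time via dump_rows.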

.initialze_row(clazz, row) ⇒ Object



# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 88

def self.initialze_row(clazz, row)
  instance = clazz.find_or_create_by(bridge_id: row['id'])
  instance.assign_attributes(remove_bad_columns(clazz,
                                                BridgeCache::Plugins::DataTransform.set_bridge_id(row).to_h))
  instance
end
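
A single-row sketch, assuming a CSV::Row with an 'id' header and that DataTransform.set_bridge_id copies that value into a bridge_id attribute:

row = CSV::Row.new(%w[id name], %w[42 Ada])
instance = BridgeCache::Plugins::CSVDump.initialze_row(BridgeCache::User, row)
# Finds or creates the record by bridge_id, then assigns only the
# attributes that match the model's column names.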

.perform_bulk_import(klass, columns, rows) ⇒ Object



# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 61

def self.perform_bulk_import(klass, columns, rows)
  return if rows.empty?

  columns = columns.dup
  klass.import(columns, rows, validate: false, on_duplicate_key_update: {
                 conflict_target: klass.unique_column_names,
                 condition: condition_sql(klass, columns),
                 columns: columns
               })
end
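
The klass passed in is expected to satisfy a small contract: csv_mapping and format_import_row (used by bulk_import above), unique_column_names for the upsert's conflict target, and an import method from activerecord-import. A sketch of a conforming model, with hypothetical column names:

class BridgeCache::User < ApplicationRecord
  # CSV header => database column (hypothetical mapping)
  def self.csv_mapping
    { 'id' => 'bridge_id', 'email' => 'email' }
  end

  # Columns with a unique index, used as the ON CONFLICT target.
  def self.unique_column_names
    %w[bridge_id]
  end

  # Per-row normalization hook; identity here.
  def self.format_import_row(row)
    row
  end
end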

.remove_bad_columns(clazz, row) ⇒ Object



# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 101

def self.remove_bad_columns(clazz, row)
  row.delete_if { |key, _value| !clazz.column_names.include?(key) }
end
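
For example, assuming the model's column_names are ["bridge_id", "name"], a stray CSV column is dropped (note that delete_if mutates the hash in place):

row = { 'bridge_id' => 1, 'name' => 'Ada', 'junk' => 'x' }
BridgeCache::Plugins::CSVDump.remove_bad_columns(BridgeCache::User, row)
# => { 'bridge_id' => 1, 'name' => 'Ada' }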