Class: DirectoryDiff::Transformer::TempTable

Inherits:
Object
  • Object
show all
Defined in:
lib/directory_diff/transformer/temp_table.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(current_directory) ⇒ TempTable

Returns a new instance of TempTable.



18
19
20
21
# File 'lib/directory_diff/transformer/temp_table.rb', line 18

def initialize(current_directory)
  @current_directory = current_directory
  @operations = []
end

Instance Attribute Details

#current_directoryObject (readonly)

Returns the value of attribute current_directory.



12
13
14
# File 'lib/directory_diff/transformer/temp_table.rb', line 12

def current_directory
  @current_directory
end

#operationsObject (readonly)

Returns the value of attribute operations.



12
13
14
# File 'lib/directory_diff/transformer/temp_table.rb', line 12

def operations
  @operations
end

Instance Method Details

#into(new_directory, options = {}) ⇒ Object

Parameters:

  • new_directory

    a table containing only the new records to compare against, most likely a temp table.



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/directory_diff/transformer/temp_table.rb', line 25

def into(new_directory, options = {})
  projection = "    name, \n    lower(email) email, \n    coalesce(phone_number, '') phone_number,\n    array_remove(\n      regexp_split_to_array(\n        coalesce(assistants, ''),\n        '\\s*,\\s*'\n      )::varchar[],\n      ''\n    ) assistants\n  SQL\n  current_directory.select(projection).temporary_table do |temp_current_directory|\n    # Remove dupe email rows, keeping the last one\n    latest_unique_sql = <<-SQL\n      SELECT \n        DISTINCT ON (lower(email)) name, \n        lower(email) email,\n        coalesce(phone_number, '') phone_number,\n        array_remove(\n          regexp_split_to_array(\n            coalesce(assistants, ''),\n            '\\s*,\\s*'\n          )::varchar[],\n          ''\n        ) assistants, \n        extra, \n        ROW_NUMBER () OVER ()\n      FROM \n        \#{new_directory.arel_table.name} \n      ORDER BY \n        lower(email), \n        row_number desc\n    SQL\n\n    new_directory.select('*')\n      .from(Arel.sql(\"(\#{latest_unique_sql}) as t\"))\n      .order(\"row_number\").temporary_table do |deduped_csv|\n      # Get Arel tables for referencing fields, table names\n      employees = temp_current_directory.table\n      csv = deduped_csv.table\n\n      # Reusable Arel predicates\n      csv_employee_join = csv[:email].eq(employees[:email])\n      attributes_unchanged = employees[:name].eq(csv[:name])\n                              .and(employees[:phone_number].eq(csv[:phone_number]))\n                              .and(employees[:assistants].contains(csv[:assistants]))\n\n      # Creates joins between the temp table and the csv table and\n      # vice versa\n      # Cribbed from https://gist.github.com/mildmojo/3724189\n      csv_to_employees = csv.join(employees, Arel::Nodes::OuterJoin)\n                          .on(csv_employee_join)\n                          .join_sources\n      employees_to_csv = employees.join(csv, Arel::Nodes::OuterJoin)\n                          .on(csv_employee_join)\n                          .join_sources\n\n      # Representation of the joined csv-employees, with csv on the left\n      csv_records = deduped_csv.joins(csv_to_employees).order('row_number asc')\n      # Representation of the joined employees-csv, with employees on the\n      # left\n      employee_records = temp_current_directory.joins(employees_to_csv)\n\n      # Cleanup some bad records\n      # 1. Assistant email is set on an employee, but no assistant record\n      #    in csv. Remove the assistant email.\n      # 2. Assistant email is employee's own email. Remove the assistant\n      #    email.\n      # TODO move this into the temp table creation above\n      # https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1\n      cleanup_sql = <<-SQL\n        with\n          unnested_assistants as\n          (\n            select\n              email,\n              name,\n              unnest(assistants) assistant\n            from \#{csv.name} \n          ),\n          own_email_removed as\n          (\n            select\n              a.*\n            from unnested_assistants a\n            where a.email != a.assistant\n          ),\n          missing_assistants_removed as\n          (\n            select\n              a.*\n            from own_email_removed a\n            left outer join \#{csv.name} b on a.assistant = b.email\n            where\n              (a.assistant is null and b.email is null)\n              or (a.assistant is not null and b.email is not null)\n          ),\n          only_valid_assistants as\n          (\n            select\n              a.email, \n              a.name,\n              array_remove(\n                array_agg(b.assistant),\n                null\n              ) assistants\n            from \#{csv.name} a\n            left outer join missing_assistants_removed b\n            using (email)\n            group by\n              a.email, a.name\n          )\n        update \#{csv.name}\n        set assistants = only_valid_assistants.assistants\n        from only_valid_assistants\n        where \#{csv.name}.email = only_valid_assistants.email\n      SQL\n      deduped_csv.connection.execute(cleanup_sql)\n\n      # new records are records in the new directory that don't exist in\n      # the current directory\n      new_records = csv_records.select(\"'insert'::varchar operation, row_number\")\n                      .select(:name, :email, :phone_number, :assistants, :extra)\n                      .where({ employees.name => { email: nil } })\n      # deleted records are records in the current directory that don't\n      # exist in the new directory\n      deleted_records = employee_records.select(\"'delete'::varchar operation, row_number\")\n                          .select(:name, :email, :phone_number, :assistants, :extra)\n                          .where({ csv.name => { email: nil } })\n      # changed records are records that have difference in name, phone\n      # number and/or assistants\n      changed_records = csv_records.select(\"'update'::varchar operation, row_number\")\n                          .select(:name, :email, :phone_number, :assistants, :extra)\n                          .where.not(attributes_unchanged)\n      # unchanged records are records that are exactly the same in both\n      # directories (without considering the extra field)\n      unchanged_records = csv_records.select(\"'noop'::varchar operation, row_number\")\n                            .select(:name, :email, :phone_number, :assistants, :extra)\n                            .where(attributes_unchanged)\n\n      # create temp table for holding operations\n      operations_temp_table = \"temporary_operations_\#{self.object_id}\"\n      deduped_csv.connection.with_temporary_table operations_temp_table, new_records.to_sql do |name|\n        dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new csv_records.klass, name\n        rel = ActiveRecord::Relation.new dec, table: dec.arel_table\n        rel.readonly!\n\n        rel.connection.execute(\"insert into \#{name}(operation, row_number, name, email, phone_number, assistants, extra) \#{deleted_records.to_sql}\")\n        rel.connection.execute(\"insert into \#{name}(operation, row_number, name, email, phone_number, assistants, extra) \#{changed_records.to_sql}\")\n\n        if options[:skip_noop] != true\n          rel.connection.execute(\"insert into \#{name}(operation, row_number, name, email, phone_number, assistants, extra) \#{unchanged_records.to_sql}\")\n        end\n\n        rel.order(:row_number).each do |operation|\n          add_operation(operation)\n        end\n      end\n    end\n  end\n\n  prioritize_assistants(operations)\nend\n"