Class: DirectoryDiff::Transformer::TempTable

Inherits:
Object
  • Object
show all
Defined in:
lib/directory_diff/transformer/temp_table.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(current_directory) ⇒ TempTable

Returns a new instance of TempTable.



18
19
20
21
# File 'lib/directory_diff/transformer/temp_table.rb', line 18

def initialize(current_directory)
  @current_directory = current_directory
  @operations = []
end

Instance Attribute Details

#current_directoryObject (readonly)

Returns the value of attribute current_directory.



12
13
14
# File 'lib/directory_diff/transformer/temp_table.rb', line 12

def current_directory
  @current_directory
end

#operationsObject (readonly)

Returns the value of attribute operations.



12
13
14
# File 'lib/directory_diff/transformer/temp_table.rb', line 12

def operations
  @operations
end

Instance Method Details

#into(new_directory, options = {}) ⇒ Object

Parameters:

  • new_directory

    a table containing only the new records to compare against, most likely a temp table.



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/directory_diff/transformer/temp_table.rb', line 25

def into(new_directory, options = {})
  projection = <<-SQL
    name, 
    lower(email) email, 
    coalesce(phone_number, '') phone_number,
    array_remove(
      regexp_split_to_array(
        coalesce(assistants, ''),
        '\s*,\s*'
      )::varchar[],
      ''
    ) assistants
  SQL
  current_directory.select(projection).temporary_table do |temp_current_directory|
    # Remove dupe email rows, keeping the last one
    latest_unique_sql = <<-SQL
      SELECT 
        DISTINCT ON (lower(email)) name, 
        lower(email) email,
        coalesce(phone_number, '') phone_number,
        array_remove(
          regexp_split_to_array(
            coalesce(assistants, ''),
            '\s*,\s*'
          )::varchar[],
          ''
        ) assistants, 
        extra, 
        ROW_NUMBER () OVER ()
      FROM 
        #{new_directory.arel_table.name} 
      ORDER BY 
        lower(email), 
        row_number desc
    SQL

    new_directory.select('*')
      .from(Arel.sql("(#{latest_unique_sql}) as t"))
      .order("row_number").temporary_table do |deduped_csv|
      # Get Arel tables for referencing fields, table names
      employees = temp_current_directory.table
      csv = deduped_csv.table

      # Reusable Arel predicates
      csv_employee_join = csv[:email].eq(employees[:email])
      attributes_unchanged = employees[:name].eq(csv[:name])
                              .and(employees[:phone_number].eq(csv[:phone_number]))
                              .and(employees[:assistants].contains(csv[:assistants]))

      # Creates joins between the temp table and the csv table and
      # vice versa
      # Cribbed from https://gist.github.com/mildmojo/3724189
      csv_to_employees = csv.join(employees, Arel::Nodes::OuterJoin)
                          .on(csv_employee_join)
                          .join_sources
      employees_to_csv = employees.join(csv, Arel::Nodes::OuterJoin)
                          .on(csv_employee_join)
                          .join_sources

      # Representation of the joined csv-employees, with csv on the left
      csv_records = deduped_csv.joins(csv_to_employees).order('row_number asc')
      # Representation of the joined employees-csv, with employees on the
      # left
      employee_records = temp_current_directory.joins(employees_to_csv)

      # Cleanup some bad records
      # 1. Assistant email is set on an employee, but no assistant record
      #    in csv. Remove the assistant email.
      # 2. Assistant email is employee's own email. Remove the assistant
      #    email.
      # TODO move this into the temp table creation above
      # https://www.db-fiddle.com/f/gxg6qABP1LygYvvgRvyH2N/1
      cleanup_sql = <<-SQL
        with
          unnested_assistants as
          (
            select
              email,
              name,
              unnest(assistants) assistant
            from #{csv.name} 
          ),
          own_email_removed as
          (
            select
              a.*
            from unnested_assistants a
            where a.email != a.assistant
          ),
          missing_assistants_removed as
          (
            select
              a.*
            from own_email_removed a
            left outer join #{csv.name} b on a.assistant = b.email
            where
              (a.assistant is null and b.email is null)
              or (a.assistant is not null and b.email is not null)
          ),
          only_valid_assistants as
          (
            select
              a.email, 
              a.name,
              array_remove(
                array_agg(b.assistant),
                null
              ) assistants
            from #{csv.name} a
            left outer join missing_assistants_removed b
            using (email)
            group by
              a.email, a.name
          )
        update #{csv.name}
        set assistants = only_valid_assistants.assistants
        from only_valid_assistants
        where #{csv.name}.email = only_valid_assistants.email
      SQL
      deduped_csv.connection.execute(cleanup_sql)

      # new records are records in the new directory that don't exist in
      # the current directory
      new_records = csv_records.select("'insert'::varchar operation, row_number")
                      .select(:name, :email, :phone_number, :assistants, :extra)
                      .where({ employees.name => { email: nil } })
      # deleted records are records in the current directory that don't
      # exist in the new directory
      deleted_records = employee_records.select("'delete'::varchar operation, row_number")
                          .select(:name, :email, :phone_number, :assistants, :extra)
                          .where({ csv.name => { email: nil } })
      # changed records are records that have difference in name, phone
      # number and/or assistants
      changed_records = csv_records.select("'update'::varchar operation, row_number")
                          .select(:name, :email, :phone_number, :assistants, :extra)
                          .where.not(attributes_unchanged)
      # unchanged records are records that are exactly the same in both
      # directories (without considering the extra field)
      unchanged_records = csv_records.select("'noop'::varchar operation, row_number")
                            .select(:name, :email, :phone_number, :assistants, :extra)
                            .where(attributes_unchanged)

      # create temp table for holding operations
      operations_temp_table = "temporary_operations_#{self.object_id}"
      deduped_csv.connection.with_temporary_table operations_temp_table, new_records.to_sql do |name|
        dec = ActiveRecordPgStuff::Relation::TemporaryTable::Decorator.new csv_records.klass, name
        rel = ActiveRecord::Relation.new dec, table: dec.arel_table
        rel.readonly!

        rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{deleted_records.to_sql}")
        rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{changed_records.to_sql}")

        if options[:skip_noop] != true
          rel.connection.execute("insert into #{name}(operation, row_number, name, email, phone_number, assistants, extra) #{unchanged_records.to_sql}")
        end

        rel.order(:row_number).each do |operation|
          add_operation(operation)
        end
      end
    end
  end

  prioritize_assistants(operations)
end