Class: SqlCompare

Inherits:
Object
  • Object
show all
Defined in:
lib/coopy/sql_compare.rb

Instance Method Summary collapse

Constructor Details

#initialize(db, table1, table2) ⇒ SqlCompare

Returns a new instance of SqlCompare.



5
6
7
8
9
10
11
12
# File 'lib/coopy/sql_compare.rb', line 5

def initialize(db1,db2)
  @db1 = db
  @db2 = db2
  @table1 = nil
  @table2 = nil
  @single_db = false
  raise "not implemented yet"
end

Instance Method Details

#applyObject



26
27
28
# File 'lib/coopy/sql_compare.rb', line 26

def apply
  apply_single
end

#apply_deletes(sql, all_cols, keys_in_all_cols) ⇒ Object



146
147
148
149
150
151
152
# File 'lib/coopy/sql_compare.rb', line 146

def apply_deletes(sql,all_cols,keys_in_all_cols)
  @db1.fetch(sql,all_cols) do |row|
    cells = row.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
    rc = RowChange.new("---",cells)
    apply_rc(rc,row,keys_in_all_cols)
  end
end

#apply_inserts(sql, all_cols, keys_in_all_cols) ⇒ Object



121
122
123
124
125
126
127
# File 'lib/coopy/sql_compare.rb', line 121

def apply_inserts(sql,all_cols,keys_in_all_cols)
  @db1.fetch(sql,all_cols) do |row|
    cells = row.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
    rc = RowChange.new("+++",cells)
    apply_rc(rc,row,keys_in_all_cols)
  end
end

#apply_rc(rc, row, keys_in_cols) ⇒ Object



154
155
156
157
158
159
160
161
162
# File 'lib/coopy/sql_compare.rb', line 154

def apply_rc(rc,row,keys_in_cols)
  rc.columns = @rc_columns
  if @patch.want_context
    rc.key = keyify(row.values_at(*keys_in_cols))
    @pending_rcs << rc
  else
    @patch.apply_row(rc)
  end
end

#apply_singleObject

When working within a single database, we can delegate more work to SQL. So we specialize this case.



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/coopy/sql_compare.rb', line 54

def apply_single
  validate_schema

  # Prepare some lists of columns.
  key_cols = @db1.primary_key(@table1)
  data_cols = @db1.except_primary_key(@table1)
  all_cols = @db1.column_names(@table1)

  # Let our public know we are beginning.
  @patch.begin_diff

  # Advertise column names.
  @rc_columns = DiffColumns.new
  @rc_columns.title_row = all_cols
  @rc_columns.update(0)
  cells = all_cols.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
  rc = RowChange.new("@@",cells)
  @patch.apply_row(rc)

  # If requested, we will be providing context rows around changed rows.
  # This is not a natural thing to do with SQL, so we do it only on request.
  # When requested, we need to buffer row changes.
  @pending_rcs = []

  # Prepare some useful SQL fragments to assemble later.
  sql_table1 = @db1.quote_table(@table1)
  sql_table2 = @db1.quote_table(@table2)
  sql_key_cols = key_cols.map{|c| @db1.quote_column(c)}.join(",")
  sql_all_cols = all_cols.map{|c| @db1.quote_column(c)}.join(",")
  sql_key_match = key_cols.map{|c| @db1.quote_column(c)}.map{|c| "#{sql_table1}.#{c} IS #{sql_table2}.#{c}"}.join(" AND ")
  sql_data_mismatch = data_cols.map{|c| @db1.quote_column(c)}.map{|c| "#{sql_table1}.#{c} IS NOT #{sql_table2}.#{c}"}.join(" OR ")

  # For one query we will need to interleave columns from two tables.  For
  # portability we need to give these columns distinct names.
  weave = all_cols.map{|c| [[sql_table1,@db1.quote_column(c)],
                            [sql_table2,@db2.quote_column(c)]]}.flatten(1)
  dbl_cols = weave.map{|c| "#{c[0]}.#{c[1]}"}
  sql_dbl_cols = weave.map{|c| "#{c[0]}.#{c[1]} AS #{c[0].gsub(/[^a-zA-Z0-9]/,'_')}_#{c[1].gsub(/[^a-zA-Z0-9]/,'_')}"}.join(",")

  # Prepare a map of primary key offsets.
  keys_in_all_cols = key_cols.each.map{|c| all_cols.index(c)}
  keys_in_dbl_cols = keys_in_all_cols.map{|x| 2*x}

  # Find rows in table2 that are not in table1.
  sql = "SELECT #{sql_all_cols} FROM #{sql_table2} WHERE NOT EXISTS (SELECT 1 FROM #{sql_table1} WHERE #{sql_key_match})"
  apply_inserts(sql,all_cols,keys_in_all_cols)

  # Find rows in table1 and table2 that differ while having the same primary
  # key.
  sql = "SELECT #{sql_dbl_cols} FROM #{sql_table1} INNER JOIN #{sql_table2} ON #{sql_key_match} WHERE #{sql_data_mismatch}"
  apply_updates(sql,dbl_cols,keys_in_dbl_cols)

  # Find rows that are in table1 but not table2
  sql = "SELECT #{sql_all_cols} FROM #{sql_table1} WHERE NOT EXISTS (SELECT 1 FROM #{sql_table2} WHERE #{sql_key_match})"
  apply_deletes(sql,all_cols,keys_in_all_cols)

  # If we are supposed to provide context, we need to deal with row order.
  if @patch.want_context
    sql = "SELECT #{sql_all_cols}, 0 AS __coopy_tag__ FROM #{sql_table1} UNION SELECT #{sql_all_cols}, 1 AS __coopy_tag__ FROM #{sql_table2} ORDER BY #{sql_key_cols}, __coopy_tag__"
    apply_with_context(sql,all_cols,keys_in_all_cols)
  end

  # Done!
  @patch.end_diff
end

#apply_updates(sql, dbl_cols, keys_in_dbl_cols) ⇒ Object



130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/coopy/sql_compare.rb', line 130

def apply_updates(sql,dbl_cols,keys_in_dbl_cols)
  @db1.fetch(sql,dbl_cols) do |row|
    pairs = row.enum_for(:each_slice,2).to_a
    cells = pairs.map do |v| 
      if v[0]==v[1] 
        { :txt => v[0], :value => v[0], :cell_mode => "" }
      else
        { :txt => v[0], :value => v[0], :new_value => v[1], :cell_mode => "->" }
      end
    end
    rc = RowChange.new("->",cells)
    apply_rc(rc,row,keys_in_dbl_cols)
  end
end

#apply_with_context(sql, all_cols, keys_in_all_cols) ⇒ Object

Do the context dance.



172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/coopy/sql_compare.rb', line 172

def apply_with_context(sql,all_cols,keys_in_all_cols)
  hits = {}
  @pending_rcs.each do |rc|
    hits[rc.key] = rc
  end 
  hist = []
  n = 2
  pending = 0
  skipped = false
  noted = false
  last_row = nil
  @db1.fetch(sql,all_cols + ["__coopy_tag__"]) do |row|
    tag = row.pop.to_i
    k = keyify(row.values_at(*keys_in_all_cols))
    if hits[k]
      emit_skip(row) if skipped
      hist.each do |row0|
        cells = row0.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
        rc = RowChange.new("",cells)
        rc.columns = @rc_columns
        @patch.apply_row(rc)
      end
      hist.clear
      pending = n
      @patch.apply_row(hits[k])
      hits.delete(k)
      skipped = false
      noted =  true
    elsif tag == 1
      # ignore redundant row
    elsif pending>0
      emit_skip(row) if skipped
      cells = row.map{|v| { :txt => v, :value => v, :cell_mode => "" }}
      rc = RowChange.new("",cells)
      rc.columns = @rc_columns
      @patch.apply_row(rc)
      pending = pending-1
      skipped = false
    else
      hist << row
      if hist.length>n
        skipped = true
        last_row = row
        hist.shift
      end
    end
  end
  emit_skip(last_row) if skipped and noted
end

#emit_skip(row) ⇒ Object



164
165
166
167
168
169
# File 'lib/coopy/sql_compare.rb', line 164

def emit_skip(row)
  cells = row.map{|v| { :txt => "...", :value => "...", :cell_mode => "" }}
  rc = RowChange.new("...",cells)
  rc.columns = @rc_columns
  @patch.apply_row(rc)
end

#keyify(lst) ⇒ Object



48
49
50
# File 'lib/coopy/sql_compare.rb', line 48

def keyify(lst)
  lst.map{|x| x.to_s}.join("___")
end

#set_output(patch) ⇒ Object



22
23
24
# File 'lib/coopy/sql_compare.rb', line 22

def set_output(patch)
  @patch = patch
end

#validate_schemaObject

We are not implementing full comparison, just an adequate subset for easy cases (a table with a trustworthy primary key, and constant columns). Make sure we are not trying to do something we’re not ready for.



34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/coopy/sql_compare.rb', line 34

def validate_schema
  all_cols1 = @db1.column_names(@table1)
  all_cols2 = @db2.column_names(@table2)
  if all_cols1 != all_cols2
    raise "Columns do not match, please use full coopy toolbox"
  end

  key_cols1 = @db1.primary_key(@table1)
  key_cols2 = @db2.primary_key(@table2)
  if key_cols1 != key_cols2
    raise "Primary keys do not match, please use full coopy toolbox"
  end
end