Module: DatabaseSanitizer

Defined in:
lib/database_sanitizer.rb,
lib/database_sanitizer.rb,
lib/database_sanitizer/version.rb,
lib/database_sanitizer/transformers.rb

Defined Under Namespace

Classes: Source

Constant Summary

# Number of rows per chunk: used as the LIMIT of each export SELECT and as
# the divisor when computing how many chunks a table needs.
CHUNK_SIZE =
500
# Version string of the library.
VERSION =
'0.0.10'
# Registry of sanitizing transformers, keyed by the name that follows
# "sanitize:" in a column comment. Every value is a callable invoked as
# transformer.(row_index, original_value) and returns the replacement value.
Transformers =
{
  # Replaces the local part with a zero-padded counter and keeps the original
  # domain. A nil (NULL) value is passed through untouched instead of raising.
  'email' => ->(i, rec) { rec.nil? ? rec : "email#{i.to_s.rjust(5, '0')}@#{rec.split('@')[1]}" },
  # Replaces the value with NULL.
  'wipe' => proc { nil },
  # Replaces the value with the integer 0.
  'zero' => proc { 0 },
  # Replaces the value with an empty string.
  'empty_string' => proc { '' },
  # Replaces the value with a fixed placeholder name.
  'name' => proc { 'John Doe' },
  # Keeps the first three characters and fills the rest with a zero-padded
  # counter so the result has the same length as the original; nil stays nil.
  'phone_number' => ->(i, rec) { rec.nil? ? rec : "#{rec[0, 3]}#{i.to_s.rjust(rec.length - 3, '0')}" }
}

Class Method Summary

Class Method Details

.duplicate_schema(schema = nil) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/database_sanitizer.rb', line 31

# Recreates the source schema on the connection ActiveRecord::Base is using.
#
# With no argument, the schema of Source.connection is dumped in schema.rb
# form into memory and evaluated. Otherwise +schema+ is treated as a path
# (resolved relative to the current working directory) to a SQL file whose
# contents are executed as a single query.
#
# schema - path to a schema SQL file, or nil to dump from the source.
def duplicate_schema schema=nil
  unless schema.nil?
    puts 'Reading schema SQL...'
    sql_path = File.expand_path(schema, Dir.pwd)
    schema_sql = IO.read(sql_path)
    return ActiveRecord::Migration.suppress_messages do
      ActiveRecord::Base.connection.exec_query(schema_sql)
    end
  end

  dump_buffer = StringIO.new
  puts 'Dumping schema.rb...'
  ActiveRecord::SchemaDumper.dump(Source.connection, dump_buffer)
  puts 'Loading schema.rb...'
  # The evaluated text is the dump generated just above, not external input.
  ActiveRecord::Migration.suppress_messages { eval dump_buffer.string }
end

.export(src, dest, opts = {}) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/database_sanitizer.rb', line 58

# Copies table data from +src+ to +dest+ in chunks of CHUNK_SIZE rows,
# passing each column value through the transformer configured for that
# column (if any) before inserting it.
#
# src  - connection the rows are read from.
# dest - connection the rows are written to; column comments that select
#        the transformers are also read from it.
# opts - optional settings:
#        :schema  - forwarded to duplicate_schema
#        :tables  - table names to export (defaults to every table in src)
#        :exclude - table names to skip
def export src, dest, opts={}
  duplicate_schema opts[:schema]
  tables = (opts[:tables] || src.tables.collect(&:to_s)) - (opts[:exclude] || [])
  transformers = read_comments dest, tables
  # Width used to right-align table names in the progress output.
  max_tbl_name_len = transformers.keys.map(&:length).max || 0

  tables.with_progress('Exporting').each do |table|
    # Quote with the connection that will actually run the statement: the
    # SELECT runs on src, the INSERT on dest, and their quoting may differ.
    src_table = src.quote_table_name table
    dest_table = dest.quote_table_name table
    table_transformers = transformers[table.to_sym]
    # NOTE(review): LIMIT/OFFSET without ORDER BY does not guarantee a stable
    # row order between chunks on every database -- confirm this is acceptable.
    query = "SELECT * FROM #{src_table} LIMIT #{CHUNK_SIZE} OFFSET "
    get_chunks(table).times_with_progress(table.rjust max_tbl_name_len) do |chunk_i|
      offset = chunk_i * CHUNK_SIZE
      result = src.exec_query query + offset.to_s
      cols = result.columns.map { |col| dest.quote_column_name col }.join ','
      # One transaction per chunk.
      dest.transaction do
        result.rows.with_progress('batch').each_with_index do |src_row, row_i|
          values = result.columns.each_with_index.map do |col, col_i|
            transformer = table_transformers[col.to_sym]
            # offset + row_i is the row's position in the whole table, so
            # counter-based transformers produce distinct values per row.
            dest.quote transformer ? transformer.(offset + row_i, src_row[col_i]) : src_row[col_i]
          end
          dest.insert_sql "INSERT INTO #{dest_table} (#{cols}) VALUES (#{values.join ','})"
        end
      end
    end
  end
end

.extract_transformer(comment) ⇒ Object



15
# File 'lib/database_sanitizer.rb', line 15

# Pulls the transformer name out of a column comment.
#
# comment - comment text, or nil/false when the column has none.
#
# Returns the word following "sanitize:" (one optional space allowed), or nil
# when there is no comment or it carries no such directive.
def extract_transformer comment
  return nil unless comment

  comment[/sanitize: ?(\w+)/, 1]
end

.get_chunks(table) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
# File 'lib/database_sanitizer.rb', line 46

# Computes how many CHUNK_SIZE-row chunks are needed to cover +table+ on the
# source connection.
#
# On PostgreSQL the planner's row estimate (pg_class.reltuples) is tried
# first. An exact SELECT count(*) is used instead when the adapter is not
# PostgreSQL, when the estimate query raises, when pg_class has no row for
# the table, or when the estimate is not positive (an unknown or missing
# estimate would otherwise yield too few chunks).
#
# NOTE(review): a positive estimate may still be stale and lower than the
# real row count -- confirm that is acceptable for callers.
#
# table - table name.
#
# Returns an Integer: row_count / CHUNK_SIZE + 1.
def get_chunks table
  conn = Source.connection
  row_count = nil

  if conn.adapter_name == 'PostgreSQL'
    pg_query = "SELECT reltuples::bigint FROM pg_class WHERE relname=#{conn.quote table}"
    begin
      first_row = conn.exec_query(pg_query).rows[0]
      row_count = first_row[0].to_i if first_row
    rescue StandardError
      # Best effort only: any failure falls through to the exact count below.
      row_count = nil
    end
  end

  unless row_count && row_count > 0
    puts 'Counting...'
    query = "SELECT count(*) FROM #{conn.quote_table_name table}"
    row_count = conn.exec_query(query).rows[0][0].to_i
  end

  row_count / CHUNK_SIZE + 1
end

.read_comments(conn, tables) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/database_sanitizer.rb', line 17

# Builds the transformer lookup used during export.
#
# conn   - connection that responds to retrieve_column_comments(table_sym);
#          each yielded entry is read as [column_name, comment].
# tables - table names to inspect.
#
# Returns a Hash of the form { table_sym => { column_name => callable_or_nil } }.
# Aborts the process when a comment names a transformer missing from
# Transformers.
def read_comments conn, tables
  tables.each_with_object({}) do |table, by_table|
    table_sym = table.to_sym
    by_column = {}

    conn.retrieve_column_comments(table_sym).each do |column|
      column_name = column[0]
      key = extract_transformer(column[1])

      if !key.nil? && !Transformers.key?(key)
        abort "Transformer '#{key}' not found (#{table}.#{column_name})"
      end

      # Columns without a sanitize directive are still recorded, with nil.
      by_column[column_name] = key && Transformers[key]
    end

    by_table[table_sym] = by_column
  end
end