Module: DatabaseSanitizer
Defined in:
- lib/database_sanitizer.rb
- lib/database_sanitizer/version.rb
- lib/database_sanitizer/transformers.rb
Defined Under Namespace
Classes: Source
Constant Summary
# Row count used for LIMIT/OFFSET paging.
CHUNK_SIZE = 500

# Version string of this library.
VERSION = '0.0.10'

# Named value sanitizers, looked up by the key found in a column comment.
# Every entry is callable as `transformer.(index, original_value)` and
# returns the replacement value:
#
# * 'email'        - "email<index padded to 5 digits>@<original domain>";
#                    nil stays nil so NULL columns do not raise.
# * 'wipe'         - nil
# * 'zero'         - 0
# * 'empty_string' - ''
# * 'name'         - 'John Doe'
# * 'phone_number' - first three characters of the original followed by the
#                    index, zero-padded to the original length; nil stays nil.
#
# The constant-valued entries are procs (not lambdas) on purpose: they are
# invoked with two arguments and must ignore them.
Transformers = {
  'email' => ->(i, rec) { rec.nil? ? rec : "email#{i.to_s.rjust(5, '0')}@#{rec.split('@')[1]}" },
  'wipe' => proc { nil },
  'zero' => proc { 0 },
  'empty_string' => proc { '' },
  'name' => proc { 'John Doe' },
  'phone_number' => ->(i, rec) { rec.nil? ? rec : "#{rec[0, 3]}#{i.to_s.rjust(rec.length - 3, '0')}" }
}
Class Method Summary
- .duplicate_schema(schema = nil) ⇒ Object
- .export(src, dest, opts = {}) ⇒ Object
- .extract_transformer(comment) ⇒ Object
- .get_chunks(table) ⇒ Object
- .read_comments(conn, tables) ⇒ Object
Class Method Details
.duplicate_schema(schema = nil) ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/database_sanitizer.rb', line 31
#
# Loads a schema so that rows can subsequently be copied.
#
# @param schema [String, nil] path to a SQL schema file, resolved relative to
#   the current working directory. When nil, the schema is dumped from
#   Source.connection with ActiveRecord::SchemaDumper and the resulting Ruby
#   is evaluated.
# @return [Object] whatever the wrapped load/exec call returns.
#
# NOTE(review): the extracted listing had lost two method names
# (`ActiveRecord::Migration. { ... }` and `File.(schema, Dir.pwd)`). They are
# restored here as `suppress_messages` and `expand_path` - confirm against the
# gem source.
def duplicate_schema(schema = nil)
  if schema.nil?
    schema_sio = StringIO.new
    puts 'Dumping schema.rb...'
    ActiveRecord::SchemaDumper.dump(Source.connection, schema_sio)
    puts 'Loading schema.rb...'
    # The evaluated text is the dump produced just above, not external input.
    ActiveRecord::Migration.suppress_messages { eval schema_sio.string }
  else
    puts 'Reading schema SQL...'
    # File.read instead of IO.read: IO.read treats a leading "|" as a command.
    schema_src = File.read File.expand_path(schema, Dir.pwd)
    ActiveRecord::Migration.suppress_messages { ActiveRecord::Base.connection.exec_query schema_src }
  end
end
.export(src, dest, opts = {}) ⇒ Object
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/database_sanitizer.rb', line 58
#
# Copies table contents from +src+ to +dest+ in chunks of CHUNK_SIZE rows,
# passing each column value through the transformer registered for that
# column (if any) before inserting it.
#
# @param src  connection the rows are read from (needs #tables, #exec_query,
#   #quote_table_name).
# @param dest connection the rows are written to (needs #quote_table_name,
#   #quote_column_name, #quote, #transaction, #insert_sql); column comments
#   are also read from it via read_comments.
# @param opts [Hash] :schema  - forwarded to duplicate_schema;
#   :tables  - table names to export (defaults to every table of +src+);
#   :exclude - table names to skip.
# @return the result of iterating the table list.
#
# NOTE(review): the paging query has no ORDER BY, so chunk boundaries rely on
# the database returning rows in a stable order between queries.
# NOTE(review): the number of chunks comes from get_chunks, which may be based
# on an estimate - rows beyond the estimate would not be copied.
def export(src, dest, opts = {})
  duplicate_schema opts[:schema]
  tables = (opts[:tables] || src.tables.collect(&:to_s)) - (opts[:exclude] || [])
  transformers = read_comments dest, tables
  # Width used to right-align table names in the progress output.
  label_width = transformers.keys.map(&:length).max || 0

  tables.with_progress('Exporting').each do |table|
    # Quote with the connection that will actually run each statement.
    src_table = src.quote_table_name table
    dest_table = dest.quote_table_name table
    table_transformers = transformers[table.to_sym]

    get_chunks(table).times_with_progress(table.rjust(label_width)) do |chunk_i|
      offset = chunk_i * CHUNK_SIZE
      result = src.exec_query "SELECT * FROM #{src_table} LIMIT #{CHUNK_SIZE} OFFSET #{offset}"
      cols = result.columns.map { |col| dest.quote_column_name col }.join ','

      dest.transaction do
        result.rows.with_progress('batch').each_with_index do |src_row, row_i|
          values = result.columns.each_with_index.map do |col, col_i|
            transformer = table_transformers[col.to_sym]
            # offset + row_i is the row's position in the whole table; it is
            # the index handed to the transformer.
            dest.quote transformer ? transformer.(offset + row_i, src_row[col_i]) : src_row[col_i]
          end
          dest.insert_sql "INSERT INTO #{dest_table} (#{cols}) VALUES (#{values.join ','})"
        end
      end
    end
  end
end
.extract_transformer(comment) ⇒ Object
15 |
# File 'lib/database_sanitizer.rb', line 15
#
# Extracts the transformer name from a column comment.
#
# @param comment [String, nil] column comment text.
# @return [String, nil] the word following "sanitize:" (one optional space is
#   allowed after the colon), or nil when the comment is nil/false or carries
#   no such marker.
def extract_transformer(comment)
  return nil unless comment

  comment[/sanitize: ?(\w+)/, 1]
end
.get_chunks(table) ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/database_sanitizer.rb', line 46
#
# Returns how many CHUNK_SIZE-row chunks are needed to cover +table+ on
# Source.connection.
#
# On PostgreSQL the row count is first taken from the planner estimate in
# pg_class.reltuples, which avoids a full count. An exact `count(*)` is used
# instead when the adapter is not PostgreSQL, when the estimate query fails or
# returns no row, or when the estimate is not positive (reltuples can be 0 or
# -1 for tables without statistics, which would otherwise yield too few
# chunks).
#
# @param table [String] unquoted table name.
# @return [Integer] row count / CHUNK_SIZE + 1 (always at least one chunk).
def get_chunks(table)
  conn = Source.connection
  row_count = nil

  if conn.adapter_name == 'PostgreSQL'
    pg_query = "SELECT reltuples::bigint FROM pg_class WHERE relname=#{conn.quote table}"
    estimate_row = begin
      conn.exec_query(pg_query).rows.first
    rescue StandardError
      nil # best effort: fall through to the exact count below
    end
    estimate = estimate_row && estimate_row.first.to_i
    row_count = estimate if estimate && estimate > 0
  end

  unless row_count
    puts 'Counting...'
    count_query = "SELECT count(*) FROM #{conn.quote_table_name table}"
    # The result must be captured: the original discarded it and then
    # dereferenced `false`.
    row_count = conn.exec_query(count_query).rows[0][0].to_i
  end

  row_count / CHUNK_SIZE + 1
end
.read_comments(conn, tables) ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/database_sanitizer.rb', line 17
#
# Builds the transformer lookup for the given tables from column comments.
#
# For every table, `conn.retrieve_column_comments` is queried and each
# column's comment is run through extract_transformer. Columns without a
# transformer marker are stored with a nil value.
#
# @param conn   connection responding to #retrieve_column_comments(table_sym).
# @param tables [Array<String>] table names.
# @return [Hash] { table_sym => { column_key => callable_or_nil } }, where
#   column_key is the first element of each entry yielded by
#   retrieve_column_comments.
# Aborts the process when a comment names a transformer that is not present
# in Transformers.
def read_comments(conn, tables)
  tables.each_with_object({}) do |table, transformers|
    per_column = {}

    conn.retrieve_column_comments(table.to_sym).each do |column|
      name = column[0]
      key = extract_transformer column[1]

      if !key.nil? && !Transformers.key?(key)
        abort "Transformer '#{key}' not found (#{table}.#{name})"
      end

      per_column[name] = key.nil? ? nil : Transformers[key]
    end

    transformers[table.to_sym] = per_column
  end
end