Class: RdbmsSampler::TableSample
- Inherits:
-
Object
- Object
- RdbmsSampler::TableSample
- Defined in:
- lib/rdbms_sampler/table_sample.rb
Instance Attribute Summary collapse
-
#pending_dependencies ⇒ Object
readonly
Returns the value of attribute pending_dependencies.
Instance Method Summary collapse
-
#add(row) ⇒ Object
Add a row to the table sample.
- #ensure_referential_integrity(sample) ⇒ Object
-
#fulfil(dependency) ⇒ Object
Add the given dependency to the sample.
- #fulfilled?(dependency) ⇒ Boolean
- #identifier ⇒ Object
-
#initialize(connection, schema_name, table_name, size = 1000) ⇒ TableSample
constructor
A new instance of TableSample.
- #quoted_name ⇒ Object
- #sample! ⇒ Object
- #size ⇒ Object
- #to_sql ⇒ Object
Constructor Details
#initialize(connection, schema_name, table_name, size = 1000) ⇒ TableSample
Returns a new instance of TableSample.
11 12 13 14 15 16 17 18 19 20 |
# File 'lib/rdbms_sampler/table_sample.rb', line 11

# Sets up an empty, not-yet-sampled table sample.
#
# @param connection database connection used by the other methods for
#   quoting identifiers/values and running queries
# @param schema_name name of the schema that owns the table
# @param table_name name of the table to sample
# @param size requested sample size (defaults to 1000)
def initialize(connection, schema_name, table_name, size = 1000)
  # Where to sample from.
  @schema = schema_name
  @table = table_name
  @connection = connection

  # How many rows were requested.
  @size = size

  # Bookkeeping: everything starts out empty and unsampled.
  @pending_dependencies = Set.new
  @sample = Set.new
  @sampled = false
  @sampled_ids = Set.new
end
Instance Attribute Details
#pending_dependencies ⇒ Object (readonly)
Returns the value of attribute pending_dependencies.
9 10 11 |
# File 'lib/rdbms_sampler/table_sample.rb', line 9

# Read-only access to the dependencies collected so far that have not
# yet been handed off for fulfilment.
#
# @return the current value of @pending_dependencies
def pending_dependencies
  @pending_dependencies
end
Instance Method Details
#add(row) ⇒ Object
Add a row to the table sample. Returns number of new dependencies introduced.
57 58 59 60 61 62 63 |
# File 'lib/rdbms_sampler/table_sample.rb', line 57

# Add a row to the table sample.
#
# A row that is already part of the sample is ignored. Otherwise its `id`
# (when present) is remembered, and every dependency reported by
# `dependencies_for(row)` is queued in @pending_dependencies.
#
# @param row the row to add; it is indexed with the string key 'id'
# @return [Integer] number of dependencies that were not already pending
#   (0 when the row was already sampled)
def add(row)
  # Set#add? returns nil when the row is already present.
  return 0 unless @sample.add?(row)

  row_id = row['id']
  @sampled_ids.add(row_id) if row_id

  # Count only dependencies that are new to the pending set; `count` with a
  # block tallies truthy results without building intermediate arrays.
  dependencies_for(row).count { |dep| @pending_dependencies.add?(dep) }
end
#ensure_referential_integrity(sample) ⇒ Object
66 67 68 69 70 71 72 73 |
# File 'lib/rdbms_sampler/table_sample.rb', line 66

# Hands every pending dependency to the table sample responsible for it.
#
# @param sample object that responds to `table_sample_for_dependency` and
#   returns something that responds to `fulfil`
# @return [Integer] sum of the non-nil values returned by the `fulfil` calls
def ensure_referential_integrity(sample)
  # Swap in a fresh set before iterating, so dependencies queued while
  # fulfilling do not land in the set that is being walked.
  in_flight = @pending_dependencies
  @pending_dependencies = Set.new

  introduced = in_flight.map do |dependency|
    sample.table_sample_for_dependency(dependency).fulfil(dependency)
  end
  introduced.compact.sum
end
#fulfil(dependency) ⇒ Object
Fetch the row that satisfies the given dependency and add it to the sample.
37 38 39 40 41 42 43 44 45 |
# File 'lib/rdbms_sampler/table_sample.rb', line 37

# Makes sure a row satisfying the dependency is part of this sample.
#
# If the dependency is not already fulfilled, a single matching row is
# selected from this table and passed to `add`.
#
# @param dependency object exposing `child_key` (column name) and `value`
# @return 0 when the dependency was already fulfilled, otherwise whatever
#   `add` returns for the fetched row
# @raise [RuntimeError] when the query yields no row
def fulfil(dependency)
  return 0 if fulfilled?(dependency)

  # Column name and value both go through the connection's quoting helpers.
  column = @connection.quote_column_name(dependency.child_key)
  value = @connection.quote(dependency.value)
  sql = "SELECT * FROM #{quoted_name} WHERE #{column} = #{value}"

  found = @connection.select_one(sql)
  return add(found) unless found.nil?

  raise "Could not fulfil #{dependency} using query [#{sql}]"
end
#fulfilled?(dependency) ⇒ Boolean
48 49 50 51 52 53 |
# File 'lib/rdbms_sampler/table_sample.rb', line 48

# Tells whether a dependency is already covered by the sampled rows.
#
# @param dependency object exposing `child_key` and `value`
# @return [Boolean] true only when the dependency targets the `id` column
#   and its value is among the ids recorded in @sampled_ids
def fulfilled?(dependency)
  # FIXME: Only handles `id` column
  dependency.child_key == 'id' && @sampled_ids.include?(dependency.value)
end
#identifier ⇒ Object
31 32 33 |
# File 'lib/rdbms_sampler/table_sample.rb', line 31

# Unquoted `schema.table` name of the sampled table.
#
# @return [String]
def identifier
  format('%s.%s', @schema, @table)
end
#quoted_name ⇒ Object
92 93 94 |
# File 'lib/rdbms_sampler/table_sample.rb', line 92

# Fully qualified table name with schema and table each quoted by the
# connection.
#
# @return [String] `<quoted schema>.<quoted table>`
def quoted_name
  schema_part = @connection.quote_table_name(@schema)
  table_part = @connection.quote_table_name(@table)
  "#{schema_part}.#{table_part}"
end
#sample! ⇒ Object
22 23 24 25 |
# File 'lib/rdbms_sampler/table_sample.rb', line 22

# Returns the sampled rows, calling `fetch(@size)` first unless the
# @sampled flag is already set.
#
# NOTE(review): `fetch` is not visible here; presumably it fills @sample
# and sets @sampled -- confirm against its definition.
#
# @return the value of @sample after any fetch
def sample!
  return @sample if @sampled

  fetch(@size)
  @sample
end
#size ⇒ Object
27 28 29 |
# File 'lib/rdbms_sampler/table_sample.rb', line 27

# Size of the sample: the number of rows actually held once @sampled is
# set, otherwise the requested size.
#
# @return the row count of @sample, or @size before sampling
def size
  return @sample.size if @sampled

  @size
end
#to_sql ⇒ Object
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/rdbms_sampler/table_sample.rb', line 75

# Renders the sample as SQL: a header comment followed by batched
# multi-row INSERT statements.
#
# The column list is taken from the first sampled row. Every row's values
# are looked up by those column names, so a row whose keys come in a
# different order still lines up with the column list (a key missing from a
# row is passed to the connection's `quote` as nil).
#
# NOTE(review): assumes each sampled row is Hash-like (`keys`, `values_at`).
#
# @return [String] the header comment alone when the sample is empty
def to_sql
  sql = "\n-- Sample from #{quoted_name} (#{@sample.size} rows)\n"
  return sql if @sample.empty?

  columns = @sample.first.keys
  column_list = columns.map { |col| @connection.quote_column_name(col) }.join(',')

  # INSERT in batches to reduce the likelihood of hitting `max_allowed_packet`
  @sample.each_slice(250) do |rows|
    tuples = rows.map do |row|
      row.values_at(*columns).map { |val| @connection.quote(val) }.join(',')
    end
    values = tuples.join("),\n (")
    sql << "INSERT INTO #{quoted_name} \n (#{column_list}) \nVALUES \n (#{values});\n"
  end
  sql
end