Class: RdbmsSampler::TableSample

Inherits:
Object
  • Object
show all
Defined in:
lib/rdbms_sampler/table_sample.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(connection, schema_name, table_name, size = 1000) ⇒ TableSample

Returns a new instance of TableSample.



11
12
13
14
15
16
17
18
19
20
# File 'lib/rdbms_sampler/table_sample.rb', line 11

# Build an empty table sample; no rows are read here.
#
# @param connection the database connection, stored as given
# @param schema_name name of the schema that holds the table
# @param table_name name of the table being sampled
# @param size number passed to the fetch step when sampling (defaults to 1000)
def initialize(connection, schema_name, table_name, size = 1000)
  # Where the rows come from.
  @connection = connection
  @schema, @table = schema_name, table_name
  @size = size

  # Sampling state: nothing fetched yet, all collections start empty.
  @sampled = false
  @sample = Set.new
  @sampled_ids = Set.new
  @pending_dependencies = Set.new
end

Instance Attribute Details

#pending_dependencies ⇒ Object (readonly)

Returns the value of attribute pending_dependencies.



9
10
11
# File 'lib/rdbms_sampler/table_sample.rb', line 9

# Reader for the `@pending_dependencies` instance variable.
#
# @return the stored collection itself (no copy is made)
def pending_dependencies
  @pending_dependencies
end

Instance Method Details

#add(row) ⇒ Object

Add a row to the table sample. Returns number of new dependencies introduced.



57
58
59
60
61
62
63
# File 'lib/rdbms_sampler/table_sample.rb', line 57

# Add a row to the table sample.
#
# A row that is already part of the sample is ignored. For a new row, its
# `id` (when truthy) is remembered, and every dependency reported by
# `dependencies_for` is queued as pending.
#
# @param row the row to add; read with `row['id']`
# @return [Integer] how many dependencies were newly queued (0 for a duplicate row)
def add(row)
  newly_sampled = @sample.add?(row)
  return 0 unless newly_sampled

  row_id = row['id']
  @sampled_ids.add(row_id) if row_id

  # `add?` is truthy only for dependencies that were not already pending.
  dependencies_for(row).count { |dependency| @pending_dependencies.add?(dependency) }
end

#ensure_referential_integrity(sample) ⇒ Object

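Parameters:

  • sample — object that responds to `table_sample_for_dependency(dependency)` and returns the table sample that should fulfil that dependency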



66
67
68
69
70
71
72
73
# File 'lib/rdbms_sampler/table_sample.rb', line 66

# Resolve every dependency that is currently pending for this table.
#
# The pending set is swapped for a fresh one before any work is done, so
# anything queued while resolving lands in the new set.
#
# @param sample object that responds to `table_sample_for_dependency`
# @return [Integer] sum of the values returned by `fulfil` (nil results are skipped)
def ensure_referential_integrity(sample)
  outstanding = @pending_dependencies
  @pending_dependencies = Set.new

  total = 0
  outstanding.each do |dependency|
    introduced = sample.table_sample_for_dependency(dependency).fulfil(dependency)
    total += introduced unless introduced.nil?
  end
  total
end

#fulfil(dependency) ⇒ Object

Add the given dependency to the sample

Parameters:

  • dependency — the dependency to satisfy; its `child_key` and `value` are used to look up the row



37
38
39
40
41
42
43
44
45
# File 'lib/rdbms_sampler/table_sample.rb', line 37

# Add the row that satisfies the given dependency to the sample.
#
# Nothing is queried when `fulfilled?` already reports the dependency as
# satisfied. Otherwise a single row is selected where the dependency's
# `child_key` column equals its `value`, and that row is passed to `add`.
#
# @param dependency object responding to `child_key` and `value`
# @return 0 when already fulfilled, otherwise whatever `add` returns
# @raise [RuntimeError] when the query yields no row
def fulfil(dependency)
  return 0 if fulfilled?(dependency)

  column = @connection.quote_column_name(dependency.child_key)
  literal = @connection.quote(dependency.value)
  query = "SELECT * FROM #{quoted_name} WHERE #{column} = #{literal}"

  found = @connection.select_one(query)
  return add(found) unless found.nil?

  raise "Could not fulfil #{dependency} using query [#{query}]"
end

#fulfilled?(dependency) ⇒ Boolean

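Parameters:

  • dependency — the dependency to check; only dependencies whose `child_key` is 'id' can be reported as fulfilled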

Returns:

  • (Boolean)


48
49
50
51
52
53
# File 'lib/rdbms_sampler/table_sample.rb', line 48

# Tell whether the row a dependency points at has already been sampled.
#
# @param dependency object responding to `child_key` and `value`
# @return [Boolean] true only when the key is 'id' and the value is among
#   the ids recorded so far
def fulfilled?(dependency)
  # FIXME: Only handles `id` column
  dependency.child_key == 'id' && @sampled_ids.include?(dependency.value)
end

#identifier ⇒ Object



31
32
33
# File 'lib/rdbms_sampler/table_sample.rb', line 31

# Unquoted "schema.table" label for this sample.
#
# @return [String] schema and table joined by a dot
def identifier
  format('%<schema>s.%<table>s', schema: @schema, table: @table)
end

#quoted_name ⇒ Object



92
93
94
# File 'lib/rdbms_sampler/table_sample.rb', line 92

# Fully qualified table name with each part quoted by the connection.
#
# @return [String] quoted schema, a dot, then the quoted table
def quoted_name
  schema_part = @connection.quote_table_name(@schema)
  table_part = @connection.quote_table_name(@table)
  "#{schema_part}.#{table_part}"
end

#sample! ⇒ Object



22
23
24
25
# File 'lib/rdbms_sampler/table_sample.rb', line 22

# Return the sampled rows, fetching them first unless `@sampled` is set.
#
# @return the current `@sample` collection
def sample!
  return @sample if @sampled

  fetch(@size)
  @sample
end

#size ⇒ Object



27
28
29
# File 'lib/rdbms_sampler/table_sample.rb', line 27

# Size of the sample.
#
# @return the configured size while nothing has been sampled yet, otherwise
#   the number of rows actually held
def size
  return @size unless @sampled

  @sample.size
end

#to_sql ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/rdbms_sampler/table_sample.rb', line 75

# Render the sampled rows as SQL.
#
# The output always starts with a comment line naming the table and the row
# count. When there are rows, they follow as multi-row INSERT statements.
#
# The column list is taken from the first row. Every row's values are read
# in that same column order, so a row whose keys are ordered differently
# still lines up with the column list (a key missing from a row is passed
# to the connection's `quote` as nil).
#
# @return [String] the SQL text
def to_sql
  table = quoted_name
  sql = "\n-- Sample from #{table} (#{@sample.count} rows)\n"
  return sql if @sample.empty?

  columns = @sample.first.keys
  column_list = columns.map { |col| @connection.quote_column_name(col) }.join(',')

  # INSERT in batches to reduce the likelihood of hitting `max_allowed_packet`
  @sample.each_slice(250) do |rows|
    tuples = rows.map do |row|
      # values_at keeps values aligned with the column list regardless of
      # the row's own key order.
      row.values_at(*columns).map { |val| @connection.quote(val) }.join(',')
    end
    sql << "INSERT INTO #{table} \n  (#{column_list}) \nVALUES \n  (#{tuples.join("),\n  (")});\n"
  end
  sql
end