Class: ActiveDataFrame::Database
- Inherits:
-
Object
- Object
- ActiveDataFrame::Database
- Defined in:
- lib/active_data_frame/database.rb
Instance Attribute Summary collapse
-
#block_type ⇒ Object
readonly
Returns the value of attribute block_type.
-
#data_frame_type ⇒ Object
readonly
Returns the value of attribute data_frame_type.
Class Method Summary collapse
- .batch ⇒ Object
- .batching ⇒ Object
- .batching=(value) ⇒ Object
- .execute(sql) ⇒ Object
- .flush! ⇒ Object
- .for_types(block:, df:) ⇒ Object
Instance Method Summary collapse
- #bulk_delete(id, indices) ⇒ Object
-
#bulk_insert(new_blocks, columns = block_type::COLUMNS) ⇒ Object
Insert block data for all blocks in a single call.
-
#bulk_update(existing, columns = block_type::COLUMNS) ⇒ Object
Update block data for all blocks in a single call.
- #bulk_upsert(updates, inserts) ⇒ Object
-
#initialize(block_type, data_frame_type) ⇒ Database
constructor
A new instance of Database.
Constructor Details
#initialize(block_type, data_frame_type) ⇒ Database
Returns a new instance of Database.
46 47 48 49 |
# File 'lib/active_data_frame/database.rb', line 46 def initialize(block_type, data_frame_type) @block_type = block_type @data_frame_type = data_frame_type end |
Instance Attribute Details
#block_type ⇒ Object (readonly)
Returns the value of attribute block_type.
44 45 46 |
# File 'lib/active_data_frame/database.rb', line 44 def block_type @block_type end |
#data_frame_type ⇒ Object (readonly)
Returns the value of attribute data_frame_type.
44 45 46 |
# File 'lib/active_data_frame/database.rb', line 44 def data_frame_type @data_frame_type end |
Class Method Details
.batch ⇒ Object
51 52 53 54 55 56 57 58 59 60 |
# File 'lib/active_data_frame/database.rb', line 51 def self.batch self.batching, prev_batch = true, self.batching Thread.current[:batch] ||= '' ActiveRecord::Base.transaction do yield end ensure self.batching = prev_batch flush! unless self.batching end |
.batching ⇒ Object
4 5 6 |
# File 'lib/active_data_frame/database.rb', line 4 def self.batching !!Thread.current[:active_data_frame_batching] end |
.batching=(value) ⇒ Object
8 9 10 |
# File 'lib/active_data_frame/database.rb', line 8 def self.batching=(value) Thread.current[:active_data_frame_batching] = !!value end |
.execute(sql) ⇒ Object
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/active_data_frame/database.rb', line 12 def self.execute(sql) if ActiveDataFrame::Database.batching Thread.current[:batch] << sql << ?; else unless sql.empty? ActiveRecord::Base.transaction do ActiveDataFrame::DataFrameProxy.suppress_logs do case ActiveRecord::Base.connection_config[:adapter] when 'sqlite3'.freeze ActiveRecord::Base.connection.raw_connection.execute_batch sql when 'mysql2' sql.split(';').reject{|x| x.strip.empty?}.each do |stmt| ActiveRecord::Base.connection.execute(stmt) end else ActiveRecord::Base.connection.execute(sql) end end end end end end |
.flush! ⇒ Object
35 36 37 38 |
# File 'lib/active_data_frame/database.rb', line 35 def self.flush! execute(Thread.current[:batch]) Thread.current[:batch] = '' end |
Instance Method Details
#bulk_delete(id, indices) ⇒ Object
146 147 148 149 150 151 |
# File 'lib/active_data_frame/database.rb', line 146 def bulk_delete(id, indices) indices.each_slice(ActiveDataFrame.delete_max_batch_size) do |slice| # puts "Deleting slice of #{slice.length}" block_type.where(data_frame_id: id, period_index: slice).delete_all end end |
#bulk_insert(new_blocks, columns = block_type::COLUMNS) ⇒ Object
Insert block data for all blocks in a single call
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/active_data_frame/database.rb', line 156 def bulk_insert(new_blocks, columns=block_type::COLUMNS) new_blocks.each_slice(ActiveDataFrame.insert_max_batch_size) do |new_blocks_slice| # puts "Inserting slice of #{new_blocks_slice.length}" inserts = '' new_blocks_slice.each do |period_index, (values, df_id)| inserts << \ case ActiveRecord::Base.connection_config[:adapter] when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}')," else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}')," end end sql = "INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}" Database.execute sql end end |
#bulk_update(existing, columns = block_type::COLUMNS) ⇒ Object
Update block data for all blocks in a single call
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'lib/active_data_frame/database.rb', line 77 def bulk_update(existing, columns=block_type::COLUMNS) existing.each_slice(ActiveDataFrame.update_max_batch_size) do |existing_slice| # puts "Updating slice of #{existing_slice.length}" case ActiveRecord::Base.connection_config[:adapter] when 'postgresql'.freeze # # PostgreSQL Supports the fast setting of multiple update values that differ # per row from a temporary table. # updates = '' existing_slice.each do |period_index, (values, df_id)| updates << "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')})," end Database.execute( <<-SQL UPDATE #{block_type.table_name} SET #{columns.map{|col| "#{col} = t.#{col}" }.join(", ")} FROM( VALUES #{updates[0..-2]}) as t(data_frame_id, period_index, #{columns.join(',')}) WHERE #{block_type.table_name}.data_frame_id = t.data_frame_id AND #{block_type.table_name}.period_index = t.period_index AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}' SQL ) # # For MySQL we use the ON DUPLICATE KEY UPDATE functionality. # This relies on there being a unique index dataframe and period index # on the blocks table. # This tends to be faster than the general CASE based solution below # but slower than the PostgreSQL solution above # when 'mysql2'.freeze # Fast bulk update updates, on_duplicate = "", "" existing_slice.each do |period_index, (values, df_id)| updates << "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}')," end on_duplicate = columns.map do |cname| "#{cname}=VALUES(#{cname})" end.join(", ") stmt = <<-SQL INSERT INTO #{block_type.table_name} (#{columns.join(',')},data_frame_id,period_index,data_frame_type) VALUES #{updates[0..-2]} ON DUPLICATE KEY UPDATE #{on_duplicate} SQL Database.execute(stmt) else # # General CASE based solution for multiple differing updates # set per row. # We use a CASE statement per column which determines the column # to set based on the period index # ids = existing_slice.map {|_, (_, id)| id} updates = columns.map.with_index do |column, column_idx| [column, "CASE \n#{existing_slice.map{|period_index, (values, df_id)| "WHEN period_index=#{period_index} AND data_frame_id=#{df_id} then #{values[column_idx]}" }.join("\n")} \nEND\n"] end.to_h update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ') Database.execute(<<-SQL UPDATE #{block_type.table_name} SET #{update_statement} WHERE #{block_type.table_name}.data_frame_id IN (#{ids.join(',')}) AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}' AND #{block_type.table_name}.period_index IN (#{existing_slice.map(&:first).join(', ')}); SQL ) end end end |
#bulk_upsert(updates, inserts) ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/active_data_frame/database.rb', line 62 def bulk_upsert(updates, inserts) Database.batch do updates.group_by(&:keys).transform_values{|v| v.map(&:values) }.each do |columns, rows| update = rows.map{|df_id, period_index, *values| [period_index, [values, df_id]] } bulk_update(update, columns - [:data_frame_id, :period_index]) end inserts.group_by(&:keys).transform_values{|v| v.map(&:values) }.each do |columns, rows| insert = rows.map{|df_id, period_index, *values| [period_index, [values, df_id]] } bulk_insert(insert, columns - [:data_frame_id, :period_index]) end end end |