Module: Tapsoob::Utils
Instance Method Summary collapse
- #base64decode(data) ⇒ Object
- #base64encode(data) ⇒ Object
- #bin(cmd) ⇒ Object
- #calculate_chunksize(old_chunksize) ⇒ Object
- #checksum(data) ⇒ Object
- #encode_blobs(row, columns) ⇒ Object
- #export_indexes(dump_path, table, index_data) ⇒ Object
- #export_rows(dump_path, table, row_data) ⇒ Object
- #export_schema(dump_path, table, schema_data) ⇒ Object
- #format_data(db, data, opts = {}) ⇒ Object
- #incorrect_blobs(db, table) ⇒ Object
MySQL handles text and blob fields the same way internally. This is not true for other databases, so we must check whether the field is actually text and manually convert it back to a string.
- #load_indexes(database_url, index) ⇒ Object
- #load_schema(dump_path, database_url_or_db, table) ⇒ Object
- #order_by(db, table) ⇒ Object
- #primary_key(db, table) ⇒ Object
- #schema_bin(command, *args) ⇒ Object
- #single_integer_primary_key(db, table) ⇒ Object
- #valid_data?(data, crc32) ⇒ Boolean
- #windows? ⇒ Boolean
Instance Method Details
#base64decode(data) ⇒ Object
36 37 38 |
# File 'lib/tapsoob/utils.rb', line 36

# Decodes Base64 text using the "m" unpack directive.
#
# @param data [String] Base64-encoded text
# @return [String] the decoded bytes
def base64decode(data)
  decoded, = data.unpack("m")
  decoded
end
#base64encode(data) ⇒ Object
32 33 34 |
# File 'lib/tapsoob/utils.rb', line 32

# Encodes a value as Base64 text using the "m" pack directive.
#
# @param data [String] the bytes to encode
# @return [String] Base64 text as emitted by Array#pack("m")
def base64encode(data)
  Array[data].pack("m")
end
#bin(cmd) ⇒ Object
19 20 21 22 |
# File 'lib/tapsoob/utils.rb', line 19

# Returns the executable name to use for +cmd+ on the current platform.
#
# @param cmd [#to_s] base command name
# @return [Object] "<cmd>.cmd" when #windows? is true, otherwise +cmd+ itself
def bin(cmd)
  windows? ? "#{cmd}.cmd" : cmd
end
#calculate_chunksize(old_chunksize) ⇒ Object
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/tapsoob/utils.rb', line 130

# Runs the given block with a Tapsoob::Chunksize tracker and returns the
# chunk size that tracker computes afterwards.
#
# The block's return value is stored as the tracker's +time_in_db+. When the
# block raises Errno::EPIPE it is retried; the third consecutive failure is
# re-raised to the caller.
#
# @param old_chunksize [Object] value handed to Tapsoob::Chunksize.new
# @yieldparam chunk [Tapsoob::Chunksize] the tracker for this attempt
# @return [Object] result of Tapsoob::Chunksize#calc_new_chunksize
# @raise [Errno::EPIPE] once the retry count exceeds 2
def calculate_chunksize(old_chunksize)
  chunk = Tapsoob::Chunksize.new(old_chunksize)

  begin
    chunk.start_time = Time.now
    chunk.time_in_db = yield(chunk)
  rescue Errno::EPIPE
    chunk.retries += 1
    raise if chunk.retries > 2

    # we got disconnected, the chunksize could be too large
    # reset the chunksize based on the number of retries
    chunk.reset_chunksize
    retry
  end

  chunk.end_time = Time.now
  chunk.calc_new_chunksize
end
#checksum(data) ⇒ Object
24 25 26 |
# File 'lib/tapsoob/utils.rb', line 24

# Computes the CRC-32 checksum of +data+ via Zlib.
#
# @param data [String] bytes to checksum
# @return [Integer] the CRC-32 value
def checksum(data)
  # 0 is Zlib's default starting CRC, spelled out explicitly here.
  Zlib.crc32(data, 0)
end
#encode_blobs(row, columns) ⇒ Object
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# File 'lib/tapsoob/utils.rb', line 109

# Base64-encodes the binary values of +row+ in place.
#
# A value is encoded when it is a Sequel::SQL::Blob (in any column), or when
# its column is listed in +columns+ and it is a String with ASCII-8BIT
# encoding (binary data that was not wrapped in Sequel::SQL::Blob).
# nil values and non-String values are left untouched.
#
# @param row [Hash] column => value; mutated and returned
# @param columns [Array] columns known to hold blobs
# @return [Hash] the same +row+ object
def encode_blobs(row, columns)
  row.each do |key, value|
    next if value.nil?

    binary =
      if value.is_a?(Sequel::SQL::Blob)
        true
      else
        # Only Strings carry an encoding; checking the class first keeps
        # Integers, Times, etc. in a listed column from raising NoMethodError.
        columns.include?(key) && value.is_a?(String) && value.encoding == Encoding::ASCII_8BIT
      end

    # Re-assigning an existing key while iterating is safe for a Hash.
    row[key] = base64encode(value) if binary
  end
  row
end
#export_indexes(dump_path, table, index_data) ⇒ Object
156 157 158 159 160 161 162 163 164 165 |
# File 'lib/tapsoob/utils.rb', line 156

# Appends one NDJSON line holding +index_data+ to
# <dump_path>/indexes/<table>.json.
#
# Appending (rather than rewriting the file) avoids O(n²) work as entries
# accumulate. Each line is the JSON encoding of one migration string.
#
# @param dump_path [String] root directory of the dump
# @param table [#to_s] table name, used as the file's base name
# @param index_data [Object] value serialised with JSON.generate
# @return [Integer] number of bytes written
def export_indexes(dump_path, table, index_data)
  target = File.join(dump_path, "indexes", "#{table}.json")
  File.write(target, "#{JSON.generate(index_data)}\n", mode: 'a')
end
#export_rows(dump_path, table, row_data) ⇒ Object
167 168 169 170 171 172 173 174 175 |
# File 'lib/tapsoob/utils.rb', line 167

# Appends +row_data+ as a single NDJSON line to
# <dump_path>/data/<table>.json.
#
# Chunks are appended instead of rewriting the file, which avoids O(n²)
# work as the dump grows.
#
# @param dump_path [String] root directory of the dump
# @param table [#to_s] table name, used as the file's base name
# @param row_data [Object] value serialised with JSON.generate
# @return [Integer] number of bytes written
def export_rows(dump_path, table, row_data)
  target = File.join(dump_path, "data", "#{table}.json")
  File.write(target, "#{JSON.generate(row_data)}\n", mode: 'a')
end
#export_schema(dump_path, table, schema_data) ⇒ Object
150 151 152 153 154 |
# File 'lib/tapsoob/utils.rb', line 150

# Writes +schema_data+ to <dump_path>/schemas/<table>.rb, replacing any
# existing file content.
#
# @param dump_path [String] root directory of the dump
# @param table [#to_s] table name, used as the file's base name
# @param schema_data [#to_s] schema text to store
# @return [Integer] number of bytes written
def export_schema(dump_path, table, schema_data)
  target = File.join(dump_path, "schemas", "#{table}.rb")
  File.write(target, schema_data)
end
#format_data(db, data, opts = {}) ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/tapsoob/utils.rb', line 40

# Converts a list of row hashes into the header/data structure used for
# dumps, validating varchar lengths along the way.
#
# @param db [Object] database handle; must respond to #extension and
#   #column_schema_to_ruby_type
# @param data [Array<Hash>] rows keyed by column
# @param opts [Hash] optional settings:
#   :string_columns - columns passed to #encode_blobs (default [])
#   :schema         - [column, metadata] pairs; metadata[:db_type] is
#                     inspected for "varchar(N)" (default [])
#   :table          - table name, reported in the result and in errors
# @return [Hash] {} when +data+ is empty, otherwise a hash with
#   :table_name, :header, :data, plus :types when a schema was given
# @raise [Tapsoob::InvalidData] when a value is longer than the varchar(N)
#   limit declared for its column
def format_data(db, data, opts = {})
  return {} if data.size == 0

  string_columns = opts[:string_columns] || []
  schema = opts[:schema] || []
  table = opts[:table]

  # Map every varchar(N) column to its declared maximum length N.
  max_lengths = schema.each_with_object({}) do |(column, meta), lengths|
    limit = meta[:db_type].to_s[/^varchar\((\d+)\)/, 1]
    lengths[column] = limit.to_i if limit
  end

  header = data[0].keys
  only_data = data.map do |row|
    row = encode_blobs(row, string_columns)
    row.each do |column, value|
      limit = max_lengths[column]
      if limit && value.to_s.length > limit
        raise Tapsoob::InvalidData.new(<<-ERROR)
Detected data that exceeds the length limitation of its column. This is generally due to the fact that SQLite does not enforce length restrictions.
Table : #{table}
Column : #{column}
Type : #{schema.detect{|s| s.first == column}.last[:db_type]}
Data : #{value}
        ERROR
      end

      # Type conversion
      row[column] = value.strftime('%Y-%m-%d %H:%M:%S') if value.is_a?(Time)
    end
    header.map { |h| row[h] }
  end

  res = { table_name: table, header: header, data: only_data }

  # Add schema dumper extension in case it hasn't been added until now
  db.extension :schema_dumper

  # Add types if schema isn't empty
  unless schema.empty?
    res[:types] = schema.map do |c|
      ruby_type = db.column_schema_to_ruby_type(c.last)[:type].to_s
      case ruby_type
      when "BigDecimal" then "float"
      when "Bignum" then "integer"
      when "File" then "blob"
      when "TrueClass" then "boolean"
      else ruby_type.downcase
      end
    end
  end

  res
end
#incorrect_blobs(db, table) ⇒ Object
MySQL handles text and blob fields the same way internally. This is not true for other databases, so we must check whether the field is actually text and manually convert it back to a string.
100 101 102 103 104 105 106 107 |
# File 'lib/tapsoob/utils.rb', line 100

# Lists the columns of +table+ whose schema type is :blob.
#
# MySQL handles text and blob fields the same way internally, which is not
# true for other databases, so callers use this list to find fields that may
# really be text and need converting back to strings.
#
# @param db [Object] database handle responding to #schema
# @param table [Object] table identifier passed to db.schema
# @return [Array] names of the columns typed :blob, in schema order
def incorrect_blobs(db, table)
  db.schema(table).each_with_object([]) do |(column, cdata), blob_columns|
    blob_columns << column if cdata[:type] == :blob
  end
end
#load_indexes(database_url, index) ⇒ Object
189 190 191 |
# File 'lib/tapsoob/utils.rb', line 189 def load_indexes(database_url, index) Tapsoob::Schema.load_indexes(database_url, index) end |
#load_schema(dump_path, database_url_or_db, table) ⇒ Object
177 178 179 180 181 182 183 184 185 186 187 |
# File 'lib/tapsoob/utils.rb', line 177

# Loads the schema stored at <dump_path>/schemas/<table>.rb into a database.
#
# @param dump_path [String] root directory of the dump
# @param database_url_or_db [Sequel::Database, Object] a live connection, or
#   a value handed to the schema CLI (presumably a database URL — confirm)
# @param table [#to_s] table name, used as the file's base name
# @return [Object] result of Tapsoob::Schema.load or of #schema_bin
def load_schema(dump_path, database_url_or_db, table)
  schema_file = File.join(dump_path, "schemas", "#{table}.rb")
  # Read up front on both paths, so a missing file raises here.
  schema_content = File.read(schema_file)

  # If we have a connection object, use it directly for better performance
  if database_url_or_db.is_a?(Sequel::Database)
    return Tapsoob::Schema.load(database_url_or_db, schema_content)
  end

  schema_bin(:load, database_url_or_db, schema_file)
end
#order_by(db, table) ⇒ Object
210 211 212 213 214 215 216 217 218 |
# File 'lib/tapsoob/utils.rb', line 210

# Chooses the columns used to order the rows of +table+.
#
# Uses the table's primary key column(s) when it has any; otherwise falls
# back to every column of the table.
#
# @param db [Object] database handle responding to #schema and #[]
# @param table [String, Symbol, Sequel::SQL::Identifier] table identifier
# @return [Array] primary key columns, or all columns when there is no key
def order_by(db, table)
  pkey = primary_key(db, table)
  keys =
    if pkey.kind_of?(Array)
      pkey
    elsif pkey
      [pkey.to_sym]
    else
      []
    end

  # #primary_key returns an Array, and an empty Array is truthy, so a plain
  # truthiness test would never reach the all-columns fallback below.
  return keys unless keys.empty?

  table = table.to_sym unless table.kind_of?(Sequel::SQL::Identifier)
  db[table].columns
end
#primary_key(db, table) ⇒ Object
200 201 202 |
# File 'lib/tapsoob/utils.rb', line 200

# Collects the names of the columns flagged :primary_key in the schema of
# +table+.
#
# @param db [Object] database handle responding to #schema
# @param table [Object] table identifier passed to db.schema
# @return [Array] primary key column names in schema order; empty when the
#   table has no primary key
def primary_key(db, table)
  db.schema(table).each_with_object([]) do |column, keys|
    keys << column[0] if column[1][:primary_key]
  end
end
#schema_bin(command, *args) ⇒ Object
193 194 195 196 197 198 |
# File 'lib/tapsoob/utils.rb', line 193

# Invokes +command+ on a fresh Tapsoob::CLI::Schema instance.
#
# @param command [Symbol, String] name of the schema command to invoke
# @param args [Array] command arguments; each one is converted to a String
# @return [Object] whatever Tapsoob::CLI::Schema#invoke returns
def schema_bin(command, *args)
  # Required at call time rather than at file load.
  # NOTE(review): presumably to avoid a circular require with the CLI — confirm.
  require 'tapsoob/cli'

  Tapsoob::CLI::Schema.new.invoke(command, args.map { |a| "#{a}" })
end
#single_integer_primary_key(db, table) ⇒ Object
204 205 206 207 208 |
# File 'lib/tapsoob/utils.rb', line 204

# Reports whether exactly one column of +table+ is both flagged as a primary
# key and typed :integer.
#
# NOTE(review): a composite key with exactly one integer column also yields
# true here — confirm that is intended.
#
# @param db [Object] database handle responding to #schema
# @param table [String, Symbol, Sequel::SQL::Identifier] table identifier
# @return [Boolean]
def single_integer_primary_key(db, table)
  table = table.to_sym unless table.kind_of?(Sequel::SQL::Identifier)

  integer_keys = db.schema(table).count do |column|
    meta = column[1]
    meta[:primary_key] && meta[:type] == :integer
  end
  integer_keys == 1
end
#valid_data?(data, crc32) ⇒ Boolean
28 29 30 |
# File 'lib/tapsoob/utils.rb', line 28

# Checks +data+ against an expected CRC-32 checksum.
#
# @param data [String] bytes to verify
# @param crc32 [#to_i] expected checksum; converted with #to_i before comparing
# @return [Boolean] true when Zlib's CRC-32 of +data+ equals the expected value
def valid_data?(data, crc32)
  actual = Zlib.crc32(data)
  actual == crc32.to_i
end
#windows? ⇒ Boolean
13 14 15 16 17 |
# File 'lib/tapsoob/utils.rb', line 13

# Tells whether the host OS reported by RbConfig matches mswin or mingw.
#
# The answer is computed once and cached in @windows; defined? is used for
# the cache check so that a cached false is reused too.
#
# @return [Boolean]
def windows?
  unless defined?(@windows)
    require 'rbconfig'
    host_os = ::RbConfig::CONFIG['host_os']
    @windows = (host_os =~ /mswin|mingw/) ? true : false
  end
  @windows
end