Module: Tapsoob::Utils

Extended by:: Utils

Included in:: Utils

Defined in:: lib/tapsoob/utils.rb

Instance Method Summary collapse

#base64decode(data) ⇒ Object
#base64encode(data) ⇒ Object
#bin(cmd) ⇒ Object
#calculate_chunksize(old_chunksize) ⇒ Object
#checksum(data) ⇒ Object
#encode_blobs(row, columns) ⇒ Object
#export_indexes(dump_path, table, index_data) ⇒ Object
#export_rows(dump_path, table, row_data) ⇒ Object
#export_schema(dump_path, table, schema_data) ⇒ Object
#format_data(db, data, opts = {}) ⇒ Object
#incorrect_blobs(db, table) ⇒ Object

MySQL text and blob fields are handled the same way internally. This is not true for other databases, so we must check whether the field is actually text and manually convert it back to a string.
#load_indexes(database_url, index) ⇒ Object
#load_schema(dump_path, database_url_or_db, table) ⇒ Object
#order_by(db, table) ⇒ Object
#primary_key(db, table) ⇒ Object
#schema_bin(command, *args) ⇒ Object
#single_integer_primary_key(db, table) ⇒ Object
#valid_data?(data, crc32) ⇒ Boolean
#windows? ⇒ Boolean

Instance Method Details

#base64decode(data) ⇒ `Object`



36
37
38

# File 'lib/tapsoob/utils.rb', line 36

# Decodes Base64 text (the format produced by +Array#pack("m")+).
#
# @param data [String] Base64-encoded text
# @return [String] the first (and only) value unpacked from +data+
def base64decode(data)
  decoded, = data.unpack("m")
  decoded
end

#base64encode(data) ⇒ `Object`



32
33
34

# File 'lib/tapsoob/utils.rb', line 32

# Encodes +data+ as Base64 using the "m" pack directive.
#
# @param data [String] raw data to encode
# @return [String] Base64 text as emitted by +Array#pack("m")+
def base64encode(data)
  payload = [data]
  payload.pack("m")
end

#bin(cmd) ⇒ `Object`

# File 'lib/tapsoob/utils.rb', line 19

# Returns the executable name to use for +cmd+ on the current platform.
#
# @param cmd [#to_s] base command name
# @return [String, Object] "<cmd>.cmd" when #windows? is true, otherwise +cmd+ unchanged
def bin(cmd)
  windows? ? "#{cmd}.cmd" : cmd
end

#calculate_chunksize(old_chunksize) ⇒ `Object`

# File 'lib/tapsoob/utils.rb', line 130

# Runs the given block against a Tapsoob::Chunksize tracker and returns the
# chunk size computed from that run.
#
# The block receives the tracker and its return value is stored as
# +time_in_db+. If the block raises Errno::EPIPE, the tracker's retry counter
# is incremented, its chunk size is reset and the block is run again; once
# the counter exceeds 2 the error is re-raised.
#
# @param old_chunksize [Object] value handed to Tapsoob::Chunksize.new
# @yieldparam chunk [Tapsoob::Chunksize] the tracker for this attempt
# @return [Object] whatever Tapsoob::Chunksize#calc_new_chunksize returns
# @raise [Errno::EPIPE] after the retry counter exceeds 2
def calculate_chunksize(old_chunksize)
  chunk = Tapsoob::Chunksize.new(old_chunksize)

  begin
    chunk.start_time = Time.now
    chunk.time_in_db = yield(chunk)
  rescue Errno::EPIPE
    chunk.retries += 1
    raise if chunk.retries > 2

    # The connection dropped, possibly because the chunk was too large:
    # let the tracker pick a new size based on the retry count, then retry.
    chunk.reset_chunksize
    retry
  end

  chunk.end_time = Time.now
  chunk.calc_new_chunksize
end

#checksum(data) ⇒ `Object`



24
25
26

# File 'lib/tapsoob/utils.rb', line 24

# Computes the CRC-32 checksum of +data+.
#
# @param data [String] bytes to checksum
# @return [Integer] value returned by Zlib.crc32
def checksum(data)
  crc = Zlib.crc32(data)
  crc
end

#encode_blobs(row, columns) ⇒ `Object`

# File 'lib/tapsoob/utils.rb', line 109

# Base64-encodes binary values of +row+ in place.
#
# Two passes are made:
# 1. For every column named in +columns+, the value is encoded when it is a
#    Sequel::SQL::Blob, or when it is non-nil and its encoding is ASCII-8BIT.
# 2. Every remaining value that is a Sequel::SQL::Blob is encoded even if its
#    key is not listed in +columns+.
#
# @param row [Hash] column => value mapping; modified in place
# @param columns [Array] keys of +row+ known to hold blob data
# @return [Hash] the same +row+ object
def encode_blobs(row, columns)
  unless columns.size.zero?
    columns.each do |name|
      value = row[name]
      # Binary data is not always wrapped in Sequel::SQL::Blob, so also
      # accept any non-nil value whose encoding is ASCII-8BIT.
      binary = value.is_a?(Sequel::SQL::Blob) ||
               (!value.nil? && value.encoding == Encoding::ASCII_8BIT)
      row[name] = base64encode(value) if binary
    end
  end

  # Catch Blob values stored under keys that were not listed in +columns+.
  row.each do |key, value|
    next unless value.is_a?(Sequel::SQL::Blob)
    next if columns.include?(key)

    row[key] = base64encode(value)
  end

  row
end

#export_indexes(dump_path, table, index_data) ⇒ `Object`

# File 'lib/tapsoob/utils.rb', line 156

# Appends +index_data+ as one NDJSON line to
# "<dump_path>/indexes/<table>.json".
#
# The file is opened in append mode, so earlier lines are never rewritten.
#
# @param dump_path [String] root directory of the dump
# @param table [#to_s] table name used as the file's base name
# @param index_data [Object] value serialised with JSON.generate
# @return [Integer] number of bytes written
def export_indexes(dump_path, table, index_data)
  index_file = File.join(dump_path, "indexes", "#{table}.json")
  ndjson_line = JSON.generate(index_data) + "\n"

  File.write(index_file, ndjson_line, mode: 'a')
end

#export_rows(dump_path, table, row_data) ⇒ `Object`

# File 'lib/tapsoob/utils.rb', line 167

# Appends +row_data+ as one NDJSON line to "<dump_path>/data/<table>.json".
#
# The file is opened in append mode, so chunks written earlier are never
# rewritten.
#
# @param dump_path [String] root directory of the dump
# @param table [#to_s] table name used as the file's base name
# @param row_data [Object] value serialised with JSON.generate
# @return [Integer] number of bytes written
def export_rows(dump_path, table, row_data)
  data_file = File.join(dump_path, "data", "#{table}.json")
  ndjson_line = JSON.generate(row_data) + "\n"

  File.write(data_file, ndjson_line, mode: 'a')
end

#export_schema(dump_path, table, schema_data) ⇒ `Object`

# File 'lib/tapsoob/utils.rb', line 150

# Writes +schema_data+ to "<dump_path>/schemas/<table>.rb", replacing any
# existing content of that file.
#
# @param dump_path [String] root directory of the dump
# @param table [#to_s] table name used as the file's base name
# @param schema_data [#to_s] schema text to store
# @return [Integer] number of bytes written
def export_schema(dump_path, table, schema_data)
  schema_file = File.join(dump_path, "schemas", "#{table}.rb")
  File.write(schema_file, schema_data)
end

#format_data(db, data, opts = {}) ⇒ `Object`

# File 'lib/tapsoob/utils.rb', line 40

# Converts an array of row hashes into the header/data structure used for
# dumps.
#
# Each row is passed through #encode_blobs, every value is checked against
# the +varchar(N)+ limit declared for its column in the schema, and Time
# values are replaced by '%Y-%m-%d %H:%M:%S' strings. Values are written back
# into the row returned by #encode_blobs.
#
# @param db [Object] database handle; must respond to +extension+ and, when a
#   schema is given, +column_schema_to_ruby_type+
# @param data [Array<Hash>] rows; the keys of the first row become the header
# @param opts [Hash] optional settings
# @option opts [Array] :string_columns columns forwarded to #encode_blobs
# @option opts [Array] :schema list of [column, metadata] pairs
# @option opts [Object] :table table name stored in the result
# @return [Hash] +{}+ when +data+ is empty; otherwise a hash with
#   +:table_name+, +:header+, +:data+ and, when the schema is not empty,
#   +:types+
# @raise [Tapsoob::InvalidData] when a value is longer than its varchar limit
def format_data(db, data, opts = {})
  return {} if data.size == 0

  string_columns = opts[:string_columns] || []
  schema = opts[:schema] || []
  table = opts[:table]

  # column => N for every column declared as varchar(N)
  max_lengths = schema.each_with_object({}) do |(column, meta), limits|
    limits[column] = $1.to_i if meta[:db_type] =~ /^varchar\((\d+)\)/
  end

  header = data[0].keys
  only_data = data.map do |record|
    record = encode_blobs(record, string_columns)
    record.each do |column, value|
      text_length = value.to_s.length
      # Columns without a declared limit compare against their own length,
      # so they never trigger the error.
      if text_length > (max_lengths[column] || text_length)
        raise Tapsoob::InvalidData.new(<<-ERROR)
Detected data that exceeds the length limitation of its column. This is
generally due to the fact that SQLite does not enforce length restrictions.

Table : #{table}
Column : #{column}
Type : #{schema.detect{|s| s.first == column}.last[:db_type]}
Data : #{value}
        ERROR
      end

      # Type conversion
      record[column] = value.strftime('%Y-%m-%d %H:%M:%S') if value.is_a?(Time)
    end
    header.map { |key| record[key] }
  end

  res = { table_name: table, header: header, data: only_data }

  # Make sure the schema dumper extension is loaded before asking for types.
  db.extension :schema_dumper

  unless schema.empty?
    type_aliases = {
      "BigDecimal" => "float",
      "Bignum" => "integer",
      "File" => "blob",
      "TrueClass" => "boolean"
    }
    res[:types] = schema.map do |column_schema|
      ruby_type = db.column_schema_to_ruby_type(column_schema.last)[:type].to_s
      type_aliases.fetch(ruby_type) { ruby_type.downcase }
    end
  end

  res
end

#incorrect_blobs(db, table) ⇒ `Object`

MySQL text and blob fields are handled the same way internally. This is not true for other databases, so we must check whether the field is actually text and manually convert it back to a string.

# File 'lib/tapsoob/utils.rb', line 100

# Lists the columns of +table+ whose schema type is +:blob+.
#
# @param db [Object] database handle responding to +schema(table)+ with a
#   list of [column, metadata] pairs
# @param table [Object] table identifier passed through to +db.schema+
# @return [Array] names of the columns reported with +type: :blob+
def incorrect_blobs(db, table)
  db.schema(table)
    .select { |(_name, meta)| meta[:type] == :blob }
    .map { |(name, _meta)| name }
end

#load_indexes(database_url, index) ⇒ `Object`



189
190
191

# File 'lib/tapsoob/utils.rb', line 189

# Delegates to Tapsoob::Schema.load_indexes with the same arguments.
#
# @param database_url [Object] forwarded unchanged
# @param index [Object] forwarded unchanged
# @return [Object] whatever Tapsoob::Schema.load_indexes returns
def load_indexes(database_url, index)
  Tapsoob::Schema.load_indexes(database_url, index)
end

#load_schema(dump_path, database_url_or_db, table) ⇒ `Object`

# File 'lib/tapsoob/utils.rb', line 177

# Loads the schema stored in "<dump_path>/schemas/<table>.rb" into a database.
#
# The schema file is always read first. When given a Sequel::Database the
# file content is handed to Tapsoob::Schema.load on that connection;
# otherwise the argument and the file path are passed to #schema_bin.
#
# @param dump_path [String] root directory of the dump
# @param database_url_or_db [Sequel::Database, Object] open connection, or a
#   value forwarded to #schema_bin (presumably a database URL)
# @param table [#to_s] table name used as the file's base name
# @return [Object] result of Tapsoob::Schema.load or of #schema_bin
def load_schema(dump_path, database_url_or_db, table)
  schema_file = File.join(dump_path, "schemas", "#{table}.rb")
  schema_content = File.read(schema_file)

  # Without a connection object, fall back to the schema CLI command.
  unless database_url_or_db.is_a?(Sequel::Database)
    return schema_bin(:load, database_url_or_db, schema_file)
  end

  Tapsoob::Schema.load(database_url_or_db, schema_content)
end

#order_by(db, table) ⇒ `Object`

# File 'lib/tapsoob/utils.rb', line 210

# Returns the columns to order +table+ by.
#
# The primary key columns are used when the table has any; otherwise every
# column of the table is returned.
#
# @param db [Object] database handle; must support +db[table].columns+
# @param table [String, Symbol, Sequel::SQL::Identifier] table to inspect
# @return [Array] primary key columns, or all columns when there is no
#   primary key
def order_by(db, table)
  pkey = primary_key(db, table)
  key_columns =
    if pkey.kind_of?(Array)
      pkey
    elsif pkey
      [pkey.to_sym]
    else
      []
    end

  # primary_key returns an Array, and an empty Array is truthy, so the
  # "no primary key" case has to be detected with empty? rather than by
  # truthiness.
  return key_columns unless key_columns.empty?

  table = table.to_sym unless table.kind_of?(Sequel::SQL::Identifier)
  db[table].columns
end

#primary_key(db, table) ⇒ `Object`



200
201
202

# File 'lib/tapsoob/utils.rb', line 200

# Lists the primary key columns of +table+.
#
# @param db [Object] database handle responding to +schema(table)+ with a
#   list of [column, metadata] pairs
# @param table [Object] table identifier passed through to +db.schema+
# @return [Array] names of the columns whose metadata has a truthy
#   +:primary_key+; empty when there are none
def primary_key(db, table)
  key_columns = db.schema(table).select { |(_name, meta)| meta[:primary_key] }
  key_columns.map { |(name, _meta)| name }
end

#schema_bin(command, *args) ⇒ `Object`

# File 'lib/tapsoob/utils.rb', line 193

# Invokes a command on a fresh Tapsoob::CLI::Schema instance.
#
# @param command [Symbol, String] name of the command to invoke
# @param args [Array] arguments for the command; each one is converted to a
#   new String before being passed on
# @return [Object] whatever +Tapsoob::CLI::Schema#invoke+ returns
def schema_bin(command, *args)
  # Loaded on demand rather than at file load time.
  # NOTE(review): presumably to avoid a load-order cycle with the CLI - confirm.
  require 'tapsoob/cli'

  script = Tapsoob::CLI::Schema.new
  # Interpolation (rather than to_s) hands the command fresh String copies,
  # so the caller's objects are never shared with it.
  script.invoke(command, args.map { |a| "#{a}" })
end

#single_integer_primary_key(db, table) ⇒ `Object`

# File 'lib/tapsoob/utils.rb', line 204

# Tells whether +table+ has exactly one primary key column and that column is
# of type +:integer+.
#
# A composite primary key never qualifies, even when exactly one of its
# columns is an integer.
#
# @param db [Object] database handle responding to +schema(table)+ with a
#   list of [column, metadata] pairs
# @param table [String, Symbol, Sequel::SQL::Identifier] table to inspect;
#   converted with +to_sym+ unless it is already an identifier
# @return [Boolean]
def single_integer_primary_key(db, table)
  table = table.to_sym unless table.kind_of?(Sequel::SQL::Identifier)
  key_columns = db.schema(table).select { |(_name, meta)| meta[:primary_key] }

  # Count every primary key column first, then check the type: filtering on
  # the type before counting would accept composite keys that contain a
  # single integer column.
  key_columns.size == 1 && key_columns.first[1][:type] == :integer
end

#valid_data?(data, crc32) ⇒ `Boolean`

Returns:

(Boolean)



28
29
30

# File 'lib/tapsoob/utils.rb', line 28

# Checks +data+ against an expected CRC-32 checksum.
#
# @param data [String] bytes to verify
# @param crc32 [#to_i] expected checksum; converted with +to_i+ before the
#   comparison
# @return [Boolean] true when Zlib.crc32(data) equals the expected value
def valid_data?(data, crc32)
  actual = Zlib.crc32(data)
  expected = crc32.to_i
  actual == expected
end

#windows? ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/tapsoob/utils.rb', line 13

# Tells whether the host OS reported by RbConfig matches "mswin" or "mingw".
#
# The answer is computed on the first call and cached in +@windows+.
#
# @return [Boolean]
def windows?
  unless defined?(@windows)
    require 'rbconfig'
    host_os = ::RbConfig::CONFIG['host_os']
    @windows = (host_os =~ /mswin|mingw/) ? true : false
  end
  @windows
end

Module: Tapsoob::Utils

Instance Method Summary collapse

Instance Method Details

#base64decode(data) ⇒ Object

#base64encode(data) ⇒ Object

#bin(cmd) ⇒ Object

#calculate_chunksize(old_chunksize) ⇒ Object

#checksum(data) ⇒ Object

#encode_blobs(row, columns) ⇒ Object

#export_indexes(dump_path, table, index_data) ⇒ Object

#export_rows(dump_path, table, row_data) ⇒ Object

#export_schema(dump_path, table, schema_data) ⇒ Object

#format_data(db, data, opts = {}) ⇒ Object

#incorrect_blobs(db, table) ⇒ Object

#load_indexes(database_url, index) ⇒ Object

#load_schema(dump_path, database_url_or_db, table) ⇒ Object

#order_by(db, table) ⇒ Object

#primary_key(db, table) ⇒ Object

#schema_bin(command, *args) ⇒ Object

#single_integer_primary_key(db, table) ⇒ Object

#valid_data?(data, crc32) ⇒ Boolean

#windows? ⇒ Boolean