Class: Blazer::Adapters::SqlAdapter

Inherits:

BaseAdapter

Object
BaseAdapter
Blazer::Adapters::SqlAdapter

show all

Defined in: lib/blazer/adapters/sql_adapter.rb

Direct Known Subclasses

SnowflakeAdapter

Instance Attribute Summary collapse

#connection_model ⇒ Object readonly

Returns the value of attribute connection_model.

Attributes inherited from BaseAdapter

#data_source

Instance Method Summary collapse

#cachable?(statement) ⇒ Boolean
#cancel(run_id) ⇒ Object
#cohort_analysis_statement(statement, period:, days:) ⇒ Object

TODO: treat date columns as already being in the configured time zone.
#cost(statement) ⇒ Object
#explain(statement) ⇒ Object
#initialize(data_source) ⇒ SqlAdapter constructor

A new instance of SqlAdapter.
#preview_statement ⇒ Object
#reconnect ⇒ Object
#run_statement(statement, comment) ⇒ Object
#schema ⇒ Object
#supports_cohort_analysis? ⇒ Boolean
#tables ⇒ Object

Constructor Details

#initialize(data_source) ⇒ `SqlAdapter`

Returns a new instance of SqlAdapter.

# File 'lib/blazer/adapters/sql_adapter.rb', line 6

# Creates the adapter and a dedicated connection class for its data source.
#
# The connection class is an anonymous subclass of Blazer::Connection that
# reports its name as "Blazer::Connection::Adapter<object_id>". It opens its
# own connection only when the data source settings contain a "url" entry.
#
# @param data_source [Object] data source; forwarded unchanged to the superclass
def initialize(data_source)
  super

  @connection_model = Class.new(Blazer::Connection) do
    def self.name
      "Blazer::Connection::Adapter#{object_id}"
    end

    url = data_source.settings["url"]
    establish_connection(url) if url
  end
end

Instance Attribute Details

#connection_model ⇒ `Object` (readonly)

Returns the value of attribute connection_model.



4
5
6

# File 'lib/blazer/adapters/sql_adapter.rb', line 4

# Reader for the connection class stored in @connection_model.
#
# @return [Object, nil] the stored connection class, or nil when it has not been set
def connection_model
  instance_variable_get(:@connection_model)
end

Instance Method Details

#cachable?(statement) ⇒ `Boolean`

Returns:

(Boolean)



121
122
123

# File 'lib/blazer/adapters/sql_adapter.rb', line 121

# Tells whether the result of a statement may be cached.
#
# Only the first whitespace-separated word is inspected (case-insensitively);
# statements starting with CREATE, ALTER, UPDATE, INSERT or DELETE are not cachable.
#
# @param statement [String] SQL text
# @return [Boolean] false when the statement starts with one of the keywords above
def cachable?(statement)
  leading_keyword = statement.split.first.to_s.upcase
  %w[CREATE ALTER UPDATE INSERT DELETE].none? { |keyword| keyword == leading_keyword }
end

#cancel(run_id) ⇒ `Object`

# File 'lib/blazer/adapters/sql_adapter.rb', line 110

# Cancels the running query whose text contains ",run_id:<run_id>".
#
# On PostgreSQL every matching backend other than the current one is cancelled
# with pg_cancel_backend. On Redshift the first matching running query is
# looked up in stv_recents and cancelled by pid. Other adapters do nothing.
#
# @param run_id [Object] identifier interpolated into the LIKE pattern
# @return [Object, nil] result of the last query issued, or nil when nothing was cancelled
def cancel(run_id)
  if postgresql?
    like_pattern = "%,run_id:#{run_id}%"
    select_all("SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE pid <> pg_backend_pid() AND query LIKE ?", [like_pattern])
  elsif redshift?
    like_pattern = "%,run_id:#{run_id}%"
    running = select_all("SELECT pid FROM stv_recents WHERE status = 'Running' AND query LIKE ?", [like_pattern]).first
    # to_i keeps the interpolated pid strictly numeric.
    select_all("CANCEL #{running["pid"].to_i}") if running
  end
end

#cohort_analysis_statement(statement, period:, days:) ⇒ `Object`

TODO: treat date columns as already being in the configured time zone.

# File 'lib/blazer/adapters/sql_adapter.rb', line 130

# Wraps a query in SQL that produces cohort-analysis counts.
#
# The generated SQL reads user_id and conversion_time from the wrapped query
# (and cohort_time when the query text mentions it). It returns three columns:
# the cohort period, a bucket number, and a distinct user count. Bucket 0 is
# the cohort size; later buckets are the elapsed time between cohort_time and
# conversion_time divided by +days+ days, rounded up.
#
# TODO: treat date columns as already in time zone
#
# @param statement [String] the user's query
# @param period [String] "day", "week", or anything else for month on MySQL;
#   passed straight to date_trunc on other adapters
# @param days [#to_i] bucket width in days
# @return [String] the sanitized SQL with bind values substituted
# @raise [RuntimeError] when the adapter does not support cohort analysis
def cohort_analysis_statement(statement, period:, days:)
  raise "Cohort analysis not supported" unless supports_cohort_analysis?

  cohort_column = /\bcohort_time\b/.match?(statement) ? "cohort_time" : "conversion_time"
  tzname = Blazer.time_zone.tzinfo.name

  if mysql?
    time_sql = "CONVERT_TZ(cohorts.cohort_time, '+00:00', ?)"
    case period
    when "day"
      date_sql = "CAST(DATE_FORMAT(#{time_sql}, '%Y-%m-%d') AS DATE)"
      date_params = [tzname]
    when "week"
      # time_sql appears twice here, so the time zone is bound twice.
      date_sql = "CAST(DATE_FORMAT(#{time_sql} - INTERVAL ((5 + DAYOFWEEK(#{time_sql})) % 7) DAY, '%Y-%m-%d') AS DATE)"
      date_params = [tzname, tzname]
    else
      date_sql = "CAST(DATE_FORMAT(#{time_sql}, '%Y-%m-01') AS DATE)"
      date_params = [tzname]
    end
    bucket_sql = "CAST(CEIL(TIMESTAMPDIFF(SECOND, cohorts.cohort_time, query.conversion_time) / ?) AS INTEGER)"
  else
    date_sql = "date_trunc(?, cohorts.cohort_time::timestamptz AT TIME ZONE ?)::date"
    date_params = [period, tzname]
    bucket_sql = "CEIL(EXTRACT(EPOCH FROM query.conversion_time - cohorts.cohort_time) / ?)::int"
  end

  # The user's query is spliced in only after bind substitution, so a literal
  # "?" inside it is never mistaken for a bind placeholder.
  query_marker = "{placeholder}"

  # WITH not an optimization fence in Postgres 12+
  template = <<-SQL
    WITH query AS (
      #{query_marker}
    ),
    cohorts AS (
      SELECT user_id, MIN(#{cohort_column}) AS cohort_time FROM query
      WHERE user_id IS NOT NULL AND #{cohort_column} IS NOT NULL
      GROUP BY 1
    )
    SELECT
      #{date_sql} AS period,
      0 AS bucket,
      COUNT(DISTINCT cohorts.user_id)
    FROM cohorts GROUP BY 1
    UNION ALL
    SELECT
      #{date_sql} AS period,
      #{bucket_sql} AS bucket,
      COUNT(DISTINCT query.user_id)
    FROM cohorts INNER JOIN query ON query.user_id = cohorts.user_id
    WHERE query.conversion_time IS NOT NULL
    AND query.conversion_time >= cohorts.cohort_time
    #{cohort_column == "conversion_time" ? "AND query.conversion_time != cohorts.cohort_time" : ""}
    GROUP BY 1, 2
  SQL
  # date_sql is used twice in the template, hence date_params twice.
  params = [template] + date_params + date_params + [days.to_i * 86400]
  # Block form keeps backslashes in the user's query from being read as back-references.
  connection_model.send(:sanitize_sql_array, params).sub(query_marker) { statement }
end

#cost(statement) ⇒ `Object`

# File 'lib/blazer/adapters/sql_adapter.rb', line 84

# Estimates the planner cost of a statement from the output of #explain.
#
# On SQL Server the cost is read from the "TotalSubtreeCost" entry of the plan
# row. On every other adapter it is the upper bound of the first
# "cost=<start>..<total> " pair found in the plan text.
#
# @param statement [String] SQL to estimate
# @return [Object, nil] the cost (a String when parsed from plan text), or nil
#   when no plan or no cost figure is available
def cost(statement)
  plan = explain(statement)
  # #explain yields nil when the statement fails or the adapter has no plan
  # support; without this guard the SQL Server branch would raise NoMethodError.
  return unless plan

  if sqlserver?
    plan["TotalSubtreeCost"]
  else
    # The dots between the two figures are matched literally.
    match = /cost=\d+\.\d+\.\.(\d+\.\d+) /.match(plan)
    match[1] if match
  end
end

#explain(statement) ⇒ `Object`

# File 'lib/blazer/adapters/sql_adapter.rb', line 94

# Asks the database for the execution plan of a statement.
#
# PostgreSQL and Redshift: runs EXPLAIN and returns the first column of the
# first row. SQL Server: turns SHOWPLAN_ALL on, runs the statement, returns
# the first row, and always turns SHOWPLAN_ALL off again. Other adapters get nil.
#
# Any StandardError raised along the way is swallowed and nil is returned.
#
# @param statement [String] SQL to explain
# @return [Object, nil] plan text or plan row, or nil when unavailable
def explain(statement)
  return select_all("EXPLAIN #{statement}").rows.first.first if postgresql? || redshift?
  return unless sqlserver?

  begin
    execute("SET SHOWPLAN_ALL ON")
    select_all(statement).each.first
  ensure
    # Must run even when the statement fails, or the session stays in plan-only mode.
    execute("SET SHOWPLAN_ALL OFF")
  end
rescue
  nil
end

#preview_statement ⇒ `Object`

# File 'lib/blazer/adapters/sql_adapter.rb', line 72

# Template for previewing the first ten rows of a table.
#
# The literal "{table}" token is left in the string for the caller to fill in.
# SQL Server gets the TOP form; every other adapter gets LIMIT.
#
# @return [String] the preview query template
def preview_statement
  return "SELECT TOP (10) * FROM {table}" if sqlserver?

  "SELECT * FROM {table} LIMIT 10"
end

#reconnect ⇒ `Object`



80
81
82

# File 'lib/blazer/adapters/sql_adapter.rb', line 80

# Re-establishes the connection of the adapter's connection class using the
# "url" entry of the settings.
#
# @return [Object] whatever establish_connection returns
def reconnect
  model = connection_model
  model.establish_connection(settings["url"])
end

#run_statement(statement, comment) ⇒ `Object`

# File 'lib/blazer/adapters/sql_adapter.rb', line 18

# Runs a statement inside a transaction and collects its result.
#
# The comment is appended to the statement as a trailing /*...*/ SQL comment.
# When the data source defines a timeout it is applied before the query runs.
# Values are cast with the column's type object when the result exposes one
# for that column and the raw value is truthy; otherwise the raw value is kept.
#
# Errors are not raised. The message, stripped of everything up to and
# including "ERROR: ", is returned instead. It is replaced by
# Blazer::TIMEOUT_MESSAGE when it contains any of Blazer::TIMEOUT_ERRORS. The
# connection is re-established when the message mentions PG::ConnectionBad.
#
# @param statement [String] SQL to run
# @param comment [String] text embedded in the trailing SQL comment
# @return [Array] three elements: column names, rows, and an error message or nil
def run_statement(statement, comment)
  columns = []
  rows = []
  error = nil

  begin
    in_transaction do
      set_timeout(data_source.timeout) if data_source.timeout

      result = select_all("#{statement} /*#{comment}*/")
      columns = result.columns
      result.rows.each do |raw_row|
        types = result.column_types
        if types.empty?
          rows << raw_row
        else
          typed_row = columns.each_with_index.map do |column, index|
            raw_value = raw_row[index]
            type = raw_value && types[column]
            type ? type.send(:cast_value, raw_value) : raw_value
          end
          rows << typed_row
        end
      end
    end
  rescue => e
    error = e.message.sub(/.+ERROR: /, "")
    timed_out = Blazer::TIMEOUT_ERRORS.any? { |fragment| error.include?(fragment) }
    error = Blazer::TIMEOUT_MESSAGE if timed_out
    reconnect if error.include?("PG::ConnectionBad")
  end

  [columns, rows, error]
end

#schema ⇒ `Object`

# File 'lib/blazer/adapters/sql_adapter.rb', line 66

# Describes every table visible in information_schema.columns.
#
# Each entry is a hash with :schema, :table and :columns. Columns carry :name
# and :data_type and are ordered by column name. Tables are ordered by schema,
# then table name, with the default schema listed first.
#
# NOTE(review): ordinal_position is selected but not used for ordering.
#
# @return [Array<Hash>] one hash per table
def schema
  sql = add_schemas("SELECT table_schema, table_name, column_name, data_type, ordinal_position FROM information_schema.columns")
  column_rows = data_source.run_statement(sql).rows

  described = column_rows.group_by { |row| [row[0], row[1]] }.map do |(schema_name, table_name), members|
    columns = members.sort_by { |member| member[2] }.map do |member|
      {name: member[2], data_type: member[3]}
    end
    {schema: schema_name, table: table_name, columns: columns}
  end

  described.sort_by do |entry|
    # An empty string sorts ahead of any schema name, putting the default schema first.
    schema_key = entry[:schema] == default_schema ? "" : entry[:schema]
    [schema_key, entry[:table]]
  end
end

#supports_cohort_analysis? ⇒ `Boolean`

Returns:

(Boolean)



125
126
127

# File 'lib/blazer/adapters/sql_adapter.rb', line 125

# Cohort analysis is available on PostgreSQL and MySQL only.
#
# @return [Boolean] truthy when the adapter is PostgreSQL or MySQL
def supports_cohort_analysis?
  on_postgres = postgresql?
  on_postgres || mysql?
end

#tables ⇒ `Object`

# File 'lib/blazer/adapters/sql_adapter.rb', line 42

# Lists the tables found in information_schema.tables, bypassing the cache.
#
# PostgreSQL, Redshift and Snowflake return hashes with :table (the name,
# schema-qualified unless it lives in the default schema, lower-cased on
# Snowflake) and :value (that name quoted by the connection). They are ordered
# with the default schema first. Every other adapter returns a sorted list of
# bare table names.
#
# @return [Array<Hash>, Array<String>] table descriptors or table names
def tables
  sql = add_schemas("SELECT table_schema, table_name FROM information_schema.tables")
  result = data_source.run_statement(sql, refresh_cache: true)

  return result.rows.map { |row| row[1] }.sort unless postgresql? || redshift? || snowflake?

  ordered = result.rows.sort_by do |schema_name, table_name|
    # An empty string sorts ahead of any schema name, putting the default schema first.
    [schema_name == default_schema ? "" : schema_name, table_name]
  end

  ordered.map do |schema_name, table_name|
    table = schema_name == default_schema ? table_name : "#{schema_name}.#{table_name}"
    table = table.downcase if snowflake?

    {table: table, value: connection_model.connection.quote_table_name(table)}
  end
end

Class: Blazer::Adapters::SqlAdapter

Direct Known Subclasses

Instance Attribute Summary collapse

Attributes inherited from BaseAdapter

Instance Method Summary collapse

Constructor Details

#initialize(data_source) ⇒ SqlAdapter

Instance Attribute Details

#connection_model ⇒ Object (readonly)

Instance Method Details

#cachable?(statement) ⇒ Boolean

#cancel(run_id) ⇒ Object

#cohort_analysis_statement(statement, period:, days:) ⇒ Object

#cost(statement) ⇒ Object

#explain(statement) ⇒ Object

#preview_statement ⇒ Object

#reconnect ⇒ Object

#run_statement(statement, comment) ⇒ Object

#schema ⇒ Object

#supports_cohort_analysis? ⇒ Boolean

#tables ⇒ Object

#initialize(data_source) ⇒ `SqlAdapter`

#connection_model ⇒ `Object` (readonly)

#cachable?(statement) ⇒ `Boolean`

#cancel(run_id) ⇒ `Object`

#cohort_analysis_statement(statement, period:, days:) ⇒ `Object`

#cost(statement) ⇒ `Object`

#explain(statement) ⇒ `Object`

#preview_statement ⇒ `Object`

#reconnect ⇒ `Object`

#run_statement(statement, comment) ⇒ `Object`

#schema ⇒ `Object`

#supports_cohort_analysis? ⇒ `Boolean`

#tables ⇒ `Object`