Class: GitLab::Exporter::Database::RowCountCollector

Inherits:
Base
  • Object
show all
Defined in:
lib/gitlab_exporter/database/row_count.rb

Overview

A helper class that executes the query its given and returns an int of the row count This class works under the assumption you do COUNT(*) queries, define queries in the QUERIES constant. If in doubt how these work, read #construct_query rubocop:disable Metrics/ClassLength

Constant Summary collapse

WHERE_MIRROR_ENABLED =

We ignore mirrors with a next_execution_timestamp before 2020-03-28 because this is when we stopped processing mirrors for private projects on the free plan. Skipping those can significantly improve query performance: gitlab.com/gitlab-org/gitlab/-/issues/216252#note_334514544

<<~SQL.freeze
  projects.mirror = true
  AND projects.archived = false
  AND project_mirror_data.retry_count <= 14
  AND (
    (projects.visibility_level = 20 AND root_namespaces.visibility_level = 20)
    OR
    plans.name IN ('early_adopter', 'bronze', 'silver', 'gold')
  )
  AND project_mirror_data.next_execution_timestamp > '2020-03-28'
SQL
MIRROR_QUERY =
{
  select: :projects,
  joins: <<~SQL,
    INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id
    INNER JOIN namespaces AS root_namespaces ON root_namespaces.id = (
      WITH RECURSIVE "base_and_ancestors" AS (
        (SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = projects.namespace_id)
        UNION
        (SELECT "namespaces".* FROM "namespaces", "base_and_ancestors" WHERE "namespaces"."id" = "base_and_ancestors"."parent_id")
      ) SELECT "namespaces".id FROM "base_and_ancestors" AS "namespaces" WHERE "namespaces"."parent_id" IS NULL
    )
    LEFT JOIN gitlab_subscriptions ON gitlab_subscriptions.namespace_id = root_namespaces.id
    LEFT JOIN plans ON plans.id = gitlab_subscriptions.hosted_plan_id
  SQL
  check: "SELECT 1 FROM information_schema.tables WHERE table_name='plans'"
}.freeze
QUERIES =
{
  mirrors_ready_to_sync: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status NOT IN ('scheduled', 'started')
      AND project_mirror_data.next_execution_timestamp <= NOW()
    SQL
  ),
  mirrors_not_updated_recently: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status NOT IN ('scheduled', 'started')
      AND (project_mirror_data.next_execution_timestamp - project_mirror_data.last_update_at) <= '30 minutes'::interval
      AND project_mirror_data.last_update_at < NOW() - '30 minutes'::interval
    SQL
  ),
  mirrors_updated_very_recently: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status NOT IN ('scheduled', 'started')
      AND project_mirror_data.last_update_at >= NOW() - '30 seconds'::interval
    SQL
  ),
  mirrors_behind_schedule: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status NOT IN ('scheduled', 'started')
      AND project_mirror_data.next_execution_timestamp <= NOW() - '10 seconds'::interval
    SQL
  ),
  mirrors_scheduled_or_started: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status IN ('scheduled', 'started')
    SQL
  ),
  mirrors_scheduled: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status = 'scheduled'
    SQL
  ),
  mirrors_started: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status = 'started'
    SQL
  ),
  soft_deleted_projects: { select: :projects, where: "pending_delete=true" },
  orphaned_projects: {
    select: :projects,
    joins: "LEFT JOIN namespaces ON projects.namespace_id = namespaces.id",
    where: "namespaces.id IS NULL"
  },
  uploads: { select: :uploads },
  users: {
    select: :users,
    joins: "LEFT JOIN
      (
        SELECT
          members.user_id,
          MAX(access_level) as access_level
        FROM members
        GROUP BY members.user_id
      ) AS u
      ON users.id = u.user_id",
    where: "user_type IS NULL",
    fields: {
      admin: {},
      external: {},
      state: {},
      access_level: { definition: "COALESCE(u.access_level, 0)" }
    }
  },
  projects: {
    select: :projects,
    fields: {
      visibility_level: {},
      archived: {}
    }
  },
  groups: {
    select: :namespaces,
    fields: {
      visibility_level: {},
      root: { definition: "(parent_id IS NULL)" }
    }
  }
}.freeze

Constants inherited from Base

Base::POOL_SIZE, Base::POOL_TIMEOUT

Instance Method Summary collapse

Methods inherited from Base

configure_type_map_for_results, connection_pool, #connection_pool, #with_connection_pool

Constructor Details

#initialize(args) ⇒ RowCountCollector

Returns a new instance of RowCountCollector.



137
138
139
140
141
# File 'lib/gitlab_exporter/database/row_count.rb', line 137

def initialize(args)
  super(args)

  @selected_queries = Set.new(args[:selected_queries].map(&:to_sym)) unless args[:selected_queries].nil?
end

Instance Method Details

#runObject



143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/gitlab_exporter/database/row_count.rb', line 143

def run
  results = Hash.new(0)

  QUERIES.each do |key, query_hash|
    next if query_hash[:check] && !successful_check?(query_hash[:check])
    next if !@selected_queries.nil? && !@selected_queries.include?(key)

    results[key] = count_from_query_hash(query_hash)
  end

  results
end