Class: GitLab::Exporter::Database::RowCountCollector

Inherits:
Base
  • Object
show all
Defined in:
lib/gitlab_exporter/database/row_count.rb

Overview

A helper class that executes the query its given and returns an int of the row count This class works under the assumption you do COUNT(*) queries, define queries in the QUERIES constant. If in doubt how these work, read #construct_query rubocop:disable Metrics/ClassLength

Constant Summary collapse

WHERE_MIRROR_ENABLED =

We ignore mirrors with a next_execution_timestamp before 2020-03-28 because this is when we stopped processing mirrors for private projects on the free plan. Skipping those can significantly improve query performance: gitlab.com/gitlab-org/gitlab/-/issues/216252#note_334514544

<<~SQL.freeze
  projects.mirror = true
  AND projects.archived = false
  AND project_mirror_data.retry_count <= 14
  AND (
    (projects.visibility_level = 20 AND root_namespaces.visibility_level = 20)
    OR
    plans.name IN ('early_adopter', 'bronze', 'silver', 'gold')
  )
  AND project_mirror_data.next_execution_timestamp > '2020-03-28'
SQL
MIRROR_QUERY =
{
  select: :projects,
  joins: <<~SQL,
    INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id
    INNER JOIN namespaces AS root_namespaces ON root_namespaces.id = (
      WITH RECURSIVE "base_and_ancestors" AS (
        (SELECT "namespaces".* FROM "namespaces" WHERE "namespaces"."id" = projects.namespace_id)
        UNION
        (SELECT "namespaces".* FROM "namespaces", "base_and_ancestors" WHERE "namespaces"."id" = "base_and_ancestors"."parent_id")
      ) SELECT "namespaces".id FROM "base_and_ancestors" AS "namespaces" WHERE "namespaces"."parent_id" IS NULL
    )
    LEFT JOIN gitlab_subscriptions ON gitlab_subscriptions.namespace_id = root_namespaces.id
    LEFT JOIN plans ON plans.id = gitlab_subscriptions.hosted_plan_id
  SQL
  check: "SELECT 1 FROM information_schema.tables WHERE table_name='plans'"
}.freeze
QUERIES =
{
  mirrors_ready_to_sync: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status NOT IN ('scheduled', 'started')
      AND project_mirror_data.next_execution_timestamp <= NOW()
    SQL
  ),
  mirrors_not_updated_recently: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status NOT IN ('scheduled', 'started')
      AND (project_mirror_data.next_execution_timestamp - project_mirror_data.last_update_at) <= '30 minutes'::interval
      AND project_mirror_data.last_update_at < NOW() - '30 minutes'::interval
    SQL
  ),
  mirrors_updated_very_recently: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status NOT IN ('scheduled', 'started')
      AND project_mirror_data.last_update_at >= NOW() - '30 seconds'::interval
    SQL
  ),
  mirrors_behind_schedule: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status NOT IN ('scheduled', 'started')
      AND project_mirror_data.next_execution_timestamp <= NOW() - '10 seconds'::interval
    SQL
  ),
  mirrors_scheduled_or_started: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status IN ('scheduled', 'started')
    SQL
  ),
  mirrors_scheduled: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status = 'scheduled'
    SQL
  ),
  mirrors_started: MIRROR_QUERY.merge( # EE only
    where: <<~SQL
      #{WHERE_MIRROR_ENABLED}
      AND project_mirror_data.status = 'started'
    SQL
  ),
  soft_deleted_projects: { select: :projects, where: "pending_delete=true" },
  orphaned_projects: {
    select: :projects,
    joins: "LEFT JOIN namespaces ON projects.namespace_id = namespaces.id",
    where: "namespaces.id IS NULL"
  },
  uploads: { select: :uploads },
  users: {
    select: :users,
    joins: "LEFT JOIN
      (
        SELECT
          members.user_id,
          MAX(access_level) as access_level
        FROM members
        GROUP BY members.user_id
      ) AS u
      ON users.id = u.user_id",
    where: "user_type IS NULL",
    fields: {
      admin: {},
      external: {},
      state: {},
      access_level: { definition: "COALESCE(u.access_level, 0)" }
    }
  },
  projects: {
    select: :projects,
    fields: {
      visibility_level: {},
      archived: {}
    }
  },
  groups: {
    select: :namespaces,
    fields: {
      visibility_level: {},
      root: { definition: "(parent_id IS NULL)" }
    }
  },
  registry_gc_manifest_review_queue: { select: :gc_manifest_review_queue },
  registry_gc_manifest_review_queue_overdue: {
    select: :gc_manifest_review_queue,
    where: "review_after < NOW()"
  },
  registry_gc_blob_review_queue: { select: :gc_blob_review_queue },
  registry_gc_blob_review_queue_overdue: {
    select: :gc_blob_review_queue,
    where: "review_after < NOW()"
  },
  registry_top_level_namespaces: { select: :top_level_namespaces },
  # Please note that the tables below are partitioned, so a SELECT COUNT(*)
  # will scale poorly. Avoid using these in production:
  registry_repositories: { select: :repositories },
  registry_manifests: { select: :manifests },
  registry_blobs: { select: :blobs }
}.freeze

Constants inherited from Base

Base::POOL_SIZE, Base::POOL_TIMEOUT

Instance Method Summary collapse

Methods inherited from Base

configure_type_map_for_results, connection_pool, #connection_pool, #with_connection_pool

Constructor Details

#initialize(selected_queries: nil, **args) ⇒ RowCountCollector

Returns a new instance of RowCountCollector.



153
154
155
156
157
# File 'lib/gitlab_exporter/database/row_count.rb', line 153

def initialize(selected_queries: nil, **args)
  super(**args)

  @selected_queries = Set.new(selected_queries.map(&:to_sym)) unless selected_queries.nil?
end

Instance Method Details

#runObject



159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/gitlab_exporter/database/row_count.rb', line 159

def run
  results = Hash.new(0)

  QUERIES.each do |key, query_hash|
    next if query_hash[:check] && !successful_check?(query_hash[:check])
    next if !@selected_queries.nil? && !@selected_queries.include?(key)

    results[key] = count_from_query_hash(query_hash)
  end

  results
end