Class: GitLab::Exporter::Database::RowCountCollector

Inherits:
Base
  • Object
show all
Defined in:
lib/gitlab_exporter/database/row_count.rb

Overview

A helper class that executes the query its given and returns an int of the row count This class works under the assumption you do COUNT(*) queries, define queries in the QUERIES constant. If in doubt how these work, read #construct_query

Constant Summary collapse

WHERE_MIRROR_ENABLED =

We ignore mirrors with a next_execution_timestamp before 2020-03-28 because this is when we stopped processing mirrors for private projects on the free plan. Skipping those can significantly improve query performance: gitlab.com/gitlab-org/gitlab/-/issues/216252#note_334514544

"projects.mirror = true\nAND projects.archived = false\nAND project_mirror_data.retry_count <= 14\nAND project_mirror_data.next_execution_timestamp > '2020-03-28'\n".freeze
MIRROR_QUERY =
{
  select: :projects,
  joins: "    INNER JOIN project_mirror_data ON project_mirror_data.project_id = projects.id\n  SQL\n  check: \"SELECT 1 FROM information_schema.tables WHERE table_name='plans'\"\n}.freeze\n",
CONTAINER_REPOSITORIES_CLEANUP_ENABLED_QUERY =
{
  select: :container_repositories,
  joins: "    INNER JOIN container_expiration_policies\n    ON container_repositories.project_id = container_expiration_policies.project_id\n  SQL\n  where: \"container_expiration_policies.enabled = TRUE\"\n}.freeze\n",
QUERIES =
{
  mirrors_ready_to_sync: MIRROR_QUERY.merge( # EE only
    where: "      \#{WHERE_MIRROR_ENABLED}\n      AND project_mirror_data.status NOT IN ('scheduled', 'started')\n      AND project_mirror_data.next_execution_timestamp <= NOW()\n    SQL\n  ),\n  mirrors_not_updated_recently: MIRROR_QUERY.merge( # EE only\n    where: <<~SQL\n      \#{WHERE_MIRROR_ENABLED}\n      AND project_mirror_data.status NOT IN ('scheduled', 'started')\n      AND (project_mirror_data.next_execution_timestamp - project_mirror_data.last_update_at) <= '30 minutes'::interval\n      AND project_mirror_data.last_update_at < NOW() - '30 minutes'::interval\n    SQL\n  ),\n  mirrors_updated_very_recently: MIRROR_QUERY.merge( # EE only\n    where: <<~SQL\n      \#{WHERE_MIRROR_ENABLED}\n      AND project_mirror_data.status NOT IN ('scheduled', 'started')\n      AND project_mirror_data.last_update_at >= NOW() - '30 seconds'::interval\n    SQL\n  ),\n  mirrors_behind_schedule: MIRROR_QUERY.merge( # EE only\n    where: <<~SQL\n      \#{WHERE_MIRROR_ENABLED}\n      AND project_mirror_data.status NOT IN ('scheduled', 'started')\n      AND project_mirror_data.next_execution_timestamp <= NOW() - '10 seconds'::interval\n    SQL\n  ),\n  mirrors_scheduled_or_started: MIRROR_QUERY.merge( # EE only\n    where: <<~SQL\n      \#{WHERE_MIRROR_ENABLED}\n      AND project_mirror_data.status IN ('scheduled', 'started')\n    SQL\n  ),\n  mirrors_scheduled: MIRROR_QUERY.merge( # EE only\n    where: <<~SQL\n      \#{WHERE_MIRROR_ENABLED}\n      AND project_mirror_data.status = 'scheduled'\n    SQL\n  ),\n  mirrors_started: MIRROR_QUERY.merge( # EE only\n    where: <<~SQL\n      \#{WHERE_MIRROR_ENABLED}\n      AND project_mirror_data.status = 'started'\n    SQL\n  ),\n  soft_deleted_projects: { select: :projects, where: \"pending_delete=true\" },\n  orphaned_projects: {\n    select: :projects,\n    joins: \"LEFT JOIN namespaces ON projects.namespace_id = namespaces.id\",\n    where: \"namespaces.id IS NULL\"\n  },\n  uploads: { select: :uploads },\n  users: {\n    select: :users,\n    joins: \"LEFT JOIN\n      (\n        SELECT\n          members.user_id,\n          MAX(access_level) as access_level\n        FROM members\n        GROUP BY members.user_id\n      ) AS u\n      ON users.id = u.user_id\",\n    where: \"user_type = 0\",\n    fields: {\n      admin: {},\n      external: {},\n      state: {},\n      access_level: { definition: \"COALESCE(u.access_level, 0)\" }\n    }\n  },\n  projects: {\n    select: :projects,\n    fields: {\n      visibility_level: {},\n      archived: {}\n    }\n  },\n  namespaces: {\n    select: :namespaces,\n    fields: {\n      type: {},\n      visibility_level: {},\n      root: { definition: \"(parent_id IS NULL)\" }\n    }\n  },\n  container_repositories: { select: :container_repositories },\n  container_repositories_delete_scheduled: { select: :container_repositories, where: \"status = 0\" },\n  container_repositories_delete_failed: { select: :container_repositories, where: \"status = 1\" },\n  container_repositories_delete_ongoing: { select: :container_repositories, where: \"status = 2\" },\n  container_repositories_delete_staled: {\n    select: :container_repositories,\n    where: \"status = 2 AND delete_started_at < (NOW() - INTERVAL '30 minutes')\"\n  },\n  container_repositories_cleanup_enabled: CONTAINER_REPOSITORIES_CLEANUP_ENABLED_QUERY,\n  container_repositories_cleanup_pending: CONTAINER_REPOSITORIES_CLEANUP_ENABLED_QUERY.merge(\n    where: <<~SQL\n      container_expiration_policies.enabled = TRUE\n      AND container_repositories.expiration_policy_cleanup_status IN (0, 1)\n      AND (container_repositories.expiration_policy_started_at IS NULL OR container_repositories.expiration_policy_started_at < container_expiration_policies.next_run_at)\n      AND (container_expiration_policies.next_run_at < NOW())\n    SQL\n  ),\n  container_repositories_cleanup_unfinished: CONTAINER_REPOSITORIES_CLEANUP_ENABLED_QUERY.merge(\n    where: <<~SQL\n      container_expiration_policies.enabled = TRUE\n      AND container_repositories.expiration_policy_cleanup_status = 2\n    SQL\n  ),\n  container_repositories_cleanup_unscheduled: CONTAINER_REPOSITORIES_CLEANUP_ENABLED_QUERY.merge(\n    where: <<~SQL\n      container_expiration_policies.enabled = TRUE\n      AND container_repositories.expiration_policy_cleanup_status = 0\n    SQL\n  ),\n  container_repositories_cleanup_scheduled: CONTAINER_REPOSITORIES_CLEANUP_ENABLED_QUERY.merge(\n    where: <<~SQL\n      container_expiration_policies.enabled = TRUE\n      AND container_repositories.expiration_policy_cleanup_status = 1\n    SQL\n  ),\n  container_repositories_cleanup_ongoing: {\n    select: :container_repositories,\n    where: \"expiration_policy_cleanup_status = 3\"\n  },\n  container_repositories_cleanup_staled: {\n    select: :container_repositories,\n    where: <<~SQL\n      expiration_policy_cleanup_status = 3\n      AND (expiration_policy_started_at < (NOW() - INTERVAL '35 minutes') OR expiration_policy_started_at IS NULL)\n    SQL\n  }\n}.freeze\n"

Constants inherited from Base

Base::POOL_SIZE, Base::POOL_TIMEOUT

Instance Method Summary collapse

Methods inherited from Base

configure_type_map_for_results, connection_pool, #connection_pool, #with_connection_pool

Constructor Details

#initialize(selected_queries: nil, **args) ⇒ RowCountCollector

Returns a new instance of RowCountCollector.



178
179
180
181
182
# File 'lib/gitlab_exporter/database/row_count.rb', line 178

def initialize(selected_queries: nil, **args)
  super(**args)

  @selected_queries = Set.new(selected_queries.map(&:to_sym)) unless selected_queries.nil?
end

Instance Method Details

#runObject



184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/gitlab_exporter/database/row_count.rb', line 184

def run
  results = Hash.new(0)

  QUERIES.each do |key, query_hash|
    next if query_hash[:check] && !successful_check?(query_hash[:check])
    next if !@selected_queries.nil? && !@selected_queries.include?(key)

    results[key] = count_from_query_hash(query_hash)
  end

  results
end