Class: Gitlab::BackgroundMigration::DeduplicateLfsObjectsProjects

Inherits:
BatchedMigrationJob
Defined in:
lib/gitlab/background_migration/deduplicate_lfs_objects_projects.rb

Defined Under Namespace

Classes: LfsObjectsProject

Constant Summary

Constants inherited from BatchedMigrationJob

BatchedMigrationJob::DEFAULT_FEATURE_CATEGORY, BatchedMigrationJob::MINIMUM_PAUSE_MS

Constants included from Database::DynamicModelHelpers

Database::DynamicModelHelpers::BATCH_SIZE

Instance Method Summary collapse

Methods inherited from BatchedMigrationJob

#batch_metrics, cursor, cursor?, cursor_columns, feature_category, #filter_batch, generic_instance, #initialize, job_arguments, job_arguments_count, operation_name, scope_to

Methods included from Database::DynamicModelHelpers

define_batchable_model, #each_batch, #each_batch_range

Constructor Details

This class inherits a constructor from Gitlab::BackgroundMigration::BatchedMigrationJob

Instance Method Details

#perform ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/gitlab/background_migration/deduplicate_lfs_objects_projects.rb', line 16

# Removes duplicated lfs_objects_projects rows, one sub-batch at a time.
#
# For every relation yielded by +each_sub_batch+:
#
# 1. fetch the candidate duplicates via
#    +duplicates_by_project_id_and_lfs_object_id+ and skip the sub-batch when
#    there are none;
# 2. group the matching rows by (project_id, lfs_object_id, repository_type),
#    keeping only groups with more than one row and remembering the highest
#    id of each group;
# 3. delete every row of such a group except the one holding that highest id.
def perform
  each_sub_batch do |relation|
    data = duplicates_by_project_id_and_lfs_object_id(relation)

    next if data.empty?

    # After plucking the duplicates, build a VALUE list
    id_list = Arel::Nodes::ValuesList.new(data).to_sql

    # Use the same GROUP BY query as in the MR to properly narrow down the duplicated records.
    # In the previous query we didn't include the repository_type because it is not covered with an index.
    subquery = LfsObjectsProject
                 .where("(project_id, lfs_object_id) IN (#{id_list})") # rubocop:disable GitlabSecurity/SqlInjection -- there is no user input given
                 .select('project_id, lfs_object_id, repository_type, MAX(id) AS max_id')
                 .group('project_id, lfs_object_id, repository_type')
                 .having('COUNT(*) > 1')

    # The subquery SQL must be interpolated (not escaped) so the join targets
    # the grouped duplicates. IS NOT DISTINCT FROM is used for repository_type
    # so that rows whose repository_type is NULL still match their group.
    join_query = <<~SQL.squish
      INNER JOIN (#{subquery.to_sql}) AS duplicates
      ON lfs_objects_projects.project_id = duplicates.project_id
      AND lfs_objects_projects.lfs_object_id = duplicates.lfs_object_id
      AND lfs_objects_projects.repository_type IS NOT DISTINCT FROM duplicates.repository_type
    SQL

    # Every member of a duplicate group except the row with the highest id.
    duplicated_lfs_objects_projects = LfsObjectsProject.joins(join_query).where.not(
      'lfs_objects_projects.id = duplicates.max_id'
    )

    LfsObjectsProject.where(id: duplicated_lfs_objects_projects.select(:id)).delete_all
  end
end