Class: Gitlab::BackgroundMigration::DeduplicateLfsObjectsProjects
- Inherits:
  - BatchedMigrationJob
- Inheritance hierarchy:
  - Object
    - BatchedMigrationJob
      - Gitlab::BackgroundMigration::DeduplicateLfsObjectsProjects
 
- Defined in:
- lib/gitlab/background_migration/deduplicate_lfs_objects_projects.rb
Defined Under Namespace
Classes: LfsObjectsProject
Constant Summary
Constants inherited from BatchedMigrationJob
BatchedMigrationJob::DEFAULT_FEATURE_CATEGORY, BatchedMigrationJob::MINIMUM_PAUSE_MS
Constants included from Database::DynamicModelHelpers
Database::DynamicModelHelpers::BATCH_SIZE
Instance Method Summary collapse
Methods inherited from BatchedMigrationJob
#batch_metrics, cursor, cursor?, cursor_columns, feature_category, #filter_batch, generic_instance, #initialize, job_arguments, job_arguments_count, operation_name, scope_to
Methods included from Database::DynamicModelHelpers
define_batchable_model, #each_batch, #each_batch_range
Constructor Details
This class inherits a constructor from Gitlab::BackgroundMigration::BatchedMigrationJob
Instance Method Details
#perform ⇒ Object
# File 'lib/gitlab/background_migration/deduplicate_lfs_objects_projects.rb', line 16

# Removes duplicated lfs_objects_projects rows, one sub-batch at a time.
#
# For every sub-batch, rows sharing the same
# (project_id, lfs_object_id, repository_type) are treated as duplicates;
# the row with the highest id in each group is kept and the others are deleted.
def perform
  each_sub_batch do |relation|
    data = duplicates_by_project_id_and_lfs_object_id(relation)

    next if data.empty?

    # After plucking the duplicates, build a VALUE list
    id_list = Arel::Nodes::ValuesList.new(data).to_sql

    # Use the same GROUP BY query as in the MR to properly narrow down the duplicated records.
    # In the previous query we didn't include the repository_type because it is not covered with an index.
    subquery = LfsObjectsProject
      .where("(project_id, lfs_object_id) IN (#{id_list})") # rubocop:disable GitlabSecurity/SqlInjection -- there is no user input given
      .select('project_id, lfs_object_id, repository_type, MAX(id) AS max_id')
      .group('project_id, lfs_object_id, repository_type')
      .having('COUNT(*) > 1')

    # IS NOT DISTINCT FROM is a NULL-safe equality, so rows whose
    # repository_type is NULL still match their duplicate group.
    join_query = <<~SQL.squish
      INNER JOIN (#{subquery.to_sql}) AS duplicates
      ON lfs_objects_projects.project_id = duplicates.project_id
      AND lfs_objects_projects.lfs_object_id = duplicates.lfs_object_id
      AND lfs_objects_projects.repository_type IS NOT DISTINCT FROM duplicates.repository_type
    SQL

    # Every row in a duplicate group except the one holding max_id.
    duplicated_lfs_objects_projects = LfsObjectsProject.joins(join_query).where.not(
      'lfs_objects_projects.id = duplicates.max_id'
    )

    LfsObjectsProject.where(id: duplicated_lfs_objects_projects.select(:id)).delete_all
  end
end