Class: Gitlab::BackgroundMigration::BackfillEventsShardingKey

Inherits:
BatchedMigrationJob show all
Defined in:
lib/gitlab/background_migration/backfill_events_sharding_key.rb

Constant Summary collapse

SUB_BATCH_SIZE =
150

Constants inherited from BatchedMigrationJob

Gitlab::BackgroundMigration::BatchedMigrationJob::DEFAULT_FEATURE_CATEGORY

Constants included from Database::DynamicModelHelpers

Database::DynamicModelHelpers::BATCH_SIZE

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from BatchedMigrationJob

#batch_metrics, cursor, cursor?, cursor_columns, feature_category, #filter_batch, generic_instance, #initialize, job_arguments, job_arguments_count, operation_name, scope_to

Methods included from Database::DynamicModelHelpers

#define_batchable_model, #each_batch, #each_batch_range

Constructor Details

This class inherits a constructor from Gitlab::BackgroundMigration::BatchedMigrationJob

Class Method Details

.reset_order ⇒ Object



13
14
15
# File 'lib/gitlab/background_migration/backfill_events_sharding_key.rb', line 13

# Value this job supplies as the `reset_order` batching argument when
# #perform calls `each_sub_batch`.
#
# @return [Boolean] always +false+
def self.reset_order = false

Instance Method Details

#perform ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/gitlab/background_migration/backfill_events_sharding_key.rb', line 17

# Processes the batch one sub-batch at a time. For every sub-batch it:
#
# 1. hands the rows to #backfill_from_model (defined elsewhere; per the
#    original note it tries to fill project_id / group_id from the model),
# 2. sets `personal_namespace_id` to the author's namespace of type 'User'
#    for rows whose `group_id` and `personal_namespace_id` are both NULL,
# 3. deletes the rows for which both columns are NULL when the DELETE runs.
#
# @return whatever `each_sub_batch` returns
def perform
  batching_arguments = { reset_order: self.class.reset_order }

  each_sub_batch(batching_arguments: batching_arguments) do |sub_batch|
    relation = sub_batch.select(:id, :group_id, :personal_namespace_id).limit(SUB_BATCH_SIZE)

    # Try to back-fill project_id / group_id from model
    backfill_from_model(relation)

    # CTE preamble shared by both statements below. It is rendered each time
    # it is needed, so every statement embeds the sub-batch query itself and
    # evaluates the NULL filter when that statement runs.
    rows_without_key_cte = lambda do
      <<~SQL.chomp
        WITH relation AS MATERIALIZED (#{relation.to_sql}),
        filtered_relation AS MATERIALIZED (SELECT id FROM relation WHERE group_id IS NULL AND personal_namespace_id IS NULL LIMIT #{SUB_BATCH_SIZE})
      SQL
    end

    # Try to back-fill personal_namespace_id from author
    backfill_from_author_sql = <<~SQL
      #{rows_without_key_cte.call}
      UPDATE events
      SET personal_namespace_id = namespaces.id
      FROM namespaces
      WHERE events.author_id = namespaces.owner_id AND namespaces.type = 'User'
      AND events.id IN (SELECT id FROM filtered_relation)
    SQL
    connection.execute(backfill_from_author_sql)

    # Delete records without sharding key
    delete_without_key_sql = <<~SQL
      #{rows_without_key_cte.call}
      DELETE FROM events USING filtered_relation WHERE events.id = filtered_relation.id
    SQL
    connection.execute(delete_without_key_sql)
  end
end