Module: Gitlab::Database::PartitioningMigrationHelpers::TableManagementHelpers
- Included in:
- Gitlab::Database::PartitioningMigrationHelpers
- Defined in:
- lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb
Constant Summary collapse
- ALLOWED_TABLES =
%w[audit_events web_hook_logs].freeze
- ERROR_SCOPE =
'table partitioning'
- MIGRATION_CLASS_NAME =
"::#{module_parent_name}::BackfillPartitionedTable"
- MIGRATION =
"BackfillPartitionedTable"
- BATCH_INTERVAL =
2.minutes.freeze
- BATCH_SIZE =
50_000
- SUB_BATCH_SIZE =
2_500
Constants included from MigrationHelpers::LooseForeignKeyHelpers
MigrationHelpers::LooseForeignKeyHelpers::DELETED_RECORDS_INSERT_FUNCTION_NAME
Constants included from MigrationHelpers
MigrationHelpers::DEFAULT_TIMESTAMP_COLUMNS
Constants included from Migrations::RedisHelpers
Migrations::RedisHelpers::SCAN_START_CURSOR
Constants included from Migrations::SidekiqHelpers
Migrations::SidekiqHelpers::DEFAULT_MAX_ATTEMPTS, Migrations::SidekiqHelpers::DEFAULT_TIMES_IN_A_ROW
Constants included from Migrations::ConstraintsHelpers
Migrations::ConstraintsHelpers::MAX_IDENTIFIER_NAME_LENGTH
Constants included from Migrations::BatchedBackgroundMigrationHelpers
Migrations::BatchedBackgroundMigrationHelpers::BATCH_CLASS_NAME, Migrations::BatchedBackgroundMigrationHelpers::BATCH_MIN_DELAY, Migrations::BatchedBackgroundMigrationHelpers::BATCH_MIN_VALUE, Migrations::BatchedBackgroundMigrationHelpers::NonExistentMigrationError
Constants included from Migrations::BackgroundMigrationHelpers
Migrations::BackgroundMigrationHelpers::JOB_BUFFER_SIZE
Instance Method Summary collapse
-
#cleanup_partitioning_data_migration(table_name) ⇒ Object
Cleanup a previously enqueued background migration to copy data into a partitioned table.
- #convert_table_to_first_list_partition(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:, lock_tables: []) ⇒ Object
- #create_hash_partitions(table_name, number_of_partitions) ⇒ Object
- #create_trigger_to_sync_tables(source_table_name, partitioned_table_name, unique_key) ⇒ Object
- #drop_nonpartitioned_archive_table(table_name) ⇒ Object
-
#drop_partitioned_table_for(table_name) ⇒ Object
Clean up a partitioned copy of an existing table.
-
#enqueue_partitioning_data_migration(table_name) ⇒ Object
Enqueue the background jobs that will backfill data in the partitioned table, by batch-copying records from original table.
-
#finalize_backfilling_partitioned_table(table_name) ⇒ Object
Executes jobs from previous BatchedBackgroundMigration to backfill the partitioned table by finishing pending jobs.
-
#partition_table_by_date(table_name, column_name, min_date: nil, max_date: nil) ⇒ Object
Creates a partitioned copy of an existing table, using a RANGE partitioning strategy on a timestamp column.
- #prepare_constraint_for_list_partitioning(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:, async: false) ⇒ Object
-
#replace_with_partitioned_table(table_name) ⇒ Object
Replaces a non-partitioned table with its partitioned copy.
- #revert_converting_table_to_first_list_partition(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:) ⇒ Object
- #revert_preparing_constraint_for_list_partitioning(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:) ⇒ Object
-
#rollback_replace_with_partitioned_table(table_name) ⇒ Object
Rolls back a migration that replaced a non-partitioned table with its partitioned copy.
Methods included from MigrationHelpers::LooseForeignKeyHelpers
#has_loose_foreign_key?, #track_record_deletions, #untrack_record_deletions
Methods included from SchemaHelpers
#assert_not_in_transaction_block, #create_comment, #create_trigger, #create_trigger_function, #drop_function, #drop_trigger, #function_exists?, #object_name, #tmp_table_name, #trigger_exists?
Methods included from MigrationHelpers
#add_concurrent_foreign_key, #add_concurrent_index, #add_primary_key_using_index, #add_sequence, #add_timestamps_with_timezone, #backfill_conversion_of_integer_to_bigint, #backfill_iids, #change_column_type_concurrently, #check_trigger_permissions!, #cleanup_concurrent_column_rename, #cleanup_concurrent_column_type_change, #column_for, #concurrent_foreign_key_name, #convert_to_bigint_column, #convert_to_type_column, #copy_foreign_keys, #copy_indexes, #create_or_update_plan_limit, #create_temporary_columns_and_triggers, #define_batchable_model, #drop_sequence, #each_batch, #each_batch_range, #false_value, #foreign_key_exists?, #foreign_keys_for, #index_exists_by_name?, #index_invalid?, #indexes_for, #initialize_conversion_of_integer_to_bigint, #install_rename_triggers, #partition?, #postgres_exists_by_name?, #remove_column_default, #remove_concurrent_index, #remove_concurrent_index_by_name, #remove_foreign_key_if_exists, #remove_foreign_key_without_error, #remove_rename_triggers, #remove_timestamps, #rename_column_concurrently, #rename_trigger_name, #replace_sql, #restore_conversion_of_integer_to_bigint, #revert_backfill_conversion_of_integer_to_bigint, #revert_initialize_conversion_of_integer_to_bigint, #swap_primary_key, #table_partitioned?, #true_value, #undo_change_column_type_concurrently, #undo_cleanup_concurrent_column_rename, #undo_cleanup_concurrent_column_type_change, #undo_rename_column_concurrently, #update_column_in_batches, #validate_foreign_key
Methods included from MigrationHelpers::WraparoundVacuumHelpers
#check_if_wraparound_in_progress
Methods included from AsyncConstraints::MigrationHelpers
#prepare_async_check_constraint_validation, #prepare_async_foreign_key_validation, #prepare_partitioned_async_foreign_key_validation, #unprepare_async_check_constraint_validation, #unprepare_async_foreign_key_validation, #unprepare_partitioned_async_foreign_key_validation
Methods included from AsyncIndexes::MigrationHelpers
#async_index_creation_available?, #prepare_async_index, #prepare_async_index_from_sql, #prepare_async_index_removal, #unprepare_async_index, #unprepare_async_index_by_name
Methods included from RenameTableHelpers
#finalize_table_rename, #rename_table_safely, #undo_finalize_table_rename, #undo_rename_table_safely
Methods included from DynamicModelHelpers
#define_batchable_model, #each_batch, #each_batch_range
Methods included from Migrations::RedisHelpers
Methods included from Migrations::SidekiqHelpers
#sidekiq_queue_length, #sidekiq_queue_migrate, #sidekiq_remove_jobs
Methods included from Migrations::ExtensionHelpers
#create_extension, #drop_extension
Methods included from Migrations::ConstraintsHelpers
#add_check_constraint, #add_not_null_constraint, #add_text_limit, #check_constraint_exists?, check_constraint_exists?, #check_constraint_name, #check_not_null_constraint_exists?, #check_text_limit_exists?, #copy_check_constraints, #drop_constraint, #remove_check_constraint, #remove_not_null_constraint, #remove_text_limit, #rename_constraint, #switch_constraint_names, #text_limit_name, #validate_check_constraint, #validate_check_constraint_name!, #validate_not_null_constraint, #validate_text_limit
Methods included from Migrations::TimeoutHelpers
Methods included from Migrations::LockRetriesHelpers
Methods included from Migrations::BatchedBackgroundMigrationHelpers
#delete_batched_background_migration, #ensure_batched_background_migration_is_finished, #finalize_batched_background_migration, #gitlab_schema_from_context, #queue_batched_background_migration
Methods included from Migrations::BackgroundMigrationHelpers
#delete_job_tracking, #delete_queued_jobs, #finalize_background_migration, #migrate_in, #queue_background_migration_jobs_by_range_at_intervals, #requeue_background_migration_jobs_by_range_at_intervals
Methods included from Migrations::ReestablishedConnectionStack
#with_restored_connection_stack
Instance Method Details
#cleanup_partitioning_data_migration(table_name) ⇒ Object
Cleanup a previously enqueued background migration to copy data into a partitioned table. This will not prevent the enqueued jobs from executing, but instead cleans up information in the database used to track the state of the batched background migration. It should be safe to also remove the partitioned table even if the background jobs are still in-progress, as the absence of the table will cause them to safely exit.
Example:
cleanup_partitioning_data_migration :audit_events
128 129 130 131 132 133 134 135 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 128 def cleanup_partitioning_data_migration(table_name) assert_table_is_allowed(table_name) partitioned_table_name = make_partitioned_table_name(table_name) primary_key = connection.primary_key(table_name) delete_batched_background_migration(MIGRATION, table_name, primary_key, [partitioned_table_name]) end |
#convert_table_to_first_list_partition(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:, lock_tables: []) ⇒ Object
282 283 284 285 286 287 288 289 290 291 292 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 282 def convert_table_to_first_list_partition(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:, lock_tables: []) validate_not_in_transaction!(:convert_table_to_first_list_partition) Gitlab::Database::Partitioning::List::ConvertTable .new(migration_context: self, table_name: table_name, parent_table_name: parent_table_name, partitioning_column: partitioning_column, zero_partition_value: initial_partitioning_value ).partition end |
#create_hash_partitions(table_name, number_of_partitions) ⇒ Object
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 137 def create_hash_partitions(table_name, number_of_partitions) transaction do (0..number_of_partitions - 1).each do |partition| decimals = Math.log10(number_of_partitions).ceil suffix = "%0#{decimals}d" % partition partition_name = "#{table_name}_#{suffix}" schema = Gitlab::Database::STATIC_PARTITIONS_SCHEMA execute(<<~SQL) CREATE TABLE #{schema}.#{partition_name} PARTITION OF #{table_name} FOR VALUES WITH (MODULUS #{number_of_partitions}, REMAINDER #{partition}); SQL end end end |
#create_trigger_to_sync_tables(source_table_name, partitioned_table_name, unique_key) ⇒ Object
248 249 250 251 252 253 254 255 256 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 248 def create_trigger_to_sync_tables(source_table_name, partitioned_table_name, unique_key) function_name = make_sync_function_name(source_table_name) trigger_name = make_sync_trigger_name(source_table_name) create_sync_function(function_name, partitioned_table_name, unique_key) create_comment('FUNCTION', function_name, "Partitioning migration: table sync for #{source_table_name} table") create_sync_trigger(source_table_name, trigger_name, function_name) end |
#drop_nonpartitioned_archive_table(table_name) ⇒ Object
236 237 238 239 240 241 242 243 244 245 246 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 236 def drop_nonpartitioned_archive_table(table_name) assert_table_is_allowed(table_name) archived_table_name = make_archived_table_name(table_name) with_lock_retries do drop_sync_trigger(table_name) end drop_table(archived_table_name) end |
#drop_partitioned_table_for(table_name) ⇒ Object
Clean up a partitioned copy of an existing table. First, deletes the database function and trigger that were used to copy writes to the partitioned table, then removes the partitioned table (also removing partitions).
Example:
drop_partitioned_table_for :audit_events
81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 81 def drop_partitioned_table_for(table_name) assert_table_is_allowed(table_name) assert_not_in_transaction_block(scope: ERROR_SCOPE) with_lock_retries do drop_sync_trigger(table_name) end partitioned_table_name = make_partitioned_table_name(table_name) drop_table(partitioned_table_name) end |
#enqueue_partitioning_data_migration(table_name) ⇒ Object
Enqueue the background jobs that will backfill data in the partitioned table, by batch-copying records from original table. This helper should be called from a post-deploy migration.
Example:
enqueue_partitioning_data_migration :audit_events
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 100 def enqueue_partitioning_data_migration(table_name) assert_table_is_allowed(table_name) assert_not_in_transaction_block(scope: ERROR_SCOPE) partitioned_table_name = make_partitioned_table_name(table_name) primary_key = connection.primary_key(table_name) queue_batched_background_migration( MIGRATION, table_name, primary_key, partitioned_table_name, batch_size: BATCH_SIZE, sub_batch_size: SUB_BATCH_SIZE, job_interval: BATCH_INTERVAL ) end |
#finalize_backfilling_partitioned_table(table_name) ⇒ Object
Executes jobs from previous BatchedBackgroundMigration to backfill the partitioned table by finishing pending jobs.
NOTE Migrations using this method cannot be scheduled in the same release as the migration that schedules the background migration using the `enqueue_partitioning_data_migration` helper, or else the background migration jobs will be force-executed.
Example:
finalize_backfilling_partitioned_table :audit_events
165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 165 def finalize_backfilling_partitioned_table(table_name) assert_table_is_allowed(table_name) partitioned_table_name = make_partitioned_table_name(table_name) unless table_exists?(partitioned_table_name) raise "could not find partitioned table for #{table_name}, " \ "this could indicate the previous partitioning migration has been rolled back." end ensure_batched_background_migration_is_finished( job_class_name: MIGRATION, table_name: table_name, column_name: connection.primary_key(table_name), job_arguments: [partitioned_table_name] ) Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.with_suppressed do disable_statement_timeout do execute("VACUUM FREEZE ANALYZE #{partitioned_table_name}") end end end |
#partition_table_by_date(table_name, column_name, min_date: nil, max_date: nil) ⇒ Object
Creates a partitioned copy of an existing table, using a RANGE partitioning strategy on a timestamp column. One partition is created per month between the given `min_date` and `max_date`. Also installs a trigger on the original table to copy writes into the partitioned table. To copy over historic data from before creation of the partitioned table, use the `enqueue_partitioning_data_migration` helper in a post-deploy migration.
A copy of the original table is required as PG currently does not support partitioning existing tables.
Example:
partition_table_by_date :audit_events, :created_at, min_date: Date.new(2020, 1), max_date: Date.new(2020, 6)
Options are:
:min_date - a date specifying the lower bounds of the partition range
:max_date - a date specifying the upper bounds of the partition range, defaults to today + 1 month
Unless min_date is specified explicitly, we default to
-
The minimum value for the partitioning column in the table
-
If no data is present yet, the current month
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 38 def partition_table_by_date(table_name, column_name, min_date: nil, max_date: nil) Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.require_ddl_mode! assert_table_is_allowed(table_name) assert_not_in_transaction_block(scope: ERROR_SCOPE) max_date ||= Date.today + 1.month Gitlab::Database::QueryAnalyzers::RestrictAllowedSchemas.with_suppressed do min_date ||= connection.select_one(<<~SQL)['minimum'] || max_date - 1.month SELECT date_trunc('MONTH', MIN(#{column_name})) AS minimum FROM #{table_name} SQL end raise "max_date #{max_date} must be greater than min_date #{min_date}" if min_date >= max_date primary_key = connection.primary_key(table_name) raise "primary key not defined for #{table_name}" if primary_key.nil? partition_column = find_column_definition(table_name, column_name) raise "partition column #{column_name} does not exist on #{table_name}" if partition_column.nil? partitioned_table_name = make_partitioned_table_name(table_name) transaction do create_range_partitioned_copy(table_name, partitioned_table_name, partition_column, primary_key) create_daterange_partitions(partitioned_table_name, partition_column.name, min_date, max_date) end with_lock_retries do create_trigger_to_sync_tables(table_name, partitioned_table_name, primary_key) end end |
#prepare_constraint_for_list_partitioning(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:, async: false) ⇒ Object
258 259 260 261 262 263 264 265 266 267 268 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 258 def prepare_constraint_for_list_partitioning(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:, async: false) validate_not_in_transaction!(:prepare_constraint_for_list_partitioning) Gitlab::Database::Partitioning::List::ConvertTable .new(migration_context: self, table_name: table_name, parent_table_name: parent_table_name, partitioning_column: partitioning_column, zero_partition_value: initial_partitioning_value ).prepare_for_partitioning(async: async) end |
#replace_with_partitioned_table(table_name) ⇒ Object
Replaces a non-partitioned table with its partitioned copy. This is the final step in a partitioning migration, which makes the partitioned table ready for use by the application. The partitioned copy should be replaced with the original table in such a way that it appears seamless to any database clients. The replaced table will be renamed to `"#{replaced_table}_archived"`. Partitions and primary key constraints will also be renamed to match the naming scheme of the parent table.
NOTE This method should only be used after all other migration steps have completed successfully. There are several limitations to this method that MUST be handled before, or during, the swap migration:
-
Secondary indexes and foreign keys are not automatically recreated on the partitioned table.
-
Some types of constraints (UNIQUE and EXCLUDE) which rely on indexes, will not automatically be recreated on the partitioned table, since the underlying index will not be present.
-
Foreign keys referencing the original non-partitioned table, would also need to be updated to reference the partitioned table, but unfortunately this is not supported in PG11.
-
Views referencing the original table will not be automatically updated to reference the partitioned table.
Example:
replace_with_partitioned_table :audit_events
209 210 211 212 213 214 215 216 217 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 209 def replace_with_partitioned_table(table_name) assert_table_is_allowed(table_name) partitioned_table_name = make_partitioned_table_name(table_name) archived_table_name = make_archived_table_name(table_name) primary_key_name = connection.primary_key(table_name) replace_table(table_name, partitioned_table_name, archived_table_name, primary_key_name) end |
#revert_converting_table_to_first_list_partition(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:) ⇒ Object
294 295 296 297 298 299 300 301 302 303 304 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 294 def revert_converting_table_to_first_list_partition(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:) validate_not_in_transaction!(:revert_converting_table_to_first_list_partition) Gitlab::Database::Partitioning::List::ConvertTable .new(migration_context: self, table_name: table_name, parent_table_name: parent_table_name, partitioning_column: partitioning_column, zero_partition_value: initial_partitioning_value ).revert_partitioning end |
#revert_preparing_constraint_for_list_partitioning(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:) ⇒ Object
270 271 272 273 274 275 276 277 278 279 280 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 270 def revert_preparing_constraint_for_list_partitioning(table_name:, partitioning_column:, parent_table_name:, initial_partitioning_value:) validate_not_in_transaction!(:revert_preparing_constraint_for_list_partitioning) Gitlab::Database::Partitioning::List::ConvertTable .new(migration_context: self, table_name: table_name, parent_table_name: parent_table_name, partitioning_column: partitioning_column, zero_partition_value: initial_partitioning_value ).revert_preparation_for_partitioning end |
#rollback_replace_with_partitioned_table(table_name) ⇒ Object
Rolls back a migration that replaced a non-partitioned table with its partitioned copy. This can be used to restore the original non-partitioned table in the event of an unexpected issue.
Example:
rollback_replace_with_partitioned_table :audit_events
226 227 228 229 230 231 232 233 234 |
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 226 def rollback_replace_with_partitioned_table(table_name) assert_table_is_allowed(table_name) partitioned_table_name = make_partitioned_table_name(table_name) archived_table_name = make_archived_table_name(table_name) primary_key_name = connection.primary_key(archived_table_name) replace_table(table_name, archived_table_name, partitioned_table_name, primary_key_name) end |