Class: Gitlab::Database::Migrations::TestBatchedBackgroundRunner
- Inherits:
-
BaseBackgroundRunner
- Object
- BaseBackgroundRunner
- Gitlab::Database::Migrations::TestBatchedBackgroundRunner
- Includes:
- DynamicModelHelpers
- Defined in:
- lib/gitlab/database/migrations/test_batched_background_runner.rb
Constant Summary collapse
- MIGRATION_DETAILS_FILE_NAME =
'details.json'
Constants included from DynamicModelHelpers
DynamicModelHelpers::BATCH_SIZE
Instance Attribute Summary
Attributes inherited from BaseBackgroundRunner
Instance Method Summary collapse
-
#initialize(result_dir:, connection:, from_id:) ⇒ TestBatchedBackgroundRunner
constructor
A new instance of TestBatchedBackgroundRunner.
-
#jobs_by_migration_name ⇒ Object
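Builds a hash mapping each executable batched migration's job class name to an enumerator of sampled jobs. (Source annotation, rubocop:disable Metrics/AbcSize: this method is temporarily more complex while it deals with both cursor and non-cursor migrations.)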
- #print_job_progress(batch_name, job) ⇒ Object
-
#run_job(job) ⇒ Object
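Runs the given job through BatchedMigrationWrapper on this runner's connection. (The source comment attached to this method is only the directive rubocop:enable Metrics/AbcSize.)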
- #uniform_fractions ⇒ Object
Methods included from DynamicModelHelpers
define_batchable_model, #each_batch, #each_batch_range
Methods inherited from BaseBackgroundRunner
Constructor Details
#initialize(result_dir:, connection:, from_id:) ⇒ TestBatchedBackgroundRunner
Returns a new instance of TestBatchedBackgroundRunner.
11 12 13 14 15 |
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 11

# Builds a runner, delegating the shared setup to BaseBackgroundRunner and
# keeping the connection and the id threshold on the instance.
#
# @param result_dir forwarded unchanged to the parent constructor
# @param connection forwarded to the parent constructor and stored in @connection
# @param from_id stored in @from_id
#   NOTE(review): jobs_by_migration_name reads a `from_id` value to filter
#   migrations by id; presumably a reader for this ivar exists outside this view.
def initialize(result_dir:, connection:, from_id:)
  super(connection: connection, result_dir: result_dir)

  @from_id = from_id
  @connection = connection
end
Instance Method Details
#jobs_by_migration_name ⇒ Object
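Builds a hash mapping each executable batched migration's job class name to an enumerator of sampled jobs. (Source annotation, rubocop:disable Metrics/AbcSize: this method is temporarily more complex while it deals with both cursor and non-cursor migrations. The complexity will significantly decrease when non-cursor migration support is removed.)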
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 20

# For every executable batched migration whose id is greater than from_id,
# builds a [job_class_name, jobs_to_sample] pair and collects the pairs into
# a hash via to_h.
#
# jobs_to_sample is an Enumerator: batches are located and jobs are created
# only as elements are pulled from it. export_migration_details, by contrast,
# is called once per migration while the hash itself is being built.
#
# The hash is the value of the block passed to set_shared_model_connection;
# presumably that helper returns the block's value - TODO confirm.
def jobs_by_migration_name
  set_shared_model_connection do
    Gitlab::Database::BackgroundMigration::BatchedMigration
      .executable
      .where('id > ?', from_id)
      .to_h do |migration|
      batching_strategy = migration.batch_class.new(connection: connection)

      is_cursor = migration.cursor?

      # Pretend every migration is a cursor migration. When actually running the job,
      # we can unwrap the cursor if it is not.
      cursor_columns = is_cursor ? migration.job_class.cursor_columns : [migration.column_name]

      # Wrap the single result into an array (that we pretend is a cursor) if this
      # is not a cursor migration. (next_min_value has an if check on cursor? and returns either array or int)
      table_min_cursor = Array.wrap(migration.next_min_value)

      # Order by every cursor column descending so the first row is the table's last cursor position.
      ordering = cursor_columns.map { |c| { c => :desc } }

      rows_ordered_backwards = define_batchable_model(migration.table_name, connection: connection)
                                 .order(*ordering)

      # If only one column, pick returns a single value for that column instead of an array of
      # all (1) column(s)
      # So wrap the result for consistency between 1 and many columns
      # NOTE(review): if no row is picked this wraps to an empty array and table_max_cursor[0]
      # below is nil - confirm callers never sample an empty table.
      table_max_cursor = Array.wrap(rows_ordered_backwards.pick(*cursor_columns))

      # variance is the portion of the batch range that we shrink between variance * 0 and variance * 1
      # to pick actual batches to sample.

      # Here we're going to do something that is explicitly WRONG, but good enough - we assume that we can
      # just scale the first element of the cursor to get a reasonable percentage of the way through the table.
      # This is really not true at all, but it's close enough for testing.
      #
      # For the rest of the components of our example cursors, we'll reuse parts of the end cursors for each
      # batch for the start cursors of the next batch
      variance = table_max_cursor[0] - table_min_cursor[0]

      # Lazily maps each fraction from uniform_fractions to an integer first cursor element.
      batch_first_elems = uniform_fractions.lazy.map { |frac| (variance * frac).to_i }

      jobs_to_sample = Enumerator.new do |y|
        # Ranges of cursors already sampled (or skipped); used to stop once a start point repeats.
        completed_batches = []

        # We construct the starting cursor from the end of the prev loop,
        # or just the beginning of the table on the first loop
        # This way, cursors for our batches start at interesting places in all of their positions
        prev_end_cursor = table_min_cursor

        loop do
          first_elem = batch_first_elems.next
          batch_start = [first_elem] + prev_end_cursor[1..]

          # Stop sampling as soon as a candidate start falls inside a range we have already handled.
          break if completed_batches.any? { |batch| batch.cover?(batch_start) }

          # The current block is lazily evaluated as part of the jobs_to_sample enumerable
          # so it executes after the enclosing using_connection block has already executed
          # Therefore we need to re-associate with the explicit connection again
          Gitlab::Database::SharedModel.using_connection(connection) do
            next_bounds = batching_strategy.next_batch(
              migration.table_name,
              migration.column_name,
              # Unwrap the pretend cursor again for non-cursor migrations.
              batch_min_value: is_cursor ? batch_start : batch_start[0],
              batch_size: migration.batch_size,
              job_class: migration.job_class,
              job_arguments: migration.job_arguments
            )

            # If no rows match, the next_bounds are nil.
            # This will only happen if there are zero rows to match from the current sampling point to the end
            # of the table
            # Simulate the approach in the actual background migration worker by not sampling a batch
            # from this range.
            # (The actual worker would finish the migration, but we may find batches that can be sampled
            # elsewhere in the table)
            if next_bounds.nil?
              # If the migration has no work to do across the entire table, sampling can get stuck
              # in a loop if we don't mark the attempted batches as completed

              # We need to guess a size for this. The batch size of the migration is way too big in all
              # cases with a 2-element or more cursor, but it doesn't really matter so we just guess that.
              synthetic_cursor_offset = migration.batch_size
              batch_end = batch_start.dup
              batch_end[0] += synthetic_cursor_offset
              completed_batches << (batch_start..batch_end)

              # Leaves this using_connection block; nothing follows it in the loop body,
              # so the loop moves on to the next fraction.
              next
            end

            batch_min, batch_max = next_bounds # These are ints if not a cursor, wrap them to maintain the illusion that everything is a cursor

            job = migration.create_batched_job!(batch_min, batch_max)

            # Wrap the batch min/max back as cursors if the migration was not cursor-based
            batch_min = Array.wrap(batch_min)
            batch_max = Array.wrap(batch_max)

            # Save the max as cursor details for the next loop so that we test
            # interesting cursor positions.
            prev_end_cursor = batch_max
            completed_batches << (batch_min..batch_max)

            y << job
          end
        end
      end

      job_class_name = migration.job_class_name

      export_migration_details(job_class_name,
        migration.slice(:interval, :total_tuple_count, :max_batch_size))

      [job_class_name, jobs_to_sample]
    end
  end
end
#print_job_progress(batch_name, job) ⇒ Object
141 142 143 144 145 146 147 148 149 |
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 141

# Writes one progress line for a sampled job to standard output.
#
# The bounds shown are the job's min/max cursor when its batched migration
# reports cursor?, and its min/max value otherwise.
#
# @param batch_name label printed at the start of the line
# @param job object responding to batched_migration plus the min/max readers used below
def print_job_progress(batch_name, job)
  lower, upper =
    if job.batched_migration.cursor?
      [job.min_cursor, job.max_cursor]
    else
      [job.min_value, job.max_value]
    end

  # NOTE(review): the leading whitespace of this string is reproduced as it appears in the
  # rendered listing; confirm against the repository file in case spaces were collapsed.
  puts(" #{batch_name} (#{lower} - #{upper})") # rubocop:disable Rails/Output -- This runs only in pipelines and should output to the pipeline log
end
#run_job(job) ⇒ Object
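Runs the given job through BatchedMigrationWrapper on this runner's connection. (The source comment attached to this method is only the directive rubocop:enable Metrics/AbcSize.)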
135 136 137 138 139 |
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 135

# Executes a single batched job inside set_shared_model_connection by handing
# it to a BatchedMigrationWrapper built with this runner's connection.
#
# @param job the job passed to BatchedMigrationWrapper#perform
def run_job(job)
  set_shared_model_connection do
    wrapper = Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper.new(connection: connection)
    wrapper.perform(job)
  end
end
#uniform_fractions ⇒ Object
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 151

# Returns an endless Enumerator of evenly spread values between 0 and 1 that
# get finer the more elements are taken:
#
#   0, 1                   (emitted first, as integers)
#   1/2
#   1/4, 3/4
#   1/8, 3/8, 5/8, 7/8
#   ...
#
# Each round doubles the denominator and walks every odd numerator below it,
# so no fraction is ever produced twice.
#
# @return [Enumerator] yields Integer 0 and 1, then Floats
def uniform_fractions
  Enumerator.new do |yielder|
    # The two endpoints are a special case outside the power-of-two pattern.
    yielder << 0
    yielder << 1

    (1..).each do |exponent|
      denominator = 2**exponent

      # Odd numerators only; the denominator is even, so the step never reaches it.
      1.step(denominator, 2) do |numerator|
        yielder << numerator.fdiv(denominator)
      end
    end
  end
end