Class: Gitlab::Database::Migrations::TestBatchedBackgroundRunner

Inherits:
BaseBackgroundRunner show all
Includes:
DynamicModelHelpers
Defined in:
lib/gitlab/database/migrations/test_batched_background_runner.rb

Constant Summary collapse

MIGRATION_DETAILS_FILE_NAME =
'details.json'

Constants included from DynamicModelHelpers

DynamicModelHelpers::BATCH_SIZE

Instance Attribute Summary

Attributes inherited from BaseBackgroundRunner

#connection, #result_dir

Instance Method Summary collapse

Methods included from DynamicModelHelpers

define_batchable_model, #each_batch, #each_batch_range

Methods inherited from BaseBackgroundRunner

#run_jobs

Constructor Details

#initialize(result_dir:, connection:, from_id:) ⇒ TestBatchedBackgroundRunner

Returns a new instance of TestBatchedBackgroundRunner.



11
12
13
14
15
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 11

# Builds a runner that samples batched background migrations enqueued after
# a given point.
#
# @param result_dir [String] directory where sampling artifacts are written
# @param connection [Object] database connection used for sampling (presumably
#   an ActiveRecord connection — matches the parent class attribute)
# @param from_id [Integer] only migrations with id > from_id are considered
def initialize(result_dir:, connection:, from_id:)
  super(result_dir: result_dir, connection: connection)
  @from_id = from_id
  @connection = connection
end

Instance Method Details

#jobs_by_migration_name ⇒ Object

rubocop:disable Metrics/AbcSize -- This method is temporarily more complex while it deals with both cursor and non-cursor migrations. The complexity will significantly decrease when non-cursor migration support is removed.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 20

# Builds a Hash of migration job_class_name => lazy Enumerator of sampled
# batched jobs, for every executable batched background migration with
# id > @from_id. Batches are sampled at progressively finer fractions of
# the table (see #uniform_fractions) rather than scanned sequentially,
# so a representative spread of the table is exercised quickly.
def jobs_by_migration_name
  set_shared_model_connection do
    Gitlab::Database::BackgroundMigration::BatchedMigration
      .executable
      .where('id > ?', from_id)
      .to_h do |migration|
      batching_strategy = migration.batch_class.new(connection: connection)

      is_cursor = migration.cursor?

      # Pretend every migration is a cursor migration. When actually running the job,
      # we can unwrap the cursor if it is not.
      cursor_columns = is_cursor ? migration.job_class.cursor_columns : [migration.column_name]

      # Wrap the single result into an array (that we pretend is a cursor) if this
      # is not a cursor migration. (next_min_value has an if check on cursor? and returns either array or int)
      table_min_cursor = Array.wrap(migration.next_min_value)

      # Descending order on every cursor column so the first row is the table maximum.
      ordering = cursor_columns.map { |c| { c => :desc } }

      rows_ordered_backwards = define_batchable_model(migration.table_name, connection: connection)
                                .order(*ordering)
      # If only one column, pluck.first returns a single value for that column instead of an array of
      # all (1) column(s)
      # So wrap the result for consistency between 1 and many columns
      table_max_cursor = Array.wrap(rows_ordered_backwards.pick(*cursor_columns))

      # variance is the portion of the batch range that we shrink between variance * 0 and variance * 1
      # to pick actual batches to sample.

      # Here we're going to do something that is explicitly WRONG, but good enough - we assume that we can
      # just scale the first element of the cursor to get a reasonable percentage of the way through the table.
      # This is really not true at all, but it's close enough for testing.
      # For the rest of the components of our example cursors, we'll reuse parts of the end cursors for each
      # batch for the start cursors of the next batch
      variance = table_max_cursor[0] - table_min_cursor[0]

      batch_first_elems = uniform_fractions.lazy.map { |frac| (variance * frac).to_i }

      jobs_to_sample = Enumerator.new do |y|
        completed_batches = []
        # We construct the starting cursor from the end of the prev loop,
        # or just the beginning of the table on the first loop
        # This way, cursors for our batches start at interesting places in all of their positions
        prev_end_cursor = table_min_cursor

        loop do
          first_elem = batch_first_elems.next
          batch_start = [first_elem] + prev_end_cursor[1..]
          # Stop once sampling lands inside an already-covered range; at that point
          # the fraction sequence has become finer than the batch size.
          break if completed_batches.any? { |batch| batch.cover?(batch_start) }

          # The current block is lazily evaluated as part of the jobs_to_sample enumerable
          # so it executes after the enclosing using_connection block has already executed
          # Therefore we need to re-associate with the explicit connection again
          Gitlab::Database::SharedModel.using_connection(connection) do
            next_bounds = batching_strategy.next_batch(
              migration.table_name,
              migration.column_name,
              batch_min_value: is_cursor ? batch_start : batch_start[0],
              batch_size: migration.batch_size,
              job_class: migration.job_class,
              job_arguments: migration.job_arguments
            )

            # If no rows match, the next_bounds are nil.
            # This will only happen if there are zero rows to match from the current sampling point to the end
            # of the table
            # Simulate the approach in the actual background migration worker by not sampling a batch
            # from this range.
            # (The actual worker would finish the migration, but we may find batches that can be sampled
            # elsewhere in the table)
            if next_bounds.nil?
              # If the migration has no work to do across the entire table, sampling can get stuck
              # in a loop if we don't mark the attempted batches as completed
              # We need to guess a size for this. The batch size of the migration is way too big in all
              # cases with a 2-element or more cursor, but it doesn't really matter so we just guess that.
              synthetic_cursor_offset = migration.batch_size
              batch_end = batch_start.dup
              batch_end[0] += synthetic_cursor_offset
              completed_batches << (batch_start..batch_end)
              next
            end

            batch_min, batch_max = next_bounds

            # These are ints if not a cursor, wrap them to maintain the illusion that everything is a cursor

            job = migration.create_batched_job!(batch_min, batch_max)

            # Wrap the batch min/max back as cursors if the migration was not cursor-based
            batch_min = Array.wrap(batch_min)
            batch_max = Array.wrap(batch_max)

            # Save the max as cursor details for the next loop so that we test
            # interesting cursor positions.
            prev_end_cursor = batch_max

            completed_batches << (batch_min..batch_max)

            # Yield the persisted job to the lazy consumer (BaseBackgroundRunner#run_jobs).
            y << job
          end
        end
      end

      job_class_name = migration.job_class_name

      # Persist interval/tuple-count/batch-size metadata alongside the timing results.
      export_migration_details(job_class_name,
        migration.slice(:interval, :total_tuple_count, :max_batch_size))

      [job_class_name, jobs_to_sample]
    end
  end
end


141
142
143
144
145
146
147
148
149
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 141

# Writes one progress line for a sampled job to stdout, showing the batch
# bounds — cursor bounds for cursor-based migrations, min/max values otherwise.
#
# @param batch_name [String] label printed before the bounds
# @param job [Object] a batched job responding to #batched_migration and the
#   relevant min/max accessors
def print_job_progress(batch_name, job)
  migration = job.batched_migration
  args_phrase =
    migration.cursor? ? "#{job.min_cursor} - #{job.max_cursor}" : "#{job.min_value} - #{job.max_value}"

  puts("  #{batch_name} (#{args_phrase})") # rubocop:disable Rails/Output -- This runs only in pipelines and should output to the pipeline log
end

#run_job(job) ⇒ Object

rubocop:enable Metrics/AbcSize



135
136
137
138
139
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 135

# Executes a single sampled batched job through the standard migration
# wrapper, within the shared-model connection context.
#
# @param job [Object] a batched job produced by #jobs_by_migration_name
def run_job(job)
  set_shared_model_connection do
    wrapper = Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper.new(connection: connection)
    wrapper.perform(job)
  end
end

#uniform_fractions ⇒ Object



151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 151

# Returns an infinite Enumerator of fractions in [0, 1], refined progressively:
# 0, 1 first (the endpoints), then 1/2, then 1/4 and 3/4, then the odd
# eighths, and so on. Pulling more values yields an ever more uniform cover
# of the unit interval without repeating any fraction.
#
# @return [Enumerator<Numeric>] 0 and 1 as Integers, the rest as Floats
def uniform_fractions
  Enumerator.new do |yielder|
    # Endpoints come first as a special case.
    yielder << 0
    yielder << 1

    # Each pass doubles the denominator (2, 4, 8, ...) and emits every odd
    # numerator below it, producing exactly the fractions not yet seen at
    # coarser denominators.
    denominator = 2
    loop do
      numerator = 1
      while numerator < denominator
        yielder << numerator.fdiv(denominator)
        numerator += 2
      end
      denominator *= 2
    end
  end
end