Class: Gitlab::Database::BackgroundMigration::BatchedJob
- Inherits:
-
SharedModel
- Object
- ActiveRecord::Base
- SharedModel
- Gitlab::Database::BackgroundMigration::BatchedJob
- Defined in:
- lib/gitlab/database/background_migration/batched_job.rb
Constant Summary collapse
- MAX_ATTEMPTS =
3
- MIN_BATCH_SIZE =
1
- SUB_BATCH_SIZE_REDUCE_FACTOR =
0.75
- SUB_BATCH_SIZE_THRESHOLD =
65
- STUCK_JOBS_TIMEOUT =
1.hour.freeze
- TIMEOUT_EXCEPTIONS =
[ActiveRecord::StatementTimeout, ActiveRecord::ConnectionTimeoutError, ActiveRecord::AdapterTimeout, ActiveRecord::LockWaitTimeout, ActiveRecord::QueryCanceled].freeze
Class Method Summary collapse
- .extract_transition_options(args) ⇒ Object
Instance Method Summary collapse
- #can_reduce_sub_batch_size? ⇒ Boolean
- #can_split?(exception) ⇒ Boolean
- #job_attributes ⇒ Object
-
#reduce_sub_batch_size! ⇒ Object
Reduces sub_batch_size by 25%.
- #split_and_retry! ⇒ Object
- #still_retryable? ⇒ Boolean
-
#sub_batch_exceeds_threshold? ⇒ Boolean
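Returns true when the next reduction would shrink sub_batch_size by more than SUB_BATCH_SIZE_THRESHOLD percent of the migration's initial sub_batch_size; in that case the sub-batch size is not reduced any further.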
- #time_efficiency ⇒ Object
- #within_batch_size_boundaries? ⇒ Boolean
Methods inherited from SharedModel
connection, #connection_db_config, connection_pool, using_connection
Class Method Details
.extract_transition_options(args) ⇒ Object
105 106 107 108 109 110 111 112 113 114 |
# File 'lib/gitlab/database/background_migration/batched_job.rb', line 105

# Picks the error details out of a list of transition arguments.
#
# Scans +args+ for the first element whose +:error+ entry is present and
# returns the exception stored there together with that element's
# +:from_sub_batch+ entry (+nil+ when the element has no such entry).
#
# @param args [Array] elements are indexed with +[:error]+ (presumably option hashes)
# @return [Array] +[exception, from_sub_batch]+, or +[]+ when no element carries an error
def self.extract_transition_options(args)
  error_hash = args.find { |arg| arg[:error].present? }
  return [] unless error_hash

  exception = error_hash.fetch(:error)
  from_sub_batch = error_hash[:from_sub_batch]

  [exception, from_sub_batch]
end
Instance Method Details
#can_reduce_sub_batch_size? ⇒ Boolean
150 151 152 |
# File 'lib/gitlab/database/background_migration/batched_job.rb', line 150

# Whether the sub-batch size of this job may still be reduced.
#
# @return [Boolean] truthy only when the job is still retryable and
#   +within_batch_size_boundaries?+ also holds
def can_reduce_sub_batch_size?
  retryable = still_retryable?

  retryable && within_batch_size_boundaries?
end
#can_split?(exception) ⇒ Boolean
144 145 146 147 148 |
# File 'lib/gitlab/database/background_migration/batched_job.rb', line 144

# Whether this job may be split in response to +exception+.
#
# A job that is still retryable is never split. Otherwise the job can be
# split only when the exception's class is listed in TIMEOUT_EXCEPTIONS and
# +within_batch_size_boundaries?+ holds.
#
# @param exception [Exception] the error raised by the job
# @return [Boolean]
def can_split?(exception)
  # Explicit false (not nil) so the predicate always yields a boolean.
  return false if still_retryable?

  # Membership is checked on the exact class: subclasses of a listed
  # exception do not match unless they are listed themselves.
  TIMEOUT_EXCEPTIONS.include?(exception.class) && within_batch_size_boundaries?
end
#job_attributes ⇒ Object
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# File 'lib/gitlab/database/background_migration/batched_job.rb', line 116

# Builds the attribute hash describing this job.
#
# The hash always carries the batch table/column, sub-batch size, pause and
# job arguments. The range is expressed as +:start_cursor+/+:end_cursor+
# when +migration_job_class.cursor?+ is true, and as +:start_id+/+:end_id+
# otherwise.
#
# @return [Hash{Symbol => Object}]
def job_attributes
  attributes = {
    batch_table: migration_table_name,
    batch_column: migration_column_name,
    sub_batch_size: sub_batch_size,
    pause_ms: pause_ms,
    job_arguments: migration_job_arguments
  }

  range_attributes =
    if migration_job_class.cursor?
      { start_cursor: min_cursor, end_cursor: max_cursor }
    else
      { start_id: min_value, end_id: max_value }
    end

  attributes.merge!(range_attributes)
end
#reduce_sub_batch_size! ⇒ Object
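Reduces sub_batch_size by 25% (multiplies it by SUB_BATCH_SIZE_REDUCE_FACTOR and truncates to an integer), keeping the result between 1 and batch_size. Only failed jobs can be reduced.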
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
# File 'lib/gitlab/database/background_migration/batched_job.rb', line 203

# Shrinks +sub_batch_size+ by SUB_BATCH_SIZE_REDUCE_FACTOR and logs the change.
#
# Does nothing when +sub_batch_exceeds_threshold?+ is true. The new size is
# truncated to an integer and clamped between 1 and +batch_size+.
#
# @raise [ReduceSubBatchSizeError] when the job is not in the failed state
# @return [Object, nil] +nil+ when the threshold check short-circuits,
#   otherwise whatever the +with_lock+ block yields (the logger call's result)
def reduce_sub_batch_size!
  unless failed?
    raise ReduceSubBatchSizeError, 'Only sub_batch_size of failed jobs can be reduced'
  end

  return if sub_batch_exceeds_threshold?

  with_lock do
    # Remember the size before the update so both values can be logged.
    previous_size = sub_batch_size
    shrunk_size = (previous_size * SUB_BATCH_SIZE_REDUCE_FACTOR).to_i.clamp(1, batch_size)

    update!(sub_batch_size: shrunk_size)

    Gitlab::AppLogger.warn(
      message: 'Sub batch size reduced due to timeout',
      batched_job_id: id,
      sub_batch_size: previous_size,
      reduced_sub_batch_size: shrunk_size,
      attempts: attempts,
      batched_migration_id: batched_migration.id,
      job_class_name: migration_job_class_name,
      job_arguments: migration_job_arguments
    )
  end
end
#split_and_retry! ⇒ Object
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
# File 'lib/gitlab/database/background_migration/batched_job.rb', line 154

# Halves this failed job's batch size and, where possible, splits its range
# into two jobs so that each half can be retried separately.
#
# All work happens inside +with_lock+. Attempts are reset to 0 in every
# non-raising path.
#
# @raise [SplitAndRetryError] when +max_cursor+ is set (cursor based jobs
#   are not supported) or when the job is not in the failed state
def split_and_retry!
  with_lock do
    raise SplitAndRetryError, 'Split and retry not yet supported for cursor based jobs' unless max_cursor.nil?
    raise SplitAndRetryError, 'Only failed jobs can be split' unless failed?

    halved_batch_size = batch_size / 2

    # The batch cannot be halved any further: only reset the attempt counter.
    next update!(attempts: 0) if halved_batch_size < 1

    strategy = batched_migration.batch_class.new(connection: self.class.connection)
    bounds = strategy.next_batch(
      batched_migration.table_name,
      batched_migration.column_name,
      batch_min_value: min_value,
      batch_size: halved_batch_size,
      job_arguments: batched_migration.job_arguments,
      job_class: batched_migration.job_class
    )
    split_point = bounds.last

    # The split point must not go past the existing max_value, because those
    # IDs would already belong to the next batched migration job. This can
    # happen when a lot of records in the current batch were deleted.
    #
    # In that case only the batch size is lowered, so that later calls to
    # this method can eventually split the job if it keeps failing.
    next update!(batch_size: halved_batch_size, attempts: 0) if split_point >= max_value

    # Capture the upper bound before it is overwritten by the update below.
    previous_max_value = max_value

    update!(
      batch_size: halved_batch_size,
      max_value: split_point,
      attempts: 0,
      started_at: nil,
      finished_at: nil,
      metrics: {}
    )

    # The duplicate covers the second half of the original range.
    second_half = dup
    second_half.min_value = split_point.next
    second_half.max_value = previous_max_value
    second_half.save!
  end
end
#still_retryable? ⇒ Boolean
227 228 229 |
# File 'lib/gitlab/database/background_migration/batched_job.rb', line 227

# Whether the job has been attempted fewer than MAX_ATTEMPTS times.
#
# @return [Boolean]
def still_retryable?
  attempts_so_far = attempts

  attempts_so_far < MAX_ATTEMPTS
end
#sub_batch_exceeds_threshold? ⇒ Boolean
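Returns true when the next reduction would shrink sub_batch_size by more than SUB_BATCH_SIZE_THRESHOLD percent of the migration's initial sub_batch_size; in that case the sub-batch size is not reduced any further.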
239 240 241 242 243 244 245 |
# File 'lib/gitlab/database/background_migration/batched_job.rb', line 239

# Whether one more reduction would shrink the sub-batch size too far.
#
# Computes what +sub_batch_size+ would become after another reduction by
# SUB_BATCH_SIZE_REDUCE_FACTOR, expresses the drop from the migration's
# +sub_batch_size+ as a percentage (rounded to two decimals) and compares it
# with SUB_BATCH_SIZE_THRESHOLD.
#
# @return [Boolean] true when the percentage is above the threshold
def sub_batch_exceeds_threshold?
  original_size = batched_migration.sub_batch_size
  next_size = (sub_batch_size * SUB_BATCH_SIZE_REDUCE_FACTOR).to_i
  shrinkage = original_size - next_size

  reduction_percent = (1.0 * shrinkage / original_size * 100).round(2)

  reduction_percent > SUB_BATCH_SIZE_THRESHOLD
end
#time_efficiency ⇒ Object
134 135 136 137 138 139 140 141 142 |
# File 'lib/gitlab/database/background_migration/batched_job.rb', line 134

# Ratio between the job's run time and the migration's interval.
#
# @return [Float, nil] +nil+ unless the job succeeded and both +finished_at+
#   and +started_at+ are set
def time_efficiency
  return unless succeeded? && finished_at && started_at

  elapsed = finished_at - started_at

  # TODO: Switch to individual job interval (prereq: https://gitlab.com/gitlab-org/gitlab/-/issues/328801)
  elapsed.to_f / batched_migration.interval
end
#within_batch_size_boundaries? ⇒ Boolean
231 232 233 |
# File 'lib/gitlab/database/background_migration/batched_job.rb', line 231

# Whether +batch_size+ is larger than both MIN_BATCH_SIZE and +sub_batch_size+.
#
# @return [Boolean]
def within_batch_size_boundaries?
  current_batch_size = batch_size

  current_batch_size > MIN_BATCH_SIZE && current_batch_size > sub_batch_size
end