Class: Gitlab::BackgroundMigration::RecalculateVulnerabilitiesOccurrencesUuid

Inherits:
Object
  • Object
show all
Defined in:
lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb

Overview

rubocop: disable Style/Documentation

Defined Under Namespace

Classes: CalculateFindingUUID, VulnerabilitiesFinding, VulnerabilitiesFindingPipeline, VulnerabilitiesIdentifier, Vulnerability, VulnerabilityFindingSignature

Instance Method Summary collapse

Instance Method Details

#perform(start_id, end_id) ⇒ Object

rubocop: disable Metrics/AbcSize,Metrics/MethodLength,Metrics/BlockLength


81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/gitlab/background_migration/recalculate_vulnerabilities_occurrences_uuid.rb', line 81

# Recalculates the UUIDs of the vulnerability findings whose ids fall within
# start_id..end_id, processing them in batches of 50.
#
# For every batch the method:
#   1. removes the findings reported by `find_duplicates`,
#   2. computes a UUID for each remaining finding with
#      `calculate_uuid_v5_for_finding`, removing findings whose computed UUID
#      repeats one already seen earlier in the same batch,
#   3. writes the new UUIDs with a single bulk update.
#
# When the bulk update fails with ActiveRecord::RecordNotUnique, the
# conflicting UUID is parsed from the error message, the finding currently
# holding that UUID is removed and the batch is retried. Each conflicting UUID
# is retried at most once per batch, so a conflict that cannot be resolved
# ends in an error instead of an endless retry loop.
#
# Does nothing except logging when the :migrate_vulnerability_finding_uuids
# feature flag is disabled.
#
# start_id - lower bound (inclusive) of the finding id range.
# end_id   - upper bound (inclusive) of the finding id range.
#
# Returns the result of `log_info` when the feature flag is disabled,
# otherwise the result of `mark_job_as_succeeded`.
# Any StandardError is logged and handed to
# Gitlab::ErrorTracking.track_and_raise_exception.
def perform(start_id, end_id)
  unless Feature.enabled?(:migrate_vulnerability_finding_uuids)
    return log_info('Migration is disabled by the feature flag', start_id: start_id, end_id: end_id)
  end

  log_info('Migration started', start_id: start_id, end_id: end_id)

  VulnerabilitiesFinding
    .joins(:primary_identifier)
    .includes(:signatures)
    .select(:id, :report_type, :primary_identifier_id, :fingerprint, :location_fingerprint, :project_id, :created_at, :vulnerability_id, :uuid)
    .where(id: start_id..end_id)
    .each_batch(of: 50) do |relation|
    duplicates = find_duplicates(relation)
    remove_findings(ids: duplicates) if duplicates.present?

    to_update = relation.reject { |finding| duplicates.include?(finding.id) }

    # UUIDs for which this batch has already been retried. Declared outside
    # the `begin` block so that it survives `retry`.
    retried_uuids = Set.new

    begin
      known_uuids = Set.new
      to_be_deleted = []

      mappings = to_update.each_with_object({}) do |finding, hash|
        uuid = calculate_uuid_v5_for_finding(finding)

        if known_uuids.add?(uuid)
          hash[finding] = { uuid: uuid }
        else
          to_be_deleted << finding.id
        end
      end

      # It is technically still possible to have duplicate uuids
      # if the data integrity is broken somehow and the primary identifiers of
      # the findings are pointing to different projects with the same fingerprint values.
      if to_be_deleted.present?
        log_info('Conflicting UUIDs found within the batch', finding_ids: to_be_deleted)

        remove_findings(ids: to_be_deleted)
      end

      ::Gitlab::Database::BulkUpdate.execute(%i[uuid], mappings) if mappings.present?

      log_info('Recalculation is done', finding_ids: mappings.keys.pluck(:id))
    rescue ActiveRecord::RecordNotUnique => error
      log_info('RecordNotUnique error received')

      match_data = /\(uuid\)=\((?<uuid>\S{36})\)/.match(error.message)

      # This exception returns the **correct** UUIDv5 which probably comes from a later record
      # and it's the one we can drop in the easiest way before retrying the UPDATE query
      if match_data
        uuid = match_data[:uuid]
        log_info('Conflicting UUID found', uuid: uuid)

        if retried_uuids.add?(uuid)
          id = VulnerabilitiesFinding.find_by(uuid: uuid)&.id
          remove_findings(ids: id) if id
          retry
        else
          # The same UUID conflicted again after we already tried to resolve
          # it, so another retry would fail in exactly the same way.
          log_error('Conflicting uuid is still present after a retry')

          Gitlab::ErrorTracking.track_and_raise_exception(error)
        end
      else
        log_error('Couldnt find conflicting uuid')

        Gitlab::ErrorTracking.track_and_raise_exception(error)
      end
    end
  end

  mark_job_as_succeeded(start_id, end_id)
rescue StandardError => error
  log_error('An exception happened')

  Gitlab::ErrorTracking.track_and_raise_exception(error)
end