Class: Gitlab::GithubImport::Importer::PullRequestsImporter

Inherits:
Object
  • Object
show all
Includes:
ParallelScheduling
Defined in:
lib/gitlab/github_import/importer/pull_requests_importer.rb

Constant Summary

Constants included from ParallelScheduling

ParallelScheduling::ALREADY_IMPORTED_CACHE_KEY, ParallelScheduling::JOB_WAITER_CACHE_KEY, ParallelScheduling::JOB_WAITER_REMAINING_CACHE_KEY

Instance Attribute Summary

Attributes included from ParallelScheduling

#already_imported_cache_key, #client, #job_waiter_cache_key, #job_waiter_remaining_cache_key, #page_counter, #project

Instance Method Summary collapse

Methods included from ParallelScheduling

#abort_on_failure, #already_imported?, #execute, #increment_object_counter?, #initialize, #mark_as_imported, #parallel?, #parallel_import, #sequential_import, #spread_parallel_import

Instance Method Details

#collection_method ⇒ Object



69
70
71
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 69

# Returns the Symbol +:pull_requests+.
#
# NOTE(review): presumably the name of the client method that
# ParallelScheduling invokes to fetch the collection to import — confirm
# against the mixin.
def collection_method
  :pull_requests
end

#collection_options ⇒ Object



73
74
75
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 73

# Options describing which pull requests to request and in what order:
# every state, sorted by creation time, ascending.
#
# NOTE(review): presumably passed along with #collection_method when the
# collection is fetched — confirm against ParallelScheduling.
#
# @return [Hash{Symbol => String}]
def collection_options
  {
    state: 'all',
    sort: 'created',
    direction: 'asc'
  }
end

#commit_exists?(sha) ⇒ Boolean

Returns:

  • (Boolean)


65
66
67
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 65

# Looks up +sha+ in the project's repository and reports whether the lookup
# produced something present.
#
# @param sha [String] the commit SHA to look up
# @return [Boolean]
def commit_exists?(sha)
  commit = project.repository.commit(sha)

  commit.present?
end

#each_object_to_import ⇒ Object



29
30
31
32
33
34
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 29

# Wraps the inherited iteration: before a pull request is handed to the
# caller's block, the repository is refreshed when #update_repository?
# says it is required for that pull request.
#
# @yield [pull_request] each object produced by the parent implementation
def each_object_to_import
  super do |pull_request|
    # Refresh first so the yielded pull request is processed against an
    # up-to-date repository.
    update_repository if update_repository?(pull_request)

    yield pull_request
  end
end

#id_for_already_imported_cache(pr) ⇒ Object



21
22
23
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 21

# Returns the value stored under +:number+ in +pr+.
#
# NOTE(review): presumably used by ParallelScheduling as the identifier when
# tracking which pull requests were already imported — confirm against the
# mixin.
#
# @param pr [#[]] pull request data indexable by Symbol key
def id_for_already_imported_cache(pr)
  pr[:number]
end

#importer_class ⇒ Object



9
10
11
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 9

# Returns the +PullRequestImporter+ class.
#
# NOTE(review): presumably the class used to import a single pull request
# during a sequential import — confirm against ParallelScheduling.
def importer_class
  PullRequestImporter
end

#object_type ⇒ Object



25
26
27
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 25

# Returns the Symbol +:pull_request+, naming the kind of object this
# importer handles.
def object_type
  :pull_request
end

#parallel_import_batch ⇒ Object



77
78
79
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 77

# Batch settings for the parallel import: a size of 200 and a delay of one
# minute.
#
# NOTE(review): presumably consumed by ParallelScheduling#spread_parallel_import
# to space out scheduled jobs — confirm against the mixin.
#
# @return [Hash] with +:size+ and +:delay+ keys
def parallel_import_batch
  {
    size: 200,
    delay: 1.minute
  }
end

#repository_updates_counter ⇒ Object



81
82
83
84
85
86
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 81

# Memoized metrics counter named +:github_importer_repository_updates+.
# The counter is requested from Gitlab::Metrics on first use and the same
# object is returned on later calls.
def repository_updates_counter
  @repository_updates_counter ||= begin
    description = 'The number of times repositories have to be updated again'

    Gitlab::Metrics.counter(:github_importer_repository_updates, description)
  end
end

#representation_class ⇒ Object



13
14
15
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 13

# Returns the +Gitlab::GithubImport::Representation::PullRequest+ class.
#
# NOTE(review): presumably used to wrap the raw pull request data before it
# is imported or scheduled — confirm against ParallelScheduling.
def representation_class
  Gitlab::GithubImport::Representation::PullRequest
end

#sidekiq_worker_class ⇒ Object



17
18
19
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 17

# Returns the +ImportPullRequestWorker+ class.
#
# NOTE(review): presumably the worker scheduled for each pull request during
# a parallel import — confirm against ParallelScheduling.
def sidekiq_worker_class
  ImportPullRequestWorker
end

#update_repository ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 36

# Re-fetches the project's repository from its import URL, logs that the
# update finished, and bumps the repository-updates counter.
def update_repository
  # The timestamp is deliberately written _before_ the fetch starts. Writing
  # it afterwards could hide changes pushed while the fetch was running, or
  # in the gap between the fetch finishing and the timestamp being stored.
  project.touch(:last_repository_updated_at)

  project.repository.fetch_remote(
    project.import_url,
    refmap: Gitlab::GithubImport.refmap,
    forced: true
  )

  Gitlab::Import::Logger.info(
    message: 'GitHub importer finished updating repository',
    project_name: project.path_with_namespace
  )

  repository_updates_counter.increment
end

#update_repository?(pr) ⇒ Boolean

Returns:

  • (Boolean)


55
56
57
58
59
60
61
62
63
# File 'lib/gitlab/github_import/importer/pull_requests_importer.rb', line 55

# Decides whether the repository has to be fetched again before +pr+ is
# processed.
#
# Returns false when the pull request was last updated before the project's
# +last_repository_updated_at+ (falling back to +created_at+ when that is
# nil). Otherwise returns true unless both the head and the base SHA of the
# pull request already exist according to #commit_exists?.
#
# @param pr [#[], #dig] pull request data exposing +:updated_at+ and the
#   nested +:head+/+:base+ SHAs
# @return [Boolean]
def update_repository?(pr)
  reference_time = project.last_repository_updated_at || project.created_at

  return false if pr[:updated_at] < reference_time

  # A pull request can change without gaining commits, so a re-fetch is only
  # worthwhile when one of its commits is actually missing. The head SHA is
  # checked first; the base SHA is only looked up when the head exists.
  !%i[head base].all? { |ref| commit_exists?(pr.dig(ref, :sha)) }
end