Class: RightScraper::Retrievers::Git

Inherits:
CheckoutBasedRetriever show all
Defined in:
lib/right_scraper/retrievers/git.rb

Overview

Retriever for resources stored in a git repository.

Constant Summary collapse

@@available =
false

Instance Attribute Summary

Attributes inherited from Base

#max_bytes, #max_seconds, #repo_dir, #repository

Instance Method Summary collapse

Methods inherited from Base

#initialize, repo_dir

Constructor Details

This class inherits a constructor from RightScraper::Retrievers::Base

Instance Method Details

#available? ⇒ Boolean

Determines if downloader is available.

Returns:

  • (Boolean)


32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/right_scraper/retrievers/git.rb', line 32

# Determines whether the git retriever can be used: the 'git' library must
# load and "git --version" must exit successfully. A positive result is
# remembered in @@available so the probe is only repeated until it first
# succeeds. Any failure is reported through @logger.note_error and leaves
# @@available untouched.
#
# @return [Boolean] current value of @@available
def available?
  unless @@available
    begin
      require 'git'
      # note that require 'git' does the same version check on load but
      # we don't want to assume any particular implementation.
      #
      # TODO: we might want to parse the result and require a minimum git
      # client version.
      cmd = "git --version"
      `#{cmd}`
      if $?.success?
        @@available = true
      else
        raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}"
      end
    rescue StandardError, LoadError => e
      # LoadError descends from ScriptError, not StandardError, so a bare
      # rescue would let a missing 'git' library escape instead of being
      # logged and reported as "unavailable".
      @logger.note_error(e, :available, "git retriever is unavailable")
    end
  end
  @@available
end

#branch?(git, name) ⇒ Boolean

Returns:

  • (Boolean)


139
140
141
# File 'lib/right_scraper/retrievers/git.rb', line 139

# Looks for a branch called +name+ among git.branches.
#
# @param git [Object] handle responding to #branches
# @param name [String] branch name to look for
# @return [Object, nil] the first matching branch entry (truthy), or nil
#   when no entry has that name
def branch?(git, name)
  candidates = git.branches
  candidates.find { |branch| branch.name == name }
end

#do_checkout ⇒ Object

Clone the remote repository. The operations are as follows:

  • clone repository to @repo_dir

  • checkout #tag

  • update @repository#tag



110
111
112
113
114
115
116
117
118
# File 'lib/right_scraper/retrievers/git.rb', line 110

# Performs a fresh checkout: runs the inherited checkout step, clones
# @repository.url into @repo_dir (wrapped in a logged :cloning operation),
# then fetches, checks out the requested revision and records the
# resulting HEAD via do_update_tag.
#
# @return [Object] whatever do_update_tag returns
def do_checkout
  super
  destination = "to #{@repo_dir}"
  git = @logger.operation(:cloning, destination) { ::Git.clone(@repository.url, @repo_dir) }
  do_fetch(git)
  do_checkout_revision(git)
  do_update_tag(git)
end

#do_checkout_revision(git) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/right_scraper/retrievers/git.rb', line 120

# Checks out the revision named by repo_tag inside a logged
# :checkout_revision operation. Resolution order:
#
# 1. a name that is both a tag and a branch is rejected as ambiguous;
# 2. a remote branch with that name is checked out;
# 3. otherwise a local branch with that name is checked out;
# 4. otherwise the name is handed to git.checkout as-is.
#
# Nothing happens (nil is returned) when repo_tag is falsy.
#
# @param git [Object] handle for the working copy
# @return [Object, nil] result of the logged operation
# @raise [RuntimeError] when the name denotes both a branch and a tag
def do_checkout_revision(git)
  return unless repo_tag

  @logger.operation(:checkout_revision) do
    if tag?(git, repo_tag) && branch?(git, repo_tag)
      raise "Ambiguous reference: '#{repo_tag}' denotes both a branch and a tag"
    elsif (remote = find_remote_branch(git, repo_tag))
      remote.checkout
    elsif (local = find_local_branch(git, repo_tag))
      local.checkout
    else
      git.checkout(repo_tag)
    end
  end
end

#do_fetch(git) ⇒ Object



77
78
79
80
81
82
# File 'lib/right_scraper/retrievers/git.rb', line 77

# Refreshes the working copy from its remotes inside a logged :fetch
# operation. Every tag currently listed by git.tags is deleted first
# (via "tag -d"), then everything is fetched with --all --prune --tags.
#
# @param git [Object] handle for the working copy
# @return [Object] result of the logged operation
def do_fetch(git)
  @logger.operation(:fetch) do
    git.tags.each do |stale_tag|
      git.lib.tag(['-d', stale_tag.name])
    end
    git.fetch(%w[--all --prune --tags])
  end
end

#do_update ⇒ Object

Incrementally update the checkout. The operations are as follows:

  • checkout #tag

  • if #tag is the head of a branch:

    • find that branch’s remote

    • fetch it

    • merge changes

    • update @repository#tag

Note that if #tag is a SHA revision or a tag that exists in the current repository, no fetching is done.



93
94
95
96
97
98
99
# File 'lib/right_scraper/retrievers/git.rb', line 93

# Incrementally updates the existing checkout in @repo_dir: opens it,
# fetches (unconditionally, see do_fetch), hard-resets the working tree,
# checks out the requested revision and records the resulting HEAD.
#
# @return [Object] whatever do_update_tag returns
def do_update
  working_copy = ::Git.open(@repo_dir)
  do_fetch working_copy
  # Discard local modifications before switching revisions.
  working_copy.reset_hard
  do_checkout_revision working_copy
  do_update_tag working_copy
end

#do_update_tag(git) ⇒ Object



101
102
103
104
# File 'lib/right_scraper/retrievers/git.rb', line 101

# Replaces @repository with a clone of itself whose tag is the SHA of the
# working copy's HEAD tree. The previous repository object is left
# unmodified.
#
# @param git [Object] handle for the working copy
# @return [Object] the SHA assigned to the tag
def do_update_tag(git)
  pinned = @repository.clone
  @repository = pinned
  pinned.tag = git.gtree("HEAD").sha
end

#exists? ⇒ Boolean

Return true if a checkout exists. Currently tests for .git in the checkout.

Returns ===

Boolean

true if the checkout already exists (and thus incremental updating can occur).

Returns:

  • (Boolean)


73
74
75
# File 'lib/right_scraper/retrievers/git.rb', line 73

# Reports whether a checkout already exists, judged by the presence of a
# ".git" entry directly under @repo_dir.
#
# Uses File.exist? because the File.exists? alias was deprecated and then
# removed in Ruby 3.2, where calling it raises NoMethodError.
#
# @return [Boolean] true if the checkout already exists (and thus
#   incremental updating can occur)
def exists?
  File.exist?(File.join(@repo_dir, '.git'))
end

#find_branch(git, tag) ⇒ Object



149
150
151
# File 'lib/right_scraper/retrievers/git.rb', line 149

# Finds a branch by name, preferring a local branch over a remote one.
#
# @param git [Object] handle for the working copy
# @param tag [String] branch name to look for
# @return [Object, nil] the local match if any, otherwise the result of
#   the remote lookup
def find_branch(git, tag)
  local_match = find_local_branch(git, tag)
  return local_match if local_match

  find_remote_branch(git, tag)
end

#find_local_branch(git, name) ⇒ Object



153
154
155
# File 'lib/right_scraper/retrievers/git.rb', line 153

# Looks for a branch called +name+ among git.branches.local.
#
# @param git [Object] handle responding to #branches
# @param name [String] branch name to look for
# @return [Object, nil] the first matching local branch, or nil
def find_local_branch(git, name)
  local_branches = git.branches.local
  local_branches.find { |branch| branch.name == name }
end

#find_remote_branch(git, name) ⇒ Object



157
158
159
# File 'lib/right_scraper/retrievers/git.rb', line 157

# Looks for a branch called +name+ among git.branches.remote.
#
# @param git [Object] handle responding to #branches
# @param name [String] branch name to look for
# @return [Object, nil] the first matching remote branch, or nil
def find_remote_branch(git, name)
  remote_branches = git.branches.remote
  remote_branches.find { |branch| branch.name == name }
end

#ignorable_paths ⇒ Object

Ignore .git directories.



162
163
164
# File 'lib/right_scraper/retrievers/git.rb', line 162

# Lists the path names to ignore; for git that is the ".git" directory.
#
# @return [Array<String>] a fresh single-element array on every call
def ignorable_paths
  %w[.git]
end

#repo_tag ⇒ Object



143
144
145
146
147
# File 'lib/right_scraper/retrievers/git.rb', line 143

# Name of the revision to check out: @repository.tag with a trailing
# newline removed, falling back to "master" when the tag is missing or
# empty after that cleanup.
#
# @return [String] the revision name
def repo_tag
  requested = @repository.tag
  cleaned = requested ? requested.chomp : "master"
  cleaned.empty? ? "master" : cleaned
end

#retrieve ⇒ Object

In addition to normal retriever initialization, if the underlying repository has a credential we need to initialize a fresh SSHAgent and add the credential to it.

Raises:



58
59
60
61
62
63
64
65
# File 'lib/right_scraper/retrievers/git.rb', line 58

# Runs the inherited retrieval inside a RightScraper::Processes::SSHAgent
# session. When the repository carries a first credential, it is added to
# the agent before the inherited retrieval starts.
#
# @return [Object] whatever SSHAgent.with returns
# @raise [RetrieverError] when available? reports the git retriever as
#   unusable
def retrieve
  unless available?
    raise RetrieverError, "git retriever is unavailable"
  end

  RightScraper::Processes::SSHAgent.with do |agent|
    has_credential = !@repository.first_credential.nil?
    agent.add_key(@repository.first_credential) if has_credential
    super
  end
end

#tag?(git, name) ⇒ Boolean

Returns:

  • (Boolean)


135
136
137
# File 'lib/right_scraper/retrievers/git.rb', line 135

# Looks for a tag called +name+ among git.tags.
#
# @param git [Object] handle responding to #tags
# @param name [String] tag name to look for
# @return [Object, nil] the first matching tag entry (truthy), or nil when
#   no entry has that name
def tag?(git, name)
  known_tags = git.tags
  known_tags.find { |tag| tag.name == name }
end