Class: RightScraper::Retrievers::Download

Inherits:
Base
  • Object
show all
Defined in:
lib/right_scraper/retrievers/download.rb

Overview

A retriever for resources stored in archives on a web server somewhere. Uses command line curl and command line tar.

Defined Under Namespace

Classes: DownloadError

Constant Summary collapse

@@available =
false

Instance Attribute Summary

Attributes inherited from Base

#max_bytes, #max_seconds, #repo_dir, #repository

Instance Method Summary collapse

Methods inherited from Base

#ignorable_paths, #initialize, repo_dir

Constructor Details

This class inherits a constructor from RightScraper::Retrievers::Base

Instance Method Details

#available? ⇒ Boolean

Determines if downloader is available.

Returns:



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/right_scraper/retrievers/download.rb', line 40

# Determines if downloader is available.
#
# Runs "curl --version" and remembers a successful probe in @@available so
# later calls return without running the command again. A failed probe
# (non-zero exit status, or an error raised while running the command) is
# reported through @logger.note_error instead of being raised to the caller.
#
# === Return
# (Boolean):: current value of @@available; true once a probe has succeeded
def available?
  return @@available if @@available

  # FIX: we might want to parse the result and require a minimum curl
  # version.
  cmd = "curl --version"
  begin
    `#{cmd}`
    raise RetrieverError, "\"#{cmd}\" exited with #{$?.exitstatus}" unless $?.success?
    @@available = true
  rescue => e
    # Unavailability is logged, not propagated; the flag stays unset.
    @logger.note_error(e, :available, "download retriever is unavailable")
  end
  @@available
end

#exit_download(status) ⇒ Object



159
160
161
162
163
164
165
166
# File 'lib/right_scraper/retrievers/download.rb', line 159

# Exit callback for the external command (named as :exit_handler in the
# popen3_sync calls made by #retrieve).
#
# === Parameters
# status(Object):: exit status; must respond to success? and exitstatus
#
# === Return
# true:: always true when the command succeeded
#
# === Raise
# DownloadError:: if the command failed; the message includes the buffered
#                 command output followed by the exit code
def exit_download(status)
  if status.success?
    @logger.note_phase(:commit, :running_command, @cmd.first)
    return true
  end

  # Append the exit code to the captured output so it shows up in the error.
  @output.safe_buffer_data("Exit code = #{status.exitstatus}")
  raise DownloadError, "Downloader failed: #{@output.display_text}"
end

#note_tag(file) ⇒ Object

Amend @repository with the tag information from the downloaded file.

Parameters

file(String)

file that was downloaded



174
175
176
177
178
179
180
# File 'lib/right_scraper/retrievers/download.rb', line 174

# Amend @repository with the tag information from the downloaded file.
#
# The tag is the SHA1 hex digest of the file's complete contents. The
# repository object held in @repository is not mutated: a clone receives the
# tag and then replaces @repository.
#
# === Parameters
# file(String):: file that was downloaded
#
# === Return
# (Object):: the tagged clone now stored in @repository
def note_tag(file)
  # Digest the whole file, not just its first 4096 bytes: otherwise two
  # archives that differ only after the first block get the same tag, and an
  # empty file raises TypeError (read returns nil at EOF).
  tag = Digest::SHA1.file(file).hexdigest
  tagged = @repository.clone
  tagged.tag = tag
  @repository = tagged
end

#output_download(data) ⇒ Object



147
148
149
# File 'lib/right_scraper/retrievers/download.rb', line 147

# Output callback for the external command (named as both :stdout_handler
# and :stderr_handler in the popen3_sync calls made by #retrieve).
#
# === Parameters
# data(Object):: chunk of command output to append to @output
#
# === Return
# (Object):: whatever @output.safe_buffer_data returns
def output_download(data)
  buffer = @output
  buffer.safe_buffer_data(data)
end

#pid_download(pid) ⇒ Object



142
143
144
145
# File 'lib/right_scraper/retrievers/download.rb', line 142

# PID callback for the external command (named as :pid_handler in the
# popen3_sync calls made by #retrieve). Logs the start of the command.
#
# === Parameters
# pid(Object):: process id of the started command (not used here)
#
# === Return
# true:: always true
def pid_download(pid)
  command_name = @cmd.first
  @logger.note_phase(:begin, :running_command, command_name)
  true
end

#retrieve ⇒ Object

Download tarball and unpack it

Raises:



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/right_scraper/retrievers/download.rb', line 65

# Download tarball and unpack it
#
# Recreates @repo_dir and the download work directory from scratch, fetches
# @repository.url with curl into "<workdir>/package", records the tag of the
# downloaded file via note_tag, and finally extracts the archive into
# @repo_dir with tar. Both commands are bounded by @max_bytes/@max_seconds.
#
# === Return
# (Object):: result of the @logger.operation(:unpacking) call
#
# === Raise
# RetrieverError:: if the download retriever is unavailable
# DownloadError:: raised by this class's handlers when a command fails,
#                 times out or exceeds the size limit
def retrieve
  raise RetrieverError, "download retriever is unavailable" unless available?

  # File.exist? instead of the File.exists? alias, which was removed in
  # Ruby 3.2 and would raise NoMethodError there.
  FileUtils.remove_entry_secure(@repo_dir) if File.exist?(@repo_dir)
  FileUtils.remove_entry_secure(workdir) if File.exist?(workdir)
  FileUtils.mkdir_p(@repo_dir)
  FileUtils.mkdir_p(workdir)
  file = File.join(workdir, "package")

  @logger.operation(:downloading) do
    # NOTE(review): credentials are passed on curl's command line, where they
    # may be visible in process listings, and --location-trusted allows curl
    # to send them to hosts reached through redirects. Confirm this is
    # acceptable for the repositories being scraped.
    credential_command = if @repository.first_credential && @repository.second_credential
      ['-u', "#{@repository.first_credential}:#{@repository.second_credential}"]
    else
      []
    end
    curl_cmd = [
      'curl',
      '--silent', '--show-error', '--location', '--fail',
      '--location-trusted', '-o', file, credential_command,
      @repository.url
    ].flatten
    run_download_command(curl_cmd, workdir)
  end

  note_tag(file)

  @logger.operation(:unpacking) do
    # Pick tar's decompression flag from the URL path's extension.
    extraction = case @repository.to_url.path
                 when /\.gz$/  then "xzf"
                 when /\.bz2$/ then "xjf"
                 else "xf"
                 end
    Dir.chdir(@repo_dir) do
      run_download_command(['tar', extraction, file], @repo_dir)
    end
  end
end

# Run an external command synchronously through RightPopen with this
# object's *_download handlers, resetting @output and @cmd first because the
# handlers read them.
#
# === Parameters
# cmd(Array):: command and arguments to execute
# watch_directory(String):: directory passed as :watch_directory together
#                           with the @max_bytes size limit
#
# === Return
# (Object):: result of RightPopen.popen3_sync
def run_download_command(cmd, watch_directory)
  @output = ::RightScale::RightPopen::SafeOutputBuffer.new
  @cmd = cmd
  begin
    ::RightScale::RightPopen.popen3_sync(
      @cmd,
      :target             => self,
      :pid_handler        => :pid_download,
      :timeout_handler    => :timeout_download,
      :size_limit_handler => :size_limit_download,
      :exit_handler       => :exit_download,
      :stderr_handler     => :output_download,
      :stdout_handler     => :output_download,
      :inherit_io         => true,  # avoid killing any rails connection
      :watch_directory    => watch_directory,
      :size_limit_bytes   => @max_bytes,
      :timeout_seconds    => @max_seconds)
  rescue Exception => e
    # Deliberately broad: the failure is only logged here and always re-raised.
    @logger.note_phase(:abort, :running_command, @cmd.first, e)
    raise
  end
end
private :run_download_command

#size_limit_download ⇒ Object

Raises:



155
156
157
# File 'lib/right_scraper/retrievers/download.rb', line 155

# Size-limit callback for the external command (named as :size_limit_handler
# in the popen3_sync calls made by #retrieve).
#
# === Raise
# DownloadError:: always
def size_limit_download
  message = "Downloader exceeded size limit"
  raise DownloadError, message
end

#timeout_download ⇒ Object

Raises:



151
152
153
# File 'lib/right_scraper/retrievers/download.rb', line 151

# Timeout callback for the external command (named as :timeout_handler in
# the popen3_sync calls made by #retrieve).
#
# === Raise
# DownloadError:: always
def timeout_download
  message = "Downloader timed out"
  raise DownloadError, message
end

#workdir ⇒ Object

Directory used to download tarballs



60
61
62
# File 'lib/right_scraper/retrievers/download.rb', line 60

# Directory used to download tarballs
#
# Computed once as the "download" entry next to @repo_dir (i.e. inside
# @repo_dir's parent directory) and memoized in @workdir.
#
# === Return
# (String):: path of the download directory
def workdir
  @workdir ||= begin
    parent = ::File.dirname(@repo_dir)
    ::File.join(parent, 'download')
  end
end