Class: Down::Wget

Inherits:
Backend show all
Defined in:
lib/down/wget.rb

Overview

Provides streaming downloads implemented with the wget command-line tool. The design is very similar to Down::Http.

Defined Under Namespace

Modules: DownloadedFile Classes: Command

Instance Method Summary collapse

Methods inherited from Backend

download, open

Constructor Details

#initialize(*arguments) ⇒ Wget

Initializes the backend with common defaults.



20
21
22
23
24
25
26
27
28
# File 'lib/down/wget.rb', line 20

# Stores the argument list used by this backend: a hash of default options
# first, followed by whatever the caller passed in, in the given order.
def initialize(*arguments)
  defaults = {
    user_agent:      "Down/#{Down::VERSION}",
    max_redirect:    2,
    dns_timeout:     30,
    connect_timeout: 30,
    read_timeout:    30,
  }

  @arguments = [defaults, *arguments]
end

Instance Method Details

#download(url, *args, max_size: nil, content_length_proc: nil, progress_proc: nil, destination: nil, **options) ⇒ Object

Downloads the remote file to disk. Accepts wget command-line options and some additional options as well.



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/down/wget.rb', line 32

# Opens +url+ through #open (non-rewindable), copies the response body into a
# Tempfile chunk by chunk and hands the tempfile to #download_result together
# with +destination+.
#
# * +max_size+ - when given, Down::TooLarge is raised if the size reported by
#   the opened IO, or the number of bytes written so far, exceeds it.
# * +content_length_proc+ - called once with the reported size, if there is one.
# * +progress_proc+ - called after every written chunk with the tempfile size.
#
# Remaining positional and keyword arguments are forwarded to #open. On any
# StandardError the tempfile is closed and unlinked before re-raising; the
# opened IO is always closed.
def download(url, *args, max_size: nil, content_length_proc: nil, progress_proc: nil, destination: nil, **options)
  io = open(url, *args, **options, rewindable: false)

  # Shared by both size checks; sizes are reported in whole megabytes.
  too_large = lambda do |bytes|
    raise Down::TooLarge, "file is too large (#{bytes/1024/1024}MB, max is #{max_size/1024/1024}MB)"
  end

  reported_size = io.size

  content_length_proc.call(reported_size) if content_length_proc && reported_size
  too_large.call(reported_size) if max_size && reported_size && reported_size > max_size

  tempfile = Tempfile.new(["down-wget", File.extname(URI(url).path)], binmode: true)

  buffer = nil
  until io.eof?
    # The same string is handed to every readpartial call as its buffer.
    buffer ||= String.new
    tempfile.write(io.readpartial(nil, buffer))

    progress_proc.call(tempfile.size) if progress_proc

    # The advertised size may be missing, so enforce the limit on what was
    # actually written as well.
    too_large.call(tempfile.size) if max_size && tempfile.size > max_size
  end

  tempfile.open # flush written content

  tempfile.extend Down::Wget::DownloadedFile
  tempfile.url     = url
  tempfile.headers = io.data[:headers]

  download_result(tempfile, destination)
rescue
  tempfile.close! if tempfile
  raise
ensure
  io.close if io
end

#open(url, *args, rewindable: true, **options) ⇒ Object

Starts retrieving the remote file and returns an IO-like object which downloads the response body on-demand. Accepts wget command-line options.



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/down/wget.rb', line 72

# Runs the wget command built by #generate_command for +url+ and returns a
# Down::ChunkedIO over the response body.
#
# The command output is read until the blank line that ends the header
# section; the headers are parsed with HTTP::Parser and the rest of the output
# is exposed lazily as body chunks. The returned object is given the
# Content-Length as its size, the Content-Type charset as its encoding, the
# requested +rewindable+ flag, and the status and headers under +data+.
#
# Raises Down::Error (after closing the command output) when the headers
# cannot be parsed.
def open(url, *args, rewindable: true, **options)
  command = Down::Wget::Command.execute(generate_command(url, *args, **options))

  # Wrap the wget command output in an IO-like object; closing it terminates
  # the command.
  raw_output = Down::ChunkedIO.new(
    chunks:     command.enum_for(:output),
    on_close:   command.method(:terminate),
    rewindable: false,
  )

  # https://github.com/tmm1/http_parser.rb/issues/29#issuecomment-309976363
  head = raw_output.readpartial
  until head.include?("\r\n\r\n")
    head << raw_output.readpartial
  end
  # Whatever followed the header terminator is already part of the body.
  head, body_start = head.split("\r\n\r\n", 2)

  # Use an HTTP parser to parse out the response headers.
  parser = HTTP::Parser.new
  parser << head

  response_headers = parser.headers

  if response_headers.nil?
    raw_output.close
    raise Down::Error, "failed to parse response headers"
  end

  response_status = parser.status_code

  length_header  = response_headers["Content-Length"]
  content_length = length_header.to_i if length_header

  type_header = response_headers["Content-Type"]
  charset     = type_header[/;\s*charset=([^;]+)/i, 1] if type_header

  # Lazily yields the body: first the part read together with the headers,
  # then the remaining command output.
  body_chunks = Enumerator.new do |yielder|
    yielder.yield(body_start) if body_start

    until raw_output.eof?
      yielder.yield(raw_output.readpartial)
    end
  end

  Down::ChunkedIO.new(
    chunks:     body_chunks,
    size:       content_length,
    encoding:   charset,
    rewindable: rewindable,
    on_close:   raw_output.method(:close),
    data:       { status: response_status, headers: response_headers },
  )
end