Module: OpenURI

Defined in:
lib/open-uri.rb

Overview

OpenURI is an easy-to-use wrapper for net/http, net/https and net/ftp.

Example

An http, https, or ftp URL can be opened in the same way as a file:

open("http://www.ruby-lang.org/") {|f|
  f.each_line {|line| p line}
}

The opened file is extended by OpenURI::Meta, so it provides several methods for meta information, as shown below.

open("http://www.ruby-lang.org/en") {|f|
  f.each_line {|line| p line}
  p f.base_uri         # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
  p f.content_type     # "text/html"
  p f.charset          # "iso-8859-1"
  p f.content_encoding # []
  p f.last_modified    # Thu Dec 05 02:45:02 UTC 2002
}

Additional header fields can be specified by an optional hash argument.

open("http://www.ruby-lang.org/en/",
  "User-Agent" => "Ruby/#{RUBY_VERSION}",
  "From" => "foo@bar.invalid",
  "Referer" => "http://www.ruby-lang.org/") {|f|
  # ...
}

Environment variables such as http_proxy, https_proxy and ftp_proxy are in effect by default. Passing :proxy => nil disables the proxy.

open("http://www.ruby-lang.org/en/raa.html", :proxy => nil) {|f|
  # ...
}

URI objects can be opened in a similar way.

uri = URI.parse("http://www.ruby-lang.org/en/")
uri.open {|f|
  # ...
}

URI objects can be read directly. The returned string is also extended by OpenURI::Meta.

str = uri.read
p str.base_uri

Author

Tanaka Akira <akr@m17n.org>

Constant Summary

Class Method Summary collapse

Class Method Details

.check_options(options) ⇒ Object

:nodoc:



108
109
110
111
112
113
114
115
# File 'lib/open-uri.rb', line 108

# Validates the keys of +options+.
#
# Only Symbol keys are checked; keys of any other class (for example the
# String keys used for header fields) are skipped.  A Symbol key that is not
# present in Options raises ArgumentError.  Returns whatever +options.each+
# returns.
def OpenURI.check_options(options) # :nodoc:
  options.each do |name, _value|
    if Symbol === name && !Options.include?(name)
      raise ArgumentError, "unrecognized option: #{name}"
    end
  end
end

.open_http(buf, target, proxy, options) ⇒ Object

:nodoc:



246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# File 'lib/open-uri.rb', line 246

# Issues one HTTP GET request for +target+ and stores the response in +buf+.
#
# buf     :: receives each body chunk via <<; afterwards buf.io is rewound and
#            given the response status and every response header field.
# target  :: the URI to fetch.  A URI::HTTP (which, per the comment below,
#            covers HTTPS too) is requested from its own host and port; any
#            other URI is sent to the proxy host with the full URI string as
#            the request target.
# proxy   :: nil/false, or a [proxy_uri, proxy_user, proxy_pass] triple.
# options :: String keys are copied into the request header.  The Symbol keys
#            read here are :ssl_verify_mode, :ssl_ca_cert, :read_timeout,
#            :http_basic_authentication, :content_length_proc and
#            :progress_proc.
#
# Outcome by response class:
# * Net::HTTPSuccess - falls through; the method's value is nil.
# * 301/302/303/307  - throws :open_uri_redirect with the parsed Location URI,
#   or raises OpenURI::HTTPError if Location is not a valid URI.
# * anything else    - raises OpenURI::HTTPError carrying the status and io.
#
# Also raises RuntimeError when the proxy URI's class is not exactly URI::HTTP,
# and ArgumentError when +target+ has userinfo on Ruby >= 1.9.0.
def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
  if proxy
    proxy_uri, proxy_user, proxy_pass = proxy
    # Exact class comparison: subclasses of URI::HTTP are rejected here as well.
    raise "Non-HTTP proxy URI: #{proxy_uri}" if proxy_uri.class != URI::HTTP
  end

  # NOTE: the version test is a plain String comparison against RUBY_VERSION.
  if target.userinfo && "1.9.0" <= RUBY_VERSION
    # don't raise for 1.8 because compatibility.
    raise ArgumentError, "userinfo not supported.  [RFC3986]"
  end

  # Only String-keyed options become request header fields; Symbol-keyed
  # options are consumed individually below.
  header = {}
  options.each {|k, v| header[k] = v if String === k }

  require 'net/http'
  klass = Net::HTTP
  if URI::HTTP === target
    # HTTP or HTTPS
    if proxy
      # Credentials are handed to Net::HTTP::Proxy only when both are present.
      if proxy_user && proxy_pass
        klass = Net::HTTP::Proxy(proxy_uri.host, proxy_uri.port, proxy_user, proxy_pass)
      else
        klass = Net::HTTP::Proxy(proxy_uri.host, proxy_uri.port)
      end
    end
    target_host = target.host
    target_port = target.port
    request_uri = target.request_uri
  else
    # FTP over HTTP proxy
    # The connection goes to the proxy itself and the complete target URI is
    # used as the request target.  This branch dereferences proxy_uri, so it
    # relies on the caller having supplied a proxy.
    target_host = proxy_uri.host
    target_port = proxy_uri.port
    request_uri = target.to_s
    if proxy_user && proxy_pass
      # Basic credentials: Base64 via pack('m'), with the line breaks that
      # pack inserts stripped out.
      header["Proxy-Authorization"] = 'Basic ' + ["#{proxy_user}:#{proxy_pass}"].pack('m').delete("\r\n")
    end
  end

  http = klass.new(target_host, target_port)
  if target.class == URI::HTTPS
    require 'net/https'
    http.use_ssl = true
    # Peer verification is the default unless :ssl_verify_mode overrides it.
    http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
    store = OpenSSL::X509::Store.new
    # :ssl_ca_cert may name a directory or a single file; without it the
    # store's default paths are used.
    if options[:ssl_ca_cert]
      if File.directory? options[:ssl_ca_cert]
        store.add_path options[:ssl_ca_cert]
      else
        store.add_file options[:ssl_ca_cert]
      end
    else
      store.set_default_paths
    end
    http.cert_store = store
  end
  if options.include? :read_timeout
    http.read_timeout = options[:read_timeout]
  end

  # resp is declared outside the blocks so the response object is still
  # available after http.start returns.
  resp = nil
  http.start {
    req = Net::HTTP::Get.new(request_uri, header)
    if options.include? :http_basic_authentication
      user, pass = options[:http_basic_authentication]
      req.basic_auth user, pass
    end
    http.request(req) {|response|
      resp = response
      # :content_length_proc is invoked only for successful responses; it gets
      # the Content-Length as an Integer, or nil when the header is absent.
      if options[:content_length_proc] && Net::HTTPSuccess === resp
        if resp.key?('Content-Length')
          options[:content_length_proc].call(resp['Content-Length'].to_i)
        else
          options[:content_length_proc].call(nil)
        end
      end
      # The body is appended to buf chunk by chunk.  For successful responses
      # :progress_proc is called after each chunk with the current buf.size.
      resp.read_body {|str|
        buf << str
        if options[:progress_proc] && Net::HTTPSuccess === resp
          options[:progress_proc].call(buf.size)
        end
      }
    }
  }
  # Rewind the buffered IO and attach the status and all response header
  # fields to it, whatever the response class turns out to be.
  io = buf.io
  io.rewind
  io.status = [resp.code, resp.message]
  resp.each {|name,value| buf.io.meta_add_field name, value }
  case resp
  when Net::HTTPSuccess
    # Nothing more to do.
  when Net::HTTPMovedPermanently, # 301
       Net::HTTPFound, # 302
       Net::HTTPSeeOther, # 303
       Net::HTTPTemporaryRedirect # 307
    begin
      loc_uri = URI.parse(resp['location'])
    rescue URI::InvalidURIError
      raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io)
    end
    # Non-local exit: the redirect target is delivered to whoever established
    # catch(:open_uri_redirect).
    throw :open_uri_redirect, loc_uri
  else
    raise OpenURI::HTTPError.new(io.status.join(' '), io)
  end
end

.open_loop(uri, options) ⇒ Object

:nodoc:



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/open-uri.rb', line 159

# Opens +uri+ and follows redirects until a non-redirect response arrives.
#
# The proxy is selected from +options+:
# * :proxy_http_basic_authentication => [proxy, user, pass]
# * :proxy => true | nil | false | String | URI::Generic
# * neither given: behaves as :proxy => true, which asks each URI for its
#   proxy via +find_proxy+.
# Giving both options, or an unsupported proxy value, raises ArgumentError.
#
# Each attempt gets a fresh Buffer passed to +uri.buffer_open+.  A redirect is
# signalled by a throw of :open_uri_redirect carrying the new URI.  Following
# it is refused when :redirect is false (HTTPRedirect), when
# OpenURI.redirectable? rejects the hop, or when the new URI was already
# visited.
#
# Returns the final buffer's io with +base_uri+ set to the URI actually read.
def OpenURI.open_loop(uri, options) # :nodoc:
  proxy_opts = [:proxy_http_basic_authentication, :proxy].select {|key| options.include?(key) }
  raise ArgumentError, "multiple proxy options specified" if proxy_opts.length > 1

  proxy_user = nil
  proxy_pass = nil
  case proxy_opts.first
  when :proxy_http_basic_authentication
    opt_proxy, proxy_user, proxy_pass = options.fetch(:proxy_http_basic_authentication)
    proxy_user = proxy_user.to_str
    proxy_pass = proxy_pass.to_str
    if opt_proxy == true
      raise ArgumentError, "Invalid authenticated proxy option: #{options[:proxy_http_basic_authentication].inspect}"
    end
  when :proxy
    opt_proxy = options.fetch(:proxy)
  else
    # No proxy option at all: fall back to per-URI proxy discovery.
    opt_proxy = true
  end

  # Used for an explicitly configured proxy.  opt_proxy is read when the lambda
  # is called, so the String branch below may still replace it with a URI.
  explicit_proxy = lambda {|_u| [opt_proxy, proxy_user, proxy_pass] }
  find_proxy =
    case opt_proxy
    when true
      lambda {|u|
        pxy = u.find_proxy
        pxy ? [pxy, nil, nil] : nil
      }
    when nil, false
      lambda {|_u| nil }
    when String
      opt_proxy = URI.parse(opt_proxy)
      explicit_proxy
    when URI::Generic
      explicit_proxy
    else
      raise ArgumentError, "Invalid proxy option: #{opt_proxy}"
    end

  visited = {}
  buf = nil
  while true
    redirect = catch(:open_uri_redirect) {
      buf = Buffer.new
      uri.buffer_open(buf, find_proxy.call(uri), options)
      nil
    }
    break unless redirect

    # Although it violates RFC2616, a Location: field may hold a relative
    # URI.  Resolve it against the URI that was just requested.
    redirect = uri + redirect if redirect.relative?

    unless options.fetch(:redirect, true)
      raise HTTPRedirect.new(buf.io.status.join(' '), buf.io, redirect)
    end
    unless OpenURI.redirectable?(uri, redirect)
      raise "redirection forbidden: #{uri} -> #{redirect}"
    end

    if options.include? :http_basic_authentication
      # Credentials are sent only to the URI the caller named directly.
      # Work on a copy so the caller's hash is left untouched.
      options = options.dup
      options.delete :http_basic_authentication
    end

    uri = redirect
    location = uri.to_s
    raise "HTTP redirection loop: #{uri}" if visited.include? location
    visited[location] = true
  end

  result = buf.io
  result.base_uri = uri
  result
end

.open_uri(name, *rest) ⇒ Object

:nodoc:

Raises:

  • (ArgumentError)


127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/open-uri.rb', line 127

# Opens the resource identified by +name+ for reading.
#
# name :: a URI::Generic, or anything URI.parse accepts.
# rest :: optional +mode+ (String or Integer), optional +perm+ (Integer, parsed
#         but not used here), then an optional options Hash.
#
# Accepted modes are nil, "r", "rb", either of those followed by
# ":<encoding>", and File::RDONLY.  When an encoding is named it is validated
# with Encoding.find and applied to the resulting io with +set_encoding+.
#
# With a block, yields the io and closes it afterwards, returning the block's
# value; without a block, returns the io.
#
# Raises ArgumentError for leftover arguments, for unrecognized Symbol options
# (via OpenURI.check_options), for an unknown encoding name, and for any mode
# that is not read-only.
def OpenURI.open_uri(name, *rest) # :nodoc:
  uri = URI::Generic === name ? name : URI.parse(name)
  mode, _perm, rest = OpenURI.scan_open_optional_arguments(*rest)
  options = rest.shift if !rest.empty? && Hash === rest.first
  raise ArgumentError.new("extra arguments") if !rest.empty?
  options ||= {}
  OpenURI.check_options(options)

  encoding = nil
  # Only String modes go through the regexp.  Regexp#=~ raises TypeError for an
  # Integer operand, which used to make File::RDONLY (explicitly allowed by the
  # check below) impossible to pass.
  if String === mode && (matched = /\Arb?(?:\Z|:([^:]+))/.match(mode))
    if matched[1]
      Encoding.find(matched[1]) # raises ArgumentError for an unknown name
      encoding = matched[1]
    end
    mode = nil
  end

  unless mode == nil ||
         mode == 'r' || mode == 'rb' ||
         mode == File::RDONLY
    raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
  end

  io = open_loop(uri, options)
  io.set_encoding(encoding) if encoding
  if block_given?
    begin
      yield io
    ensure
      io.close
    end
  else
    io
  end
end

.redirectable?(uri1, uri2) ⇒ Boolean

:nodoc:

Returns:

  • (Boolean)


235
236
237
238
239
240
241
242
243
244
# File 'lib/open-uri.rb', line 235

# Decides whether a redirect from +uri1+ to +uri2+ may be followed.
#
# A redirect is allowed when both schemes are equal (ignoring case), or when
# each scheme is one of http / ftp.  The intent is to forbid hops such as
# http://... -> file:///etc/passwd.  https -> http is forbidden on purpose so
# that secure cookies or referers are not sent over plain HTTP
# (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3).
# This rule is ad hoc; it should become extensible/configurable.
#
# Returns +true+ for equal schemes; otherwise the result of the second regexp
# match (an Integer index) when both schemes qualify, or +nil+.
def OpenURI.redirectable?(uri1, uri2) # :nodoc:
  from = uri1.scheme
  to = uri2.scheme
  return true if from.downcase == to.downcase

  plain_scheme = /\A(?:http|ftp)\z/i
  plain_scheme =~ from && plain_scheme =~ to
end

.scan_open_optional_arguments(*rest) ⇒ Object

:nodoc:



117
118
119
120
121
122
123
124
125
# File 'lib/open-uri.rb', line 117

# Splits the optional leading +mode+ and +perm+ arguments off +rest+.
#
# +mode+ is taken when the first argument is a String or an Integer; +perm+ is
# taken only after a mode, and only when the next argument is an Integer.
#
# Returns [mode, perm, remaining_arguments]; mode and perm are nil when absent.
def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
  mode = nil
  perm = nil
  case rest.first
  when String, Integer
    mode = rest.shift
    perm = rest.shift if Integer === rest.first
  end
  [mode, perm, rest]
end