Class: Polipus::HTTP

Inherits:
Object
  • Object
show all
Defined in:
lib/polipus/http.rb

Constant Summary collapse

REDIRECT_LIMIT =

Maximum number of redirects to follow on each get_response

5
# Network, protocol and decompression errors that count as a failed fetch:
# fetch_pages rescues exactly these classes and returns a single error Page
# instead of letting the exception propagate.
# Frozen so the shared list cannot be altered by accident at runtime.
RESCUABLE_ERRORS = [
  EOFError,
  Errno::ECONNREFUSED,
  Errno::ECONNRESET,
  Errno::EHOSTUNREACH,
  Errno::EINVAL,
  Errno::EPIPE,
  Errno::ETIMEDOUT,
  Net::HTTPBadResponse,
  Net::HTTPHeaderSyntaxError,
  Net::ProtocolError,
  SocketError,
  Timeout::Error,
  Zlib::DataError,
  Zlib::GzipFile::Error
].freeze

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ HTTP

Returns a new instance of HTTP.



28
29
30
31
32
# File 'lib/polipus/http.rb', line 28

# Sets up a new client around the given options hash.
#
# Both bookkeeping hashes start out empty; they are presumably used for
# per-connection caching and hit counting elsewhere in the class -- confirm
# against the rest of the file.
#
# @param opts [Hash] client options; stored by reference, not copied
def initialize(opts = {})
  @connections, @connections_hits = {}, {}
  @opts = opts
end

Instance Method Details

#accept_cookies? ⇒ Boolean

Does this HTTP client accept cookies from the server?

Returns:

  • (Boolean)


142
143
144
# File 'lib/polipus/http.rb', line 142

# Does this HTTP client accept cookies from the server?
#
# @return [Object] the value stored under +:accept_cookies+ in the options,
#   returned as-is (it is not coerced to true/false)
def accept_cookies?
  cookies_option = @opts[:accept_cookies]
  cookies_option
end


146
147
148
149
# File 'lib/polipus/http.rb', line 146

# Returns the cookie jar kept in the options hash, creating a new
# ::HTTP::CookieJar and storing it under +:cookie_jar+ on first use.
#
# @return [Object] the configured or newly created cookie jar
def cookie_jar
  # ||= evaluates to the stored value, so no second lookup is required.
  @opts[:cookie_jar] ||= ::HTTP::CookieJar.new
end

#fetch_page(url, referer = nil, depth = nil) ⇒ Object

Fetch a single Page from the response of an HTTP request to url. Just gets the final destination page.



38
39
40
# File 'lib/polipus/http.rb', line 38

# Fetches +url+ and returns only the final Page, i.e. the last element of
# what fetch_pages returns for the same arguments.
#
# @param url [Object] address to request, forwarded to fetch_pages
# @param referer [Object, nil] forwarded to fetch_pages
# @param depth [Object, nil] forwarded to fetch_pages
# @return [Object] the last page produced by fetch_pages
def fetch_page(url, referer = nil, depth = nil)
  visited = fetch_pages(url, referer, depth)
  visited.last
end

#fetch_pages(url, referer = nil, depth = nil) ⇒ Object

Create new Pages from the response of an HTTP request to url, including redirects



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/polipus/http.rb', line 46

# Creates a Page for every response that +get+ yields while requesting
# +url+ (redirects included), in the order they are yielded.
#
# If one of RESCUABLE_ERRORS is raised, the failure is optionally printed
# (when verbose? is true) and a single Page carrying the error is returned.
#
# @param url [String, URI] address to request; converted with Kernel#URI
# @param referer [Object, nil] passed to +get+ and recorded on every Page
# @param depth [Object, nil] recorded on every Page
# @return [Array<Page>] the pages built from the responses, or a one-element
#   array holding the error Page
def fetch_pages(url, referer = nil, depth = nil)
  url = URI(url)
  collected = []

  get(url, referer) do |response, code, location, redirect_to, response_time|
    # Runs before the body is read; presumably decodes compressed payloads.
    handle_compression response
    page_attributes = {
      body: response.body,
      code: code,
      headers: response.to_hash,
      referer: referer,
      depth: depth,
      redirect_to: redirect_to,
      response_time: response_time,
      fetched_at: Time.now.to_i
    }
    collected << Page.new(location, **page_attributes)
  end

  collected
rescue *RESCUABLE_ERRORS => error
  if verbose?
    puts error.inspect
    puts error.backtrace
  end

  failure = Page.new(url, error: error, referer: referer, depth: depth)
  [failure]
end

#open_timeout ⇒ Object

HTTP open timeout in seconds



136
137
138
# File 'lib/polipus/http.rb', line 136

# HTTP open timeout in seconds.
#
# @return [Object] the value stored under +:open_timeout+ in the options,
#   returned unchanged
def open_timeout
  configured_timeout = @opts[:open_timeout]
  configured_timeout
end

#proxy_host ⇒ Object

The proxy address string



89
90
91
92
# File 'lib/polipus/http.rb', line 89

# The proxy address.
#
# When the +:proxy_host_port+ option is set (non-nil), the first element of
# proxy_host_port wins. Otherwise +:proxy_host+ is used: a callable is
# invoked with this client, anything else is returned as-is.
#
# @return [Object] the proxy address, or nil when nothing is configured
def proxy_host
  if @opts[:proxy_host_port].nil?
    host_option = @opts[:proxy_host]
    host_option.respond_to?(:call) ? host_option.call(self) : host_option
  else
    proxy_host_port.first
  end
end

#proxy_host_port ⇒ Object

Shorthand to get proxy info with a single call. It returns an array of [‘addr’, port, ‘user’, ‘pass’].



122
123
124
# File 'lib/polipus/http.rb', line 122

# Shorthand proxy configuration read from the +:proxy_host_port+ option.
#
# A callable option is invoked with this client and its result returned;
# any other value is returned as-is. The value is expected to be an array
# shaped like [addr, port, user, pass], which the proxy_* readers index into.
#
# @return [Object] the resolved +:proxy_host_port+ value, or nil when unset
def proxy_host_port
  shorthand = @opts[:proxy_host_port]
  return shorthand unless shorthand.respond_to?(:call)

  shorthand.call(self)
end

#proxy_pass ⇒ Object

The proxy password



113
114
115
116
# File 'lib/polipus/http.rb', line 113

# The proxy password.
#
# When the +:proxy_host_port+ option is set (non-nil), element 3 of
# proxy_host_port wins. Otherwise +:proxy_pass+ is used: a callable is
# invoked with this client, anything else is returned as-is.
#
# @return [Object] the proxy password, or nil when nothing is configured
def proxy_pass
  if @opts[:proxy_host_port].nil?
    pass_option = @opts[:proxy_pass]
    pass_option.respond_to?(:call) ? pass_option.call(self) : pass_option
  else
    proxy_host_port[3]
  end
end

#proxy_port ⇒ Object

The proxy port



97
98
99
100
# File 'lib/polipus/http.rb', line 97

# The proxy port.
#
# When the +:proxy_host_port+ option is set (non-nil), element 1 of
# proxy_host_port wins. Otherwise +:proxy_port+ is used: a callable is
# invoked with this client, anything else is returned as-is.
#
# @return [Object] the proxy port, or nil when nothing is configured
def proxy_port
  if @opts[:proxy_host_port].nil?
    port_option = @opts[:proxy_port]
    port_option.respond_to?(:call) ? port_option.call(self) : port_option
  else
    proxy_host_port[1]
  end
end

#proxy_user ⇒ Object

The proxy username



105
106
107
108
# File 'lib/polipus/http.rb', line 105

# The proxy username.
#
# When the +:proxy_host_port+ option is set (non-nil), element 2 of
# proxy_host_port wins. Otherwise +:proxy_user+ is used: a callable is
# invoked with this client, anything else is returned as-is.
#
# @return [Object] the proxy username, or nil when nothing is configured
def proxy_user
  if @opts[:proxy_host_port].nil?
    user_option = @opts[:proxy_user]
    user_option.respond_to?(:call) ? user_option.call(self) : user_option
  else
    proxy_host_port[2]
  end
end

#read_timeout ⇒ Object

HTTP read timeout in seconds



129
130
131
# File 'lib/polipus/http.rb', line 129

# HTTP read timeout in seconds.
#
# @return [Object] the value stored under +:read_timeout+ in the options,
#   returned unchanged
def read_timeout
  configured_timeout = @opts[:read_timeout]
  configured_timeout
end

#redirect_limit ⇒ Object

The maximum number of redirects to follow



74
75
76
# File 'lib/polipus/http.rb', line 74

# The maximum number of redirects to follow.
#
# @return [Object] the +:redirect_limit+ option when it is truthy, otherwise
#   the REDIRECT_LIMIT constant
def redirect_limit
  configured_limit = @opts[:redirect_limit]
  return configured_limit if configured_limit

  REDIRECT_LIMIT
end

#user_agent ⇒ Object

The user-agent string which will be sent with each request, or nil if no such option is set



82
83
84
# File 'lib/polipus/http.rb', line 82

# The user-agent string sent with each request.
#
# @return [Object] the value stored under +:user_agent+ in the options, or
#   nil if no such option is set
def user_agent
  agent = @opts[:user_agent]
  agent
end