Class: Polipus::HTTP

Inherits:
Object
  • Object
show all
Defined in:
lib/polipus/http.rb

Constant Summary collapse

REDIRECT_LIMIT =

Maximum number of redirects to follow on each get_response

5

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ HTTP

Returns a new instance of HTTP.



11
12
13
14
15
# File 'lib/polipus/http.rb', line 11

# Build a new HTTP client.
#
# Starts with empty @connections and @connections_hits tables and keeps
# the given options hash as-is; the option accessors on this class read
# from it on demand.
#
# @param opts [Hash] client options (e.g. :user_agent, :read_timeout)
def initialize(opts = {})
  @connections, @connections_hits = {}, {}
  @opts = opts
end

Instance Method Details

#accept_cookies? ⇒ Boolean

Does this HTTP client accept cookies from the server?

Returns:

  • (Boolean)


115
116
117
# File 'lib/polipus/http.rb', line 115

# Does this HTTP client accept cookies from the server?
#
# @return [Boolean] true when the :accept_cookies option is truthy,
#   false otherwise (including when the option is not set)
def accept_cookies?
  # Coerce to a strict Boolean so the predicate matches its documented
  # return type instead of leaking nil or the raw option value.
  @opts[:accept_cookies] ? true : false
end


119
120
121
122
# File 'lib/polipus/http.rb', line 119

# The cookie jar stored under the :cookie_jar option.
#
# When that option is unset (or falsy) a new ::HTTP::CookieJar is created,
# stored back into the options and returned, so later calls reuse it.
#
# @return [Object] the configured or newly created cookie jar
def cookie_jar
  @opts[:cookie_jar] ||= ::HTTP::CookieJar.new
end

#fetch_page(url, referer = nil, depth = nil) ⇒ Object

Fetch a single Page from the response of an HTTP request to url. Just gets the final destination page.



21
22
23
# File 'lib/polipus/http.rb', line 21

# Fetch a single Page for +url+.
#
# Delegates to #fetch_pages with the same arguments and keeps only the
# last element of the result, i.e. the final destination page.
#
# @param url [Object] passed through to #fetch_pages
# @param referer [Object, nil] passed through to #fetch_pages
# @param depth [Object, nil] passed through to #fetch_pages
# @return [Object] the last page returned by #fetch_pages
def fetch_page(url, referer = nil, depth = nil)
  pages = fetch_pages(url, referer, depth)
  pages.last
end

#fetch_pages(url, referer = nil, depth = nil) ⇒ Object

Create new Pages from the response of an HTTP request to url, including redirects



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/polipus/http.rb', line 29

# Create new Pages from the response of an HTTP request to +url+,
# including redirects.
#
# One Page is appended for every response yielded by #get. A body whose
# first Content-Encoding header value is gzip is decompressed before it
# is handed to the Page.
#
# @param url [String, URI] address to request; converted with Kernel#URI
# @param referer [Object, nil] forwarded to #get and recorded on each Page
# @param depth [Object, nil] recorded on each Page
# @return [Array<Page>] the pages built from the yielded responses, or a
#   single-element array holding an error Page when a StandardError is
#   raised anywhere in the process
def fetch_pages(url, referer = nil, depth = nil)
  url = URI(url)
  pages = []
  get(url, referer) do |response, code, location, redirect_to, response_time|
    headers = response.to_hash
    body = response.body.dup
    # Content-coding names are case-insensitive in HTTP, so "GZIP" must be
    # treated like "gzip".
    encoding = headers.fetch('content-encoding', [])[0].to_s
    # Skip missing/empty bodies: a gzip header on an empty response would
    # otherwise make GzipReader raise and replace the whole chain of pages
    # with a single error page.
    if encoding.casecmp('gzip').zero? && body && !body.empty?
      # The block form of wrap closes the reader once the body is read.
      body = Zlib::GzipReader.wrap(StringIO.new(body)) { |gzip| gzip.read }
    end

    pages << Page.new(location, :body          => body,
                                :code          => code,
                                :headers       => headers,
                                :referer       => referer,
                                :depth         => depth,
                                :redirect_to   => redirect_to,
                                :response_time => response_time,
                                :fetched_at    => Time.now.to_i)
  end

  pages
rescue StandardError => e
  if verbose?
    puts e.inspect
    puts e.backtrace
  end

  # Report the failure as a single Page carrying the exception.
  [Page.new(url, :error => e)]
end

#open_timeout ⇒ Object

HTTP open timeout in seconds



109
110
111
# File 'lib/polipus/http.rb', line 109

# HTTP open timeout in seconds.
#
# @return [Object, nil] the value of the :open_timeout option, or nil
#   when the option is not present
def open_timeout
  @opts[:open_timeout]
end

#proxy_host ⇒ Object

The proxy address string



78
79
80
81
# File 'lib/polipus/http.rb', line 78

# The proxy address string.
#
# When the :proxy_host_port option is set, the first element of
# #proxy_host_port wins. Otherwise the :proxy_host option is used; if it
# responds to #call it is invoked with this client and its result returned.
#
# @return [Object, nil] the proxy host, or nil when none is configured
def proxy_host
  if @opts[:proxy_host_port].nil?
    host = @opts[:proxy_host]
    host.respond_to?(:call) ? host.call(self) : host
  else
    proxy_host_port.first
  end
end

#proxy_host_port ⇒ Object

Shorthand to get the proxy info with a single call. It returns an array of ['addr', port].



95
96
97
# File 'lib/polipus/http.rb', line 95

# Shorthand to get the proxy info with a single call.
#
# Reads the :proxy_host_port option; if the stored value responds to
# #call it is invoked with this client and its result is returned,
# otherwise the stored value itself is returned.
#
# @return [Object, nil] the proxy info (documented as ['addr', port]),
#   or nil when the option is not set
def proxy_host_port
  pair = @opts[:proxy_host_port]
  return pair.call(self) if pair.respond_to?(:call)

  pair
end

#proxy_port ⇒ Object

The proxy port



86
87
88
89
# File 'lib/polipus/http.rb', line 86

# The proxy port.
#
# When the :proxy_host_port option is set, the last element of
# #proxy_host_port wins. Otherwise the :proxy_port option is used; if it
# responds to #call it is invoked with this client and its result returned.
#
# @return [Object, nil] the proxy port, or nil when none is configured
def proxy_port
  if @opts[:proxy_host_port].nil?
    port = @opts[:proxy_port]
    port.respond_to?(:call) ? port.call(self) : port
  else
    proxy_host_port.last
  end
end

#read_timeout ⇒ Object

HTTP read timeout in seconds



102
103
104
# File 'lib/polipus/http.rb', line 102

# HTTP read timeout in seconds.
#
# @return [Object, nil] the value of the :read_timeout option, or nil
#   when the option is not present
def read_timeout
  @opts[:read_timeout]
end

#redirect_limit ⇒ Object

The maximum number of redirects to follow



62
63
64
# File 'lib/polipus/http.rb', line 62

# The maximum number of redirects to follow.
#
# @return [Object] the :redirect_limit option when it is set to a truthy
#   value, otherwise the REDIRECT_LIMIT constant
def redirect_limit
  configured = @opts[:redirect_limit]
  configured || REDIRECT_LIMIT
end

#user_agent ⇒ Object

The user-agent string which will be sent with each request, or nil if no such option is set



70
71
72
# File 'lib/polipus/http.rb', line 70

# The user-agent string which will be sent with each request.
#
# @return [Object, nil] the value of the :user_agent option, or nil if no
#   such option is set
def user_agent
  @opts[:user_agent]
end