Class: Kudzu::Agent::Fetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/kudzu/agent/fetcher.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config, robots = nil) ⇒ Fetcher

Returns a new instance of Fetcher.



6
7
8
9
10
11
12
# File 'lib/kudzu/agent/fetcher.rb', line 6

# Builds a fetcher and the collaborators it keeps in instance variables.
#
# @param config [Object] crawler configuration; +max_connection+ is read here
#   and the whole object is handed to the sleeper and the page filterer
# @param robots [Object, nil] passed through unchanged to Sleeper.new
def initialize(config, robots = nil)
  @config = config

  # A nil/false max_connection falls back to a pool size of 100.
  pool_size = @config.max_connection || 100
  @pool = Http::ConnectionPool.new(pool_size)

  @sleeper = Sleeper.new(@config, robots)
  @filterer = PageFilterer.new(@config)
  @jar = HTTP::CookieJar.new
end

Instance Attribute Details

#pool ⇒ Object (readonly)

Returns the value of attribute pool.



4
5
6
# File 'lib/kudzu/agent/fetcher.rb', line 4

# Read-only accessor for the connection pool stored in @pool
# (assigned in #initialize from Http::ConnectionPool.new).
#
# @return [Object] the current value of the @pool instance variable
def pool
  @pool
end

Instance Method Details

#fetch(url, request_header: {}, method: :get, redirect: @config.max_redirect, redirect_from: nil) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/kudzu/agent/fetcher.rb', line 14

# Requests +url+ and follows redirect responses until a non-redirect response
# arrives, the response carries no Location header, or the redirect budget is
# used up.
#
# @param url [String] URL to request
# @param request_header [Hash] forwarded to build_request on every hop
# @param method [Symbol] HTTP method forwarded to build_request; the same
#   method is reused for every redirect hop
# @param redirect [Integer] how many more redirects may be followed
#   (defaults to @config.max_redirect)
# @param redirect_from [String, nil] URL that started the redirect chain;
#   nil on the initial call
# @return [Object] the value of build_response for the last response received
def fetch(url, request_header: {}, method: :get, redirect: @config.max_redirect, redirect_from: nil)
  uri = Addressable::URI.parse(url)
  request = build_request(uri, request_header: request_header, method: method)
  response, response_time = send_request(uri, request)

  location = response['location']
  follow = redirection?(response.code) && location && redirect > 0
  return build_response(url, response, response_time, redirect_from) unless follow

  # Forward the caller's method: without it every hop after the first would
  # silently fall back to the :get default (e.g. a HEAD probe turning into a
  # full GET download once redirected).
  # redirect_from keeps pointing at the first URL of the chain.
  fetch(uri.join(location).to_s, request_header: request_header,
                                 method: method,
                                 redirect: redirect - 1,
                                 redirect_from: redirect_from || url)
end