Class: Scruber::FetcherAdapters::TyphoeusFetcher

Inherits:
AbstractAdapter show all
Defined in:
lib/scruber/fetcher_adapters/typhoeus_fetcher.rb

Instance Attribute Summary collapse

Attributes inherited from AbstractAdapter

#followlocation, #max_concurrency, #max_retry_times, #options, #request_timeout, #retry_delays

Instance Method Summary collapse

Methods inherited from AbstractAdapter

#after_request_callback, #bad_response?, #before_request_callback, #convert_to_utf8, #cookie_for, #determine_retry_at, #headers_for, #proxy_for, #user_agent_for

Constructor Details

#initialize(options = {}) ⇒ TyphoeusFetcher

Returns a new instance of TyphoeusFetcher.



8
9
10
11
12
13
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 8

# Creates the adapter, letting the parent class process +options+ first and
# then reading the Typhoeus-specific settings from the same hash.
#
# @param options [Hash] adapter settings; the keys read here are
#   +:ssl_verifypeer+ (default +false+), +:ssl_verifyhost+ (default +0+)
#   and +:max_requests+ (default: ten times +@max_concurrency+)
def initialize(options={})
  super(options)
  @ssl_verifypeer = options.fetch(:ssl_verifypeer, false)
  @ssl_verifyhost = options.fetch(:ssl_verifyhost, 0)
  # The block form keeps this default lazy: @max_concurrency is only read
  # when :max_requests was not supplied.
  # NOTE(review): @max_concurrency is presumably assigned by the parent
  # initializer — confirm.
  @max_requests = options.fetch(:max_requests) { @max_concurrency * 10 }
end

Instance Attribute Details

#ssl_verifyhost ⇒ Object

Returns the value of attribute ssl_verifyhost.



5
6
7
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 5

# Returns the adapter-wide +ssl_verifyhost+ value stored in @ssl_verifyhost.
# build_request falls back to this value when a page's own options do not
# provide +:ssl_verifyhost+.
def ssl_verifyhost
  @ssl_verifyhost
end

#ssl_verifypeer ⇒ Object

Returns the value of attribute ssl_verifypeer.



5
6
7
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 5

# Returns the adapter-wide +ssl_verifypeer+ value stored in @ssl_verifypeer.
# build_request falls back to this value when a page's own options do not
# provide +:ssl_verifypeer+.
def ssl_verifypeer
  @ssl_verifypeer
end

Instance Method Details

#build_request(page) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 28

# Builds (but does not queue) a Typhoeus request for +page+.
#
# The page is first passed through +before_request_callback+. Per-page
# options for +:followlocation+, +:ssl_verifypeer+ and +:ssl_verifyhost+
# take precedence over the adapter-wide instance variables. When
# +proxy_for+ returns a proxy, its address is added to the request
# (prefixed with "socks://" unless the proxy reports +http?+), along with
# its credentials when present.
#
# @param page [Object] page to fetch; must respond to +#[]+ and +#options+
# @return [Typhoeus::Request] request whose completion handler forwards the
#   response to +on_complete_callback+
def build_request(page)
  page = before_request_callback(page)

  request_options = {
    method: page[:method],
    body: page[:body],
    # params: page[:params],  # left disabled, as in the original
    headers: headers_for(page),
    accept_encoding: 'gzip',
    forbid_reuse: true,
    followlocation: page.options.fetch(:followlocation) { @followlocation },
    ssl_verifypeer: page.options.fetch(:ssl_verifypeer) { @ssl_verifypeer },
    ssl_verifyhost: page.options.fetch(:ssl_verifyhost) { @ssl_verifyhost },
    timeout: @request_timeout
  }

  proxy = proxy_for(page)
  if proxy
    request_options[:proxy] = proxy.http? ? proxy.address : "socks://#{proxy.address}"
    request_options[:proxyuserpwd] = proxy.proxyuserpwd if proxy.proxyuserpwd.present?
  end

  Typhoeus::Request.new(page[:url], request_options).tap do |request|
    request.on_complete { |response| on_complete_callback(page, response) }
  end
end

#hydra ⇒ Object



56
57
58
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 56

# Returns the adapter's Typhoeus::Hydra, creating it on first use with
# +@max_concurrency+ as its concurrency limit and reusing it afterwards.
#
# @return [Typhoeus::Hydra]
def hydra
  return @hydra if @hydra

  @hydra = Typhoeus::Hydra.new(max_concurrency: @max_concurrency)
end

#on_complete_callback(page, response) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 60

# Copies the outcome of a finished request onto +page+ and persists it.
#
# The response code, body, headers and total time are stored on the page.
# If the response reports a timeout, the stored code is replaced with 1.
# The page is then passed through +after_request_callback+ and saved.
#
# @param page [Object] page the request was built for
# @param response [Object] completed response; must respond to +code+,
#   +body+, +headers+, +total_time+ and +timed_out?+
# @return [Object] whatever +save+ returns
def on_complete_callback(page, response)
  page.response_code = response.code
  page.response_body = response.body
  page.response_headers = response.headers
  page.response_total_time = response.total_time

  # Timed-out requests are recorded with the code 1.
  page.response_code = 1 if response.timed_out?

  after_request_callback(page).save
end

#run(queue) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 15

# Performs one fetch cycle: takes up to +@max_requests+ pending pages from
# +queue+, queues a request for each on the hydra, and runs the hydra.
# When nothing ends up queued, sleeps for one second instead.
#
# @param queue [Object] page queue; must respond to +fetch_pending(count)+
# @return [Object] the result of +hydra.run+, or of +sleep+ when idle
def run(queue)
  queue.fetch_pending(@max_requests).each { |page| hydra.queue(build_request(page)) }

  if hydra.queued_requests.count.zero?
    sleep 1
  else
    hydra.run
  end
end