Class: Scruber::FetcherAdapters::TyphoeusFetcher
- Inherits:
-
AbstractAdapter
- Object
- AbstractAdapter
- Scruber::FetcherAdapters::TyphoeusFetcher
- Defined in:
- lib/scruber/fetcher_adapters/typhoeus_fetcher.rb
Instance Attribute Summary collapse
-
#ssl_verifyhost ⇒ Object
Returns the value of attribute ssl_verifyhost.
-
#ssl_verifypeer ⇒ Object
Returns the value of attribute ssl_verifypeer.
Attributes inherited from AbstractAdapter
#followlocation, #max_concurrency, #max_retry_times, #options, #request_timeout, #retry_delays
Instance Method Summary collapse
- #build_request(page) ⇒ Object
- #hydra ⇒ Object
-
#initialize(options = {}) ⇒ TyphoeusFetcher
constructor
A new instance of TyphoeusFetcher.
- #on_complete_callback(page, response) ⇒ Object
- #run(queue) ⇒ Object
Methods inherited from AbstractAdapter
#after_request_callback, #bad_response?, #before_request_callback, #convert_to_utf8, #cookie_for, #determine_retry_at, #headers_for, #proxy_for, #user_agent_for
Constructor Details
#initialize(options = {}) ⇒ TyphoeusFetcher
Returns a new instance of TyphoeusFetcher.
8 9 10 11 12 13 |
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 8
#
# Builds the adapter from an options hash.
#
# @param options [Hash] adapter settings
# @option options [Object] :ssl_verifypeer (false) stored in @ssl_verifypeer
# @option options [Object] :ssl_verifyhost (0) stored in @ssl_verifyhost
# @option options [Integer] :max_requests (@max_concurrency * 10) stored in
#   @max_requests
def initialize(options = {})
  # NOTE(review): the extracted listing had lost the `options` identifier
  # everywhere (it read `def initialize(={})` / `super()`); forwarding the hash
  # to the parent is presumed from that pattern — confirm against the source file.
  super(options)
  @ssl_verifypeer = options.fetch(:ssl_verifypeer) { false }
  @ssl_verifyhost = options.fetch(:ssl_verifyhost) { 0 }
  # The default relies on @max_concurrency already being set, so this must stay
  # after the call to super (presumably AbstractAdapter assigns it — confirm).
  @max_requests = options.fetch(:max_requests) { @max_concurrency * 10 }
end
Instance Attribute Details
#ssl_verifyhost ⇒ Object
Returns the value of attribute ssl_verifyhost.
5 6 7 |
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 5
#
# Reader for the ssl_verifyhost attribute.
#
# @return [Object] the current value of @ssl_verifyhost
def ssl_verifyhost
  @ssl_verifyhost
end
#ssl_verifypeer ⇒ Object
Returns the value of attribute ssl_verifypeer.
5 6 7 |
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 5
#
# Reader for the ssl_verifypeer attribute.
#
# @return [Object] the current value of @ssl_verifypeer
def ssl_verifypeer
  @ssl_verifypeer
end
Instance Method Details
#build_request(page) ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 28
#
# Builds a Typhoeus::Request for the given page and attaches a completion
# handler that passes the response to #on_complete_callback.
#
# @param page [Object] queue page; read via page[:method], page[:body],
#   page[:url] and page.options
# @return [Typhoeus::Request] the request, not yet queued or run
def build_request(page)
  # The callback's return value replaces the page for the rest of the method.
  page = before_request_callback(page)

  # NOTE(review): the extracted listing had lost the `options` identifier
  # (` = {`, `page..fetch`, `Request.new(page[:url], )`); it is restored here —
  # confirm against the source file.
  options = {
    method: page[:method],
    body: page[:body],
    # params: page[:params],
    headers: headers_for(page),
    accept_encoding: 'gzip',
    forbid_reuse: true,
    # Per-page options take precedence over the adapter-level settings.
    followlocation: page.options.fetch(:followlocation) { @followlocation },
    ssl_verifypeer: page.options.fetch(:ssl_verifypeer) { @ssl_verifypeer },
    ssl_verifyhost: page.options.fetch(:ssl_verifyhost) { @ssl_verifyhost },
    timeout: @request_timeout
  }

  proxy = proxy_for(page)
  if proxy
    # Non-HTTP proxies are addressed through a socks:// URL.
    options[:proxy] = proxy.http? ? proxy.address : "socks://#{proxy.address}"
    options[:proxyuserpwd] = proxy.proxyuserpwd if proxy.proxyuserpwd.present?
  end

  request = Typhoeus::Request.new(page[:url], options)
  request.on_complete { |response| on_complete_callback(page, response) }
  request
end
#hydra ⇒ Object
56 57 58 |
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 56
#
# Returns the Typhoeus::Hydra used to run requests, creating it on first call
# with max_concurrency taken from @max_concurrency and memoizing it in @hydra.
#
# @return [Typhoeus::Hydra]
def hydra
  @hydra ||= Typhoeus::Hydra.new(max_concurrency: @max_concurrency)
end
#on_complete_callback(page, response) ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 60
#
# Copies the response's code, body, headers and total time onto the page,
# passes the page through #after_request_callback, and saves it.
#
# @param page [Object] page to update; must respond to the response_* writers
#   and #save
# @param response [Object] completed response (code, body, headers,
#   total_time, timed_out?)
# @return [Object] whatever page.save returns
def on_complete_callback(page, response)
  page.response_code = response.code
  page.response_body = response.body
  page.response_headers = response.headers
  page.response_total_time = response.total_time

  # A timed-out request is recorded with response code 1 in place of the
  # code reported by the response.
  page.response_code = 1 if response.timed_out?

  page = after_request_callback(page)
  page.save
end
#run(queue) ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/scruber/fetcher_adapters/typhoeus_fetcher.rb', line 15
#
# Fetches up to @max_requests pending pages from the queue, queues one request
# per page on the hydra, then runs the hydra. If nothing was queued, sleeps
# for one second instead.
#
# @param queue [Object] must respond to #fetch_pending(count) with an
#   enumerable of pages
def run(queue)
  queue.fetch_pending(@max_requests).each do |page|
    hydra.queue(build_request(page))
  end

  if hydra.queued_requests.count > 0
    hydra.run
  else
    # Nothing to do on this pass; pause before returning to the caller.
    sleep 1
  end
end