Class: Wayfarer::Dispatcher

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Observable, CrawlObserver::Events, CrawlObserver::ObservableShortcuts
Defined in:
lib/wayfarer/dispatcher.rb

Overview

Creates job instances, retrieves pages and, if a URI matches a route, calls methods on the instances.

Defined Under Namespace

Classes: Error, Halt, Mismatch, Stage

Constant Summary

Constants included from CrawlObserver::Events

CrawlObserver::Events::CycleFinished

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from CrawlObserver::ObservableShortcuts

#notify_observers!

Constructor Details

#initialize(job) ⇒ Dispatcher

Returns a new instance of Dispatcher.



28
29
30
31
# File 'lib/wayfarer/dispatcher.rb', line 28

# Stores the job and builds the adapter pool that #dispatch draws adapters from.
#
# @param job [Object] kept in @job and passed to HTTPAdapters::AdapterPool.new
def initialize(job)
  @job = job
  @adapter_pool = HTTPAdapters::AdapterPool.new(job)
end

Instance Attribute Details

#adapter_pool ⇒ AdapterPool (readonly)

Returns:

  • (AdapterPool)


23
24
25
# File 'lib/wayfarer/dispatcher.rb', line 23

# Reader for the adapter pool created in the constructor.
#
# @return [AdapterPool] the value of @adapter_pool
def adapter_pool
  @adapter_pool
end

#job ⇒ Object (readonly)

Returns the value of attribute job.



26
27
28
# File 'lib/wayfarer/dispatcher.rb', line 26

# Reader for the job this dispatcher was constructed with.
#
# @return [Object] the value of @job
def job
  @job
end

Instance Method Details

#dispatch(job, uri, is_peeking: false) ⇒ Object

Dispatches this URI. Matches a URI against the rules of the job's router. If a rule matches, the page is retrieved, and the action associated with the route is called.

Parameters:

  • job (Job)
  • uri (URI)
  • is_peeking (Boolean) (defaults to: false)


42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/wayfarer/dispatcher.rb', line 42

# Routes +uri+ through the job's router and, when a route matches, fetches the
# page and calls the routed action on a fresh job instance.
#
# The action is invoked with a block that lets it "peek" at another URI. The
# block dispatches that URI with +is_peeking: true+ and returns the +ret_val+
# of the nested result. It returns nil when called from within a peek, or when
# anything raised by the nested dispatch (including a result that has no
# +ret_val+) is rescued.
#
# @param job [Job] must respond to +router+ and +new+
# @param uri [URI] the URI to route and fetch
# @param is_peeking [Boolean] true when called from inside a peek; the peek
#   block then does nothing, so peeks never nest
# @return [Mismatch] when the router yields no action for +uri+
# @return [Halt] when the job instance reports +halts?+
# @return [Stage] otherwise, built from the staged URIs and the action's
#   return value
# @return [Object] whatever the logger call returns when one of the rescued
#   network/URI errors below occurred
def dispatch(job, uri, is_peeking: false)
  action, params = job.router.route(uri)
  return Mismatch.new(uri) unless action

  params = ActiveSupport::HashWithIndifferentAccess.new(params)

  notify_observers!(DispatchedURI.new(action, uri))

  job_instance = job.new
  result = nil

  adapter_pool.with do |adapter|
    job_instance.page = adapter.fetch(uri)
    job_instance.adapter = adapter
    job_instance.params = params

    result = job_instance.public_send(action) do |peek_uri|
      begin
        unless is_peeking
          # NOTE(review): the event carries the current +uri+, not +peek_uri+;
          # confirm this is intended.
          notify_observers!(Peeking.new(uri))

          # Dedicated local so the closure does not write to the enclosing
          # method's +result+ while the action is still running.
          peeked = dispatch(job, URI(peek_uri), is_peeking: true)
          peeked.ret_val
        end
      rescue
        # Peeking is best-effort: any failure yields nil to the action.
        nil
      end
    end
  end

  if job_instance.halts?
    Halt.new(uri, action)
  else
    Stage.new(job_instance.staged_uris, result)
  end
# What follows are exceptions whose origin I don't care about at the moment
# TODO: Better logging
rescue Net::HTTP::Persistent::Error
  logger.warn("Net::HTTP::Persistent::Error @ #{uri}")
rescue Errno::EHOSTUNREACH
  logger.warn("Host unreachable @ #{uri}")
rescue Errno::ENETUNREACH
  logger.warn("No route to network present @ #{uri}")
rescue Net::OpenTimeout, Net::ReadTimeout
  logger.warn("::Net timeout @ #{uri}")

# SSL verification failed due to a missing certificate
rescue OpenSSL::SSL::SSLError
  logger.warn("SSL verification failed @ #{uri}")

# Ruby/zlib encountered a Z_DATA_ERROR.
# Usually if a stream was prematurely freed.
# Probably has to do with net-http-persistent?
rescue Zlib::DataError
  # Include the URI like every other branch so the failing request can be found.
  logger.warn("Z_DATA_ERROR @ #{uri}")
rescue HTTPAdapters::NetHTTPAdapter::MalformedURI, URI::InvalidURIError
  logger.info("Malformed URI @ #{uri}")
rescue HTTPAdapters::NetHTTPAdapter::MalformedRedirectURI
  logger.info("Malformed redirect URI @ #{uri}")
rescue HTTPAdapters::NetHTTPAdapter::MaximumRedirectCountReached
  logger.info("Maximum redirect count reached @ #{uri}")
end