Class: SiteDiff::UriWrapper
- Inherits:
-
Object
- Object
- SiteDiff::UriWrapper
- Defined in:
- lib/sitediff/uriwrapper.rb
Defined Under Namespace
Classes: ReadResult
Constant Summary collapse
# Options used for requests when the caller does not supply its own
# +curl_opts+ to the constructor.
DEFAULT_CURL_OPTS = {
  connecttimeout: 3, # Don't hang on servers that don't exist
  followlocation: true, # Follow HTTP redirects (code 301 and 302)
  headers: {
    'User-Agent' => 'Sitediff - https://github.com/evolvingweb/sitediff'
  }
}.freeze
Instance Method Summary collapse
-
#+(path) ⇒ Object
FIXME: this is not used anymore.
-
#charset_encoding(http_headers) ⇒ Object
Returns the encoding of an HTTP response from its headers, or nil if not specified.
-
#initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug = true) ⇒ UriWrapper
constructor
A new instance of UriWrapper.
-
#local? ⇒ Boolean
Is this a local filesystem path?
- #password ⇒ Object
-
#queue(hydra, &handler) ⇒ Object
Queue reading this URL, with a completion handler to run after.
-
#read_file ⇒ Object
Reads a file and yields to the completion handler, see .queue().
- #to_s ⇒ Object
-
#typhoeus_request ⇒ Object
Returns a Typhoeus::Request to fetch @uri.
- #user ⇒ Object
Constructor Details
#initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug = true) ⇒ UriWrapper
Returns a new instance of UriWrapper.
38 39 40 41 42 43 44 |
# File 'lib/sitediff/uriwrapper.rb', line 38

# Builds a wrapper around a URI or a local path.
#
# @param uri [#scheme, String] an object that already responds to +scheme+
#   is stored as-is; anything else is parsed with Addressable::URI.parse
# @param curl_opts [Hash] request options, stored for later use when a
#   request is built
# @param debug [Boolean] stored as +@debug+
#
# NOTE: for local URIs the trailing slashes are stripped with +gsub!+, i.e.
# the path string of the given/parsed URI object is modified in place.
def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug = true)
  @uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
  # remove trailing '/'s from local URIs
  @uri.path.gsub!(%r{/*$}, '') if local?
  @curl_opts = curl_opts
  @debug = debug
end
Instance Method Details
#+(path) ⇒ Object
FIXME: this is not used anymore
67 68 69 70 71 72 |
# File 'lib/sitediff/uriwrapper.rb', line 67

# FIXME: this is not used anymore
#
# Returns a new wrapper for this URI with +path+ appended.
#
# A '/' separator is inserted when this wrapper is local or when the URI's
# path is empty; otherwise +path+ is appended directly. The new wrapper is
# built from the resulting string only, so it does not inherit this
# instance's curl options or debug flag.
#
# @param path [String] text to append
# @return [Object] a new instance of this wrapper's class
def +(path)
  # 'path' for SiteDiff includes (parts of) path, query, and fragment.
  sep = local? || @uri.path.empty? ? '/' : ''
  self.class.new("#{@uri}#{sep}#{path}")
end
#charset_encoding(http_headers) ⇒ Object
Returns the encoding of an HTTP response from its headers, or nil if not specified.
83 84 85 86 87 88 89 |
# File 'lib/sitediff/uriwrapper.rb', line 83

# Extracts the charset named in a response's Content-Type header.
#
# The +charset+ parameter name is matched case-insensitively and the value
# may be wrapped in double quotes. An absent header, an absent parameter or
# an empty value all yield nil, so callers never receive an empty name.
#
# @param http_headers [#[]] headers, looked up under 'Content-Type'
# @return [String, nil] the charset name as written in the header, or nil
def charset_encoding(http_headers)
  content_type = http_headers['Content-Type']
  return unless content_type

  md = /;\s*charset\s*=\s*"?([-\w]+)"?/i.match(content_type)
  md && md[1]
end
#local? ⇒ Boolean
Is this a local filesystem path?
62 63 64 |
# File 'lib/sitediff/uriwrapper.rb', line 62

# Is this a local filesystem path?
#
# A wrapped URI counts as local when it has no scheme.
#
# @return [Boolean]
def local?
  @uri.scheme.nil?
end
#password ⇒ Object
50 51 52 |
# File 'lib/sitediff/uriwrapper.rb', line 50

# Password component of the wrapped URI, exactly as the URI reports it.
def password
  @uri.password
end
#queue(hydra, &handler) ⇒ Object
Queue reading this URL, with a completion handler to run after.
The handler should be callable with a single ReadResult argument.
This method may choose not to queue the request at all, but simply execute right away.
148 149 150 151 152 153 154 |
# File 'lib/sitediff/uriwrapper.rb', line 148

# Queue reading this URL, with a completion handler to run after.
#
# Local paths are not queued at all: they are read immediately through
# read_file, which yields to the handler. Everything else is turned into a
# request by typhoeus_request and handed to +hydra.queue+.
#
# @param hydra [#queue] receives the request for non-local URIs
# @yield the completion handler, forwarded to read_file / typhoeus_request
def queue(hydra, &handler)
  return read_file(&handler) if local?

  hydra.queue(typhoeus_request(&handler))
end
#read_file ⇒ Object
Reads a file and yields to the completion handler, see .queue()
75 76 77 78 79 |
# File 'lib/sitediff/uriwrapper.rb', line 75

# Reads a file and yields to the completion handler, see .queue()
#
# The file named by the wrapped URI is read as UTF-8. On success the handler
# receives ReadResult.new(content); if the file is missing, is a directory,
# is not readable, or a path component is not a directory, the handler
# receives ReadResult.error with the exception's message instead.
#
# The handler is invoked outside the rescued section, so an error raised by
# the handler itself propagates to the caller rather than triggering a
# second, misleading "read error" callback.
#
# @yieldparam result [ReadResult]
def read_file
  content = File.read(@uri.to_s, encoding: 'UTF-8')
rescue Errno::ENOENT, Errno::ENOTDIR, Errno::EACCES, Errno::EISDIR => e
  yield ReadResult.error(e.message)
else
  yield ReadResult.new(content)
end
#to_s ⇒ Object
54 55 56 57 58 59 |
# File 'lib/sitediff/uriwrapper.rb', line 54

# String form of the wrapped URI with the user and password removed.
#
# Works on a copy, so the wrapped URI keeps its credentials.
#
# @return [String]
def to_s
  uri = @uri.dup
  uri.user = nil
  uri.password = nil
  uri.to_s
end
#typhoeus_request ⇒ Object
Returns a Typhoeus::Request to fetch @uri
Completion callbacks of the request wrap the given handler which is assumed to accept a single ReadResult argument.
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/sitediff/uriwrapper.rb', line 95

# Returns a Typhoeus::Request to fetch @uri
#
# Completion callbacks of the request wrap the given handler which is
# assumed to accept a single ReadResult argument.
#
# The request is built from a copy of the stored curl options and the
# credential-free URL from to_s; when the URI carries a user, basic-auth
# credentials are passed through the :userpwd option instead. A user without
# a password yields "user:" rather than raising on String + nil.
#
# @yieldparam result [ReadResult]
# @return [Typhoeus::Request]
def typhoeus_request
  params = @curl_opts.dup
  # Allow basic auth
  params[:userpwd] = "#{@uri.user}:#{@uri.password}" if @uri.user

  req = Typhoeus::Request.new(to_s, params)

  req.on_success do |resp|
    body = resp.body
    # Typhoeus does not respect HTTP headers when setting the encoding
    # resp.body; coerce if possible.
    if (encoding = charset_encoding(resp.headers))
      body.force_encoding(encoding)
    end
    # Should be wrapped with rescue I guess? Maybe this entire function?
    # Should at least be an option in the Cli to disable this.
    # "stop on first error"
    begin
      yield ReadResult.new(body, encoding)
    rescue ArgumentError => e
      # With @debug set, handler errors surface instead of being reported.
      raise if @debug

      yield ReadResult.error("Parsing error for #{@uri}: #{e.message}")
    rescue => e
      raise if @debug

      yield ReadResult.error("Unknown parsing error for #{@uri}: #{e.message}")
    end
  end

  req.on_failure do |resp|
    if resp&.status_message
      msg = resp.status_message
      yield ReadResult.error(
        "HTTP error when loading #{@uri}: #{msg}",
        resp.response_code
      )
    elsif (msg = resp.options[:return_code])
      yield ReadResult.error(
        "Connection error when loading #{@uri}: #{msg}",
        resp.response_code
      )
    else
      # msg is nil here, so the message ends right after the colon.
      yield ReadResult.error(
        "Unknown error when loading #{@uri}: #{msg}",
        resp.response_code
      )
    end
  end

  req
end
#user ⇒ Object
46 47 48 |
# File 'lib/sitediff/uriwrapper.rb', line 46

# User component of the wrapped URI, exactly as the URI reports it.
def user
  @uri.user
end