Class: SiteDiff::UriWrapper

Inherits:
Object
  • Object
show all
Defined in:
lib/sitediff/uriwrapper.rb

Defined Under Namespace

Classes: ReadResult

Constant Summary collapse

# Options used for HTTP requests when the constructor is not given its own
# curl_opts; typhoeus_request passes a copy of them to Typhoeus::Request.new.
# NOTE: `freeze` is shallow, so the nested :headers hash itself is not frozen.
DEFAULT_CURL_OPTS =
{
  connecttimeout: 3,     # Don't hang on servers that don't exist
  followlocation: true,  # Follow HTTP redirects (code 301 and 302)
  headers: {
    'User-Agent' => 'Sitediff - https://github.com/evolvingweb/sitediff'
  }
}.freeze

Instance Method Summary collapse

Constructor Details

#initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug = true) ⇒ UriWrapper

Returns a new instance of UriWrapper.



38
39
40
41
42
43
44
# File 'lib/sitediff/uriwrapper.rb', line 38

# Wraps a URI (or local filesystem path) for reading.
#
# @param uri [String, #scheme] a URI string (parsed with Addressable), or
#   an already-parsed URI object; a parsed object is copied, so the
#   caller's instance is never modified
# @param curl_opts [Hash] options later handed to Typhoeus for HTTP reads
# @param debug [Boolean] when true, errors raised by the completion handler
#   are re-raised instead of being reported as an error result
def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug = true)
  @uri = uri.respond_to?(:scheme) ? uri.dup : Addressable::URI.parse(uri)
  # Remove trailing '/'s from local URIs. Assign through the setter rather
  # than mutating the path string in place, so the URI object can refresh
  # any state derived from the path. \z anchors at the very end of the
  # string ($ would also match before an embedded newline).
  @uri.path = @uri.path.sub(%r{/+\z}, '') if local?
  @curl_opts = curl_opts
  @debug = debug
end

Instance Method Details

#+(path) ⇒ Object

FIXME: this is not used anymore



67
68
69
70
71
72
# File 'lib/sitediff/uriwrapper.rb', line 67

# FIXME: this is not used anymore
#
# Builds a new wrapper for this URI with `path` appended.
#
# @param path [String] for SiteDiff this includes (parts of) path, query,
#   and fragment
# @return [UriWrapper] a new instance constructed from the combined string
def +(path)
  # A separator is only needed for local paths (their trailing slashes are
  # stripped by the constructor) or when the URI has no path yet.
  separator = if local? || @uri.path.empty?
                '/'
              else
                ''
              end
  self.class.new(@uri.to_s + separator + path)
end

#charset_encoding(http_headers) ⇒ Object

Returns the encoding of an HTTP response as declared in its headers, or nil if not specified.



83
84
85
86
87
88
89
# File 'lib/sitediff/uriwrapper.rb', line 83

# Returns the charset named in the Content-Type header of an HTTP response,
# or nil if none is specified.
#
# The `charset` parameter name is matched case-insensitively and its value
# may be wrapped in double quotes (e.g. `charset="utf-8"`). An empty value
# counts as "not specified", so callers never receive an empty string.
#
# @param http_headers [#[], nil] response headers, looked up by the key
#   'Content-Type'; nil is treated as "no headers"
# @return [String, nil] the charset exactly as written in the header, or nil
def charset_encoding(http_headers)
  content_type = http_headers && http_headers['Content-Type']
  return unless content_type

  md = /;\s*charset="?([-\w]+)/i.match(content_type)
  md && md[1]
end

#local? ⇒ Boolean

Is this a local filesystem path?

Returns:

  • (Boolean)


62
63
64
# File 'lib/sitediff/uriwrapper.rb', line 62

# Is this a local filesystem path?
#
# A wrapped URI is considered local when it has no scheme component.
#
# @return [Boolean] true when the wrapped URI's scheme is nil
def local?
  @uri.scheme.nil?
end

#password ⇒ Object



50
51
52
# File 'lib/sitediff/uriwrapper.rb', line 50

# Password component of the wrapped URI.
#
# @return [Object] whatever the wrapped URI object reports as its password
def password
  @uri.password
end

#queue(hydra, &handler) ⇒ Object

Queue reading this URL, with a completion handler to run after.

The handler should be callable with a single ReadResult argument.

This method may choose not to queue the request at all, but simply execute right away.



148
149
150
151
152
153
154
# File 'lib/sitediff/uriwrapper.rb', line 148

# Schedules a read of this URI and arranges for `handler` to receive the
# outcome.
#
# Local paths are not queued at all: they are read immediately and the
# handler runs before this method returns. Everything else is wrapped in a
# Typhoeus request and handed to `hydra`.
#
# @param hydra [#queue] the queue that receives the HTTP request
# @yield completion handler, forwarded to read_file or typhoeus_request
def queue(hydra, &handler)
  return read_file(&handler) if local?

  request = typhoeus_request(&handler)
  hydra.queue(request)
end

#read_file ⇒ Object

Reads a file and yields the result to the completion handler; see #queue.



75
76
77
78
79
# File 'lib/sitediff/uriwrapper.rb', line 75

# Reads the local file named by the wrapped URI and yields the outcome to
# the completion handler exactly once, see #queue.
#
# A missing, unreadable, or non-file path is reported as an error result
# rather than raised. Only the file read itself is guarded: an exception
# raised by the handler propagates to the caller instead of being mistaken
# for a read failure (which would invoke the handler a second time).
#
# @yieldparam result [ReadResult] the file contents (read as UTF-8), or an
#   error result carrying the system error message
# @return [Object] whatever the handler returns
def read_file
  result =
    begin
      ReadResult.new(File.open(@uri.to_s, 'r:UTF-8', &:read))
    rescue Errno::ENOENT, Errno::ENOTDIR, Errno::EACCES, Errno::EISDIR => e
      ReadResult.error(e.message)
    end
  yield result
end

#to_s ⇒ Object



54
55
56
57
58
59
# File 'lib/sitediff/uriwrapper.rb', line 54

# String form of the wrapped URI with the user and password removed.
#
# Works on a copy, so the wrapped URI keeps its credentials.
#
# @return [String]
def to_s
  sanitized = @uri.dup.tap do |copy|
    copy.user = nil
    copy.password = nil
  end
  sanitized.to_s
end

#typhoeus_request ⇒ Object

Returns a Typhoeus::Request to fetch @uri

Completion callbacks of the request wrap the given handler which is assumed to accept a single ReadResult argument.



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/sitediff/uriwrapper.rb', line 95

# Returns a Typhoeus::Request to fetch @uri.
#
# Completion callbacks of the request wrap the given handler, which is
# assumed to accept a single ReadResult argument.
#
# On success the handler receives the body, re-tagged with the charset
# from the Content-Type header when one is given. If building or handling
# that result raises, the error is re-raised when @debug is set; otherwise
# the handler is invoked again with an error result. On failure the handler
# receives an error result describing the HTTP or connection problem.
#
# Error messages use the credential-free form of the URI (see #to_s), so
# a user/password embedded in the URI is not copied into reports.
#
# @yieldparam result [ReadResult]
# @return [Typhoeus::Request] the request, not yet queued or run
def typhoeus_request
  url = to_s
  params = @curl_opts.dup
  # Allow basic auth. Interpolation tolerates a user without a password
  # (String#+ would raise TypeError on the nil password).
  params[:userpwd] = "#{@uri.user}:#{@uri.password}" if @uri.user

  req = Typhoeus::Request.new(url, params)

  req.on_success do |resp|
    # Should at least be an option in the Cli to disable this.
    # "stop on first error"
    begin
      body = resp.body
      # Typhoeus does not respect HTTP headers when setting the encoding
      # resp.body; coerce if possible. This sits inside the begin block
      # because an unknown charset name makes force_encoding raise
      # ArgumentError, which should follow the same @debug policy.
      encoding = charset_encoding(resp.headers)
      body.force_encoding(encoding) if encoding
      yield ReadResult.new(body, encoding)
    rescue ArgumentError => e
      raise if @debug

      yield ReadResult.error("Parsing error for #{url}: #{e.message}")
    rescue => e
      raise if @debug

      yield ReadResult.error("Unknown parsing error for #{url}: #{e.message}")
    end
  end

  req.on_failure do |resp|
    # Apply nil-safety to every access on resp, not just the first one.
    code = resp&.response_code
    if (msg = resp&.status_message)
      yield ReadResult.error("HTTP error when loading #{url}: #{msg}", code)
    elsif (msg = resp && resp.options[:return_code])
      yield ReadResult.error("Connection error when loading #{url}: #{msg}",
                             code)
    else
      yield ReadResult.error("Unknown error when loading #{url}: #{msg}",
                             code)
    end
  end

  req
end

#user ⇒ Object



46
47
48
# File 'lib/sitediff/uriwrapper.rb', line 46

# User component of the wrapped URI.
#
# @return [Object] whatever the wrapped URI object reports as its user
def user
  @uri.user
end