Class: SiteInspector::Endpoint

Inherits:
Object
  • Object
show all
Defined in:
lib/site-inspector/endpoint.rb,
lib/site-inspector/checks/dns.rb,
lib/site-inspector/checks/hsts.rb,
lib/site-inspector/checks/check.rb,
lib/site-inspector/checks/https.rb,
lib/site-inspector/checks/whois.rb,
lib/site-inspector/checks/content.rb,
lib/site-inspector/checks/cookies.rb,
lib/site-inspector/checks/headers.rb,
lib/site-inspector/checks/sniffer.rb,
lib/site-inspector/checks/wappalyzer.rb,
lib/site-inspector/checks/accessibility.rb

Overview

Every domain has four possible “endpoints” to evaluate

For example, if you had `example.com` you’d have:

1. `http://example.com`
2. `http://www.example.com`
3. `https://example.com`
4. `https://www.example.com`

Because each of the four endpoints could potentially respond differently, we must evaluate all four to make certain determinations.

Defined Under Namespace

Classes: Accessibility, Check, Content, Cookies, Dns, Headers, Hsts, Https, Sniffer, Wappalyzer, Whois

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(host, options = {}) ⇒ Endpoint

Initiate a new Endpoint object

host - (String) the endpoint to query (e.g., `example.com`)

options - a hash of options

domain - the parent domain object, if passed, facilitates caching of redirects


22
23
24
25
26
27
28
29
30
# File 'lib/site-inspector/endpoint.rb', line 22

# Build an Endpoint from a URL string.
#
# host    - String to query; it is downcased and parsed with Addressable::URI.
# options - Hash of options:
#           :domain - stored as-is and returned by #domain.
def initialize(host, options = {})
  parsed = Addressable::URI.parse(host.downcase)
  # The root URL always has an implicit path of "/", even if not requested.
  # Make it explicit to facilitate caching and prevent a potential redirect.
  parsed.path = '/'
  @uri = parsed

  # Host with any leading "www." removed
  @host = uri.host.sub(/^www\./, '')

  @domain = options[:domain]
  # Cache of instantiated checks, keyed by check name (filled by #method_missing)
  @checks = {}
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(method_sym, *arguments, &block) ⇒ Object



186
187
188
189
190
191
192
193
# File 'lib/site-inspector/endpoint.rb', line 186

# Resolve calls named after a registered check to a per-endpoint check instance.
#
# Looks for an entry in SiteInspector::Endpoint.checks whose #name equals
# method_sym. On a match the check is instantiated with this endpoint and cached
# in @checks, so later calls return the same instance. Any other name falls
# through to super.
def method_missing(method_sym, *arguments, &block)
  check_class = SiteInspector::Endpoint.checks.find { |candidate| candidate.name == method_sym }
  return super unless check_class

  @checks[method_sym] ||= check_class.new(self)
end

Instance Attribute Details

#domainObject

Returns the value of attribute domain.



15
16
17
# File 'lib/site-inspector/endpoint.rb', line 15

# Reader for @domain, which #initialize takes from options[:domain]
# (nil when that option was not supplied).
def domain
  @domain
end

#hostObject

Returns the value of attribute host.



15
16
17
# File 'lib/site-inspector/endpoint.rb', line 15

# Reader for @host, which #initialize sets to the parsed URI's host with a
# leading "www." removed.
def host
  @host
end

#uriObject

Returns the value of attribute uri.



15
16
17
# File 'lib/site-inspector/endpoint.rb', line 15

# Reader for @uri, the Addressable::URI built in #initialize (its path is
# forced to "/").
def uri
  @uri
end

Class Method Details

.checksObject



180
181
182
183
184
# File 'lib/site-inspector/endpoint.rb', line 180

# List the enabled Check subclasses, sorted by name.
#
# The list is computed once by scanning ObjectSpace for classes that inherit
# from Check and answer true to .enabled?, then memoized in @checks.
def self.checks
  unless defined?(@checks)
    enabled = ObjectSpace.each_object(Class).select { |klass| klass < Check && klass.enabled? }
    @checks = enabled.sort_by(&:name)
  end

  @checks
end

Instance Method Details

#external_redirect?Boolean

Returns:

  • (Boolean)


134
135
136
# File 'lib/site-inspector/endpoint.rb', line 134

# True when the endpoint returned by #resolves_to has a different #host
# than this endpoint.
def external_redirect?
  final_host = resolves_to.host
  !(host == final_host)
end

#http?Boolean

Returns:

  • (Boolean)


44
45
46
# File 'lib/site-inspector/endpoint.rb', line 44

# Logical negation of #https?.
def http?
  https? ? false : true
end

#https?Boolean

Returns:

  • (Boolean)


40
41
42
# File 'lib/site-inspector/endpoint.rb', line 40

# Returns the result of `https.scheme?`.
#
# NOTE(review): `https` is not defined explicitly in this class; it is
# presumably resolved through #method_missing to the Https check — confirm.
def https?
  https.scheme?
end

#inspectObject



142
143
144
# File 'lib/site-inspector/endpoint.rb', line 142

# Short debugging representation that shows only the endpoint's URI.
def inspect
  %(#<SiteInspector::Endpoint uri="#{uri}">)
end

#redirectObject

If the domain is a redirect, what’s the first endpoint we’re redirected to?



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/site-inspector/endpoint.rb', line 86

# If this endpoint answers with a 3xx, return the first endpoint we are
# redirected to.
#
# Returns nil when there is no response, the response code does not start with
# "3", the response carries no Location header, or the redirect keeps both the
# same host and the same scheme (a sub-path or self redirect). Otherwise
# returns whatever find_or_create_by_uri yields for the absolute redirect URL;
# that value is memoized in @redirect.
def redirect
  return unless response && response_code.start_with?('3')

  @redirect ||= begin
    location = headers['location']
    # A 3xx without a Location header gives us nothing to follow, and
    # Addressable::URI.parse(nil) would return nil and blow up below.
    return unless location

    target = Addressable::URI.parse(location)

    # This is a relative redirect, but we still need the absolute URI
    if target.relative?
      target.path = "/#{target.path}" unless target.path[0] == '/'
      # Keep the authority of a network-path reference ("//other.host/path");
      # only fill in our own host when the Location did not name one.
      target.host ||= host
      target.scheme = scheme
    end

    # This was a redirect to a subpath or back to itself, which we don't care about
    return if target.host == host && target.scheme == scheme

    # Init a new endpoint representing the redirect
    find_or_create_by_uri(target.to_s)
  end
end

#redirect?Boolean

Does this endpoint return a redirect?

Returns:

  • (Boolean)


108
109
110
# File 'lib/site-inspector/endpoint.rb', line 108

# Does this endpoint return a redirect? Coerces #redirect to true/false.
def redirect?
  redirect ? true : false
end

#request(options = {}) ⇒ Object



52
53
54
55
56
57
58
# File 'lib/site-inspector/endpoint.rb', line 52

# Build a Typhoeus request for this endpoint, queue it on hydra, run hydra and
# return the request's response.
#
# options - Hash of Typhoeus options, merged over SiteInspector.typhoeus_defaults.
#           :path - optional path joined onto #uri to form the request target;
#                   it is never forwarded to Typhoeus.
#
# The caller's options hash is left untouched.
def request(options = {})
  # Work on a copy so extracting :path does not mutate the caller's hash
  typhoeus_options = options.dup
  path = typhoeus_options.delete(:path)
  target = path ? URI.join(uri, path) : uri

  queued = Typhoeus::Request.new(target, SiteInspector.typhoeus_defaults.merge(typhoeus_options))
  hydra.queue(queued)
  hydra.run
  queued.response
end

#resolves_toObject

What’s the effective URL of a request to this domain?



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/site-inspector/endpoint.rb', line 113

# What's the effective endpoint of a request to this one?
#
# Returns self when this endpoint does not redirect, and the first redirect
# target when that target does not itself redirect. Otherwise a request with
# followlocation: true is issued and the final URL is handed to
# find_or_create_by_uri; that result is memoized in @resolves_to.
def resolves_to
  return self unless redirect?

  first_hop = redirect
  # A single hop: the redirect target is already the final destination
  return first_hop unless first_hop.redirect?

  @resolves_to ||= begin
    followed = request(followlocation: true)

    # Workaround for Webmock not playing nicely with Typhoeus redirects
    final_url =
      if followed.mock?
        followed.headers['Location'] || followed.request.url
      else
        followed.effective_url
      end

    find_or_create_by_uri(final_url)
  end
end

#respond_to_missing?(method_sym, include_private = false) ⇒ Boolean

Returns:

  • (Boolean)


195
196
197
198
199
200
201
# File 'lib/site-inspector/endpoint.rb', line 195

# Companion to #method_missing: report that the endpoint responds to any
# method named after a registered check.
#
# Uses the same lookup as #method_missing (a name match against
# SiteInspector::Endpoint.checks), so respond_to? is true for a check even
# before it has been instantiated. Anything else is deferred to super.
def respond_to_missing?(method_sym, include_private = false)
  SiteInspector::Endpoint.checks.any? { |check| check.name == method_sym } || super
end

#responds?Boolean

Does the server respond at all?

Returns:

  • (Boolean)


81
82
83
# File 'lib/site-inspector/endpoint.rb', line 81

# Does the server respond at all?
#
# Returns false when there is no response object, when the response code is 0,
# or when the request timed out; true otherwise.
def responds?
  # Guard against a missing response, as #response_code and #timed_out? do
  return false unless response

  response.code != 0 && !timed_out?
end

#responseObject

Makes a GET request of the given host

Returns the Typhoeus::Response object



63
64
65
# File 'lib/site-inspector/endpoint.rb', line 63

# Response for a default #request to this endpoint.
#
# The first truthy result of #request is memoized in @response and reused.
def response
  return @response if @response

  @response = request
end

#response_codeObject



67
68
69
# File 'lib/site-inspector/endpoint.rb', line 67

# The response's response_code as a String, or nil when there is no response.
def response_code
  return unless response

  response.response_code.to_s
end

#root?Boolean

Returns:

  • (Boolean)


36
37
38
# File 'lib/site-inspector/endpoint.rb', line 36

# Logical negation of #www?.
def root?
  www? ? false : true
end

#schemeObject



48
49
50
# File 'lib/site-inspector/endpoint.rb', line 48

# Returns the scheme component of @uri.
def scheme
  @uri.scheme
end

#timed_out?Boolean

Returns:

  • (Boolean)


71
72
73
# File 'lib/site-inspector/endpoint.rb', line 71

# Whether the response reports a timeout.
#
# Returns the response's own timed_out? value, or nil when there is no response.
def timed_out?
  reply = response
  reply.nil? ? nil : reply.timed_out?
end

#to_h(options = {}) ⇒ Object

Returns information about the endpoint

By default, all checks are run. If one or more check names are passed in the options hash, only those checks will be run.

options:

a hash of check symbols and bools representing which checks should be run

Returns the hash representing the endpoint and its checks



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/site-inspector/endpoint.rb', line 155

# Returns information about the endpoint as a Hash.
#
# options - Hash keyed by check name. Every registered check whose name is a
#           key in options is run (only key presence is consulted, the value
#           is not inspected). If no registered check name is present, all
#           checks are run.
#
# Returns a Hash with the endpoint's basic attributes followed by one entry
# per executed check (check name => that check's #to_h), in the order the
# checks are listed by SiteInspector::Endpoint.checks.
def to_h(options = {})
  hash = {
    uri: uri.to_s,
    host: host,
    www: www?,
    https: https?,
    scheme: scheme,
    up: up?,
    responds: responds?,
    timed_out: timed_out?,
    redirect: redirect?,
    external_redirect: external_redirect?
  }

  # Either they've specifically asked for a check, or we throw everything at them
  available = SiteInspector::Endpoint.checks
  selected = available.select { |check| options.key?(check.name) }
  selected = available if selected.empty?

  # Collect [name, result] pairs from the workers and merge them here, on the
  # calling thread, rather than writing to the shared hash from several threads.
  # This also keeps the key order stable.
  results = Parallel.map(selected, in_threads: 4) do |check|
    [check.name, send(check.name).to_h]
  end

  hash.merge!(results.to_h)
end

#to_sObject



138
139
140
# File 'lib/site-inspector/endpoint.rb', line 138

# String form of the endpoint: the result of uri.to_s.
def to_s
  uri.to_s
end

#up?Boolean

Does the endpoint return a 2xx or 3xx response code?

Returns:

  • (Boolean)


76
77
78
# File 'lib/site-inspector/endpoint.rb', line 76

# Does the endpoint return a 2xx or 3xx response code?
#
# Returns false when there is no response at all.
def up?
  # Guard first: without a response, response_code is nil and cannot be queried
  return false unless response

  response_code.start_with?('2', '3')
end

#www?Boolean

Returns:

  • (Boolean)


32
33
34
# File 'lib/site-inspector/endpoint.rb', line 32

# True when the URI's host begins with "www.".
def www?
  /^www\./.match?(uri.host)
end