Class: SiteInspector::Endpoint::Content
- Inherits:
-
Check
- Object
- Check
- SiteInspector::Endpoint::Content
show all
- Defined in:
- lib/site-inspector/checks/content.rb
Instance Attribute Summary
Attributes inherited from Check
#endpoint
Instance Method Summary
collapse
Methods inherited from Check
enabled=, enabled?, #host, #initialize, #inspect, #name, name, #request
Instance Method Details
#body ⇒ Object
23
24
25
|
# File 'lib/site-inspector/checks/content.rb', line 23
# Returns the document rendered as a UTF-8 String with invalid byte
# sequences removed. The result is memoized in @body.
#
# When there is no document, `document.to_s` is `nil.to_s`, which is a
# frozen String on Ruby 2.7+. `force_encoding` mutates its receiver, so the
# string is duplicated first to avoid a FrozenError.
def body
  @body ||= document.to_s.dup.force_encoding('UTF-8').encode('UTF-8', invalid: :replace, replace: '')
end
|
#doctype ⇒ Object
39
40
41
|
# File 'lib/site-inspector/checks/content.rb', line 39
# Returns the external identifier of the document's internal subset
# (its DOCTYPE declaration), or nil when there is no document or the
# document has no internal subset.
def doctype
  doc = document
  # `document` returns nil when there is no response, so guard each step.
  subset = doc && doc.internal_subset
  subset && subset.external_id
end
|
#document ⇒ Object
Also known as:
doc
17
18
19
20
|
# File 'lib/site-inspector/checks/content.rb', line 17
# Parses the response body with Nokogiri::HTML and memoizes the parsed
# document in @doc. Returns nil when there is no response.
def document
  # Nokogiri is required here, at call time, rather than at file load.
  require 'nokogiri'
  return unless response

  @doc ||= Nokogiri::HTML(response.body)
end
|
#generator ⇒ Object
43
44
45
46
47
48
|
# File 'lib/site-inspector/checks/content.rb', line 43
# Returns the `content` attribute of the document's
# <meta name="generator"> tag, or nil when there is no document or no
# such tag. A non-nil result is memoized in @generator.
def generator
  @generator ||= begin
    doc = document
    # `document` returns nil when there is no response; avoid calling #at on nil.
    tag = doc && doc.at('meta[name="generator"]')
    tag['content'] if tag
  end
end
|
#humans_txt? ⇒ Boolean
35
36
37
|
# File 'lib/site-inspector/checks/content.rb', line 35
# Reports whether 'humans.txt' exists, as determined by path_exists?.
# Returns nil when proper_404s? is falsy, since the check is skipped then.
#
# The result is cached with an explicit `defined?` check rather than `||=`,
# so a negative answer is remembered too instead of being re-checked on
# every call.
def humans_txt?
  return unless proper_404s?
  return @humans_txt if defined?(@humans_txt)

  @humans_txt = path_exists?('humans.txt')
end
|
#path_exists?(path) ⇒ Boolean
Given a path (e.g., “/data”), check whether that path exists on the canonical endpoint.
7
8
9
|
# File 'lib/site-inspector/checks/content.rb', line 7
# Checks whether the given path can be requested successfully on the
# endpoint, following redirects (followlocation: true).
#
# Returns the falsy value of endpoint.up? unchanged when the endpoint is
# not up; otherwise returns the response's success? value.
def path_exists?(path)
  reachable = endpoint.up?
  return reachable unless reachable

  endpoint.request(path: path, followlocation: true).success?
end
|
#prefetch ⇒ Object
50
51
52
53
54
55
56
57
58
59
|
# File 'lib/site-inspector/checks/content.rb', line 50
# Queues requests for robots.txt, sitemap.xml, humans.txt and the random
# path on SiteInspector.hydra, then runs the hydra. Each request uses
# SiteInspector.typhoeus_defaults with followlocation enabled.
#
# Does nothing and returns nil when the endpoint is not up.
def prefetch
  if endpoint.up?
    request_options = SiteInspector.typhoeus_defaults.merge(followlocation: true)
    targets = ['robots.txt', 'sitemap.xml', 'humans.txt', random_path]

    targets.each do |target|
      url = URI.join(endpoint.uri, target)
      SiteInspector.hydra.queue(Typhoeus::Request.new(url, request_options))
    end

    SiteInspector.hydra.run
  end
end
|
#proper_404s? ⇒ Boolean
61
62
63
|
# File 'lib/site-inspector/checks/content.rb', line 61
# Reports whether the endpoint handles missing pages properly: true when
# the random path does NOT exist according to path_exists?.
#
# The result is cached with an explicit `defined?` check rather than `||=`,
# so a `false` answer is remembered as well and the probe is not repeated
# on every call.
def proper_404s?
  return @proper_404s if defined?(@proper_404s)

  @proper_404s = !path_exists?(random_path)
end
|
#response ⇒ Object
The default Check#response method comes from a HEAD request. The content check has a special response that includes the body from a GET request.
13
14
15
|
# File 'lib/site-inspector/checks/content.rb', line 13
# Returns the result of a GET request made through the endpoint,
# memoized in @response so the request is only issued until a truthy
# result has been obtained.
def response
  return @response if @response

  @response = endpoint.request(method: :get)
end
|
#robots_txt? ⇒ Boolean
27
28
29
|
# File 'lib/site-inspector/checks/content.rb', line 27
# Reports whether 'robots.txt' exists, as determined by path_exists?.
# Returns nil when proper_404s? is falsy, since the check is skipped then.
#
# The result is cached in @robots_txt (previously the misspelled
# @bodts_txt) with an explicit `defined?` check rather than `||=`, so a
# negative answer is remembered too instead of being re-checked on every
# call.
def robots_txt?
  return unless proper_404s?
  return @robots_txt if defined?(@robots_txt)

  @robots_txt = path_exists?('robots.txt')
end
|
#sitemap_xml? ⇒ Boolean
31
32
33
|
# File 'lib/site-inspector/checks/content.rb', line 31
# Reports whether 'sitemap.xml' exists, as determined by path_exists?.
# Returns nil when proper_404s? is falsy, since the check is skipped then.
#
# The result is cached with an explicit `defined?` check rather than `||=`,
# so a negative answer is remembered too instead of being re-checked on
# every call.
def sitemap_xml?
  return unless proper_404s?
  return @sitemap_xml if defined?(@sitemap_xml)

  @sitemap_xml = path_exists?('sitemap.xml')
end
|
#to_h ⇒ Object
65
66
67
68
69
70
71
72
73
74
75
|
# File 'lib/site-inspector/checks/content.rb', line 65
# Builds a Hash summarizing this check: doctype, generator, the three
# well-known file checks, and proper_404s.
#
# Calls prefetch before collecting any values (presumably so the
# path checks below can reuse already-fetched responses — confirm
# against the request caching setup).
def to_h
  prefetch

  summary = {}
  summary[:doctype]     = doctype
  summary[:generator]   = generator
  summary[:sitemap_xml] = sitemap_xml?
  summary[:robots_txt]  = robots_txt?
  summary[:humans_txt]  = humans_txt?
  summary[:proper_404s] = proper_404s?
  summary
end
|