Class: SiteInspector::Endpoint::Content

Inherits:
Check
  • Object
show all
Defined in:
lib/site-inspector/checks/content.rb

Instance Attribute Summary

Attributes inherited from Check

#endpoint

Instance Method Summary collapse

Methods inherited from Check

enabled=, enabled?, #host, #initialize, #inspect, #name, name, #request

Constructor Details

This class inherits a constructor from SiteInspector::Endpoint::Check

Instance Method Details

#body ⇒ Object



23
24
25
# File 'lib/site-inspector/checks/content.rb', line 23

# Returns the document serialized with `to_s`, tagged as UTF-8, with byte
# sequences that are invalid in UTF-8 removed. The result is memoized.
#
# `document` returns nil when there is no response. On Ruby 2.7+ `nil.to_s`
# is a frozen string and `force_encoding` raises FrozenError on a frozen
# receiver, so the string is duplicated before it is re-tagged.
#
# @return [String]
def body
  @body ||= document.to_s.dup.force_encoding('UTF-8').encode('UTF-8', invalid: :replace, replace: '')
end

#doctype ⇒ Object



39
40
41
# File 'lib/site-inspector/checks/content.rb', line 39

# Returns the `external_id` of the document's internal subset (its DOCTYPE
# declaration), or nil when there is no document or the document has no
# internal subset.
#
# Each link is guarded because `document` returns nil without a response
# and a parsed document is not guaranteed to carry a DOCTYPE.
def doctype
  parsed = document
  subset = parsed && parsed.internal_subset
  subset && subset.external_id
end

#document ⇒ Object Also known as: doc



17
18
19
20
# File 'lib/site-inspector/checks/content.rb', line 17

# Parses the response body with Nokogiri::HTML and memoizes the parsed
# document. Returns nil whenever `response` is falsy.
def document
  # Nokogiri is required on each call rather than at file load.
  require 'nokogiri'
  return unless response

  @doc ||= Nokogiri::HTML(response.body)
end

#generator ⇒ Object



43
44
45
46
47
48
# File 'lib/site-inspector/checks/content.rb', line 43

# Returns the `content` attribute of the document's
# `<meta name="generator">` tag, or nil when there is no document or no
# such tag. A non-nil result is memoized.
#
# `document` returns nil when there is no response, so it is checked
# before being queried.
def generator
  @generator ||= begin
    parsed = document
    tag = parsed && parsed.at('meta[name="generator"]')
    tag['content'] if tag
  end
end

#humans_txt? ⇒ Boolean

Returns:

  • (Boolean)


35
36
37
# File 'lib/site-inspector/checks/content.rb', line 35

# Whether 'humans.txt' exists according to `path_exists?`. Returns nil
# without looking when `proper_404s?` is false.
#
# The result is cached with `defined?` rather than `||=`, because `||=`
# would repeat the lookup on every call whenever the answer is false.
#
# @return [Boolean, nil]
def humans_txt?
  return unless proper_404s?
  return @humans_txt if defined?(@humans_txt)

  @humans_txt = path_exists?('humans.txt')
end

#path_exists?(path) ⇒ Boolean

Given a path (e.g., “/data”), check whether that path exists on the canonical endpoint.

Returns:

  • (Boolean)


7
8
9
# File 'lib/site-inspector/checks/content.rb', line 7

# Checks whether a request for +path+ on the endpoint succeeds, following
# redirects. When the endpoint is not up, no request is made and the
# falsy value of `endpoint.up?` is returned as-is.
def path_exists?(path)
  reachable = endpoint.up?
  return reachable unless reachable

  endpoint.request(path: path, followlocation: true).success?
end

#prefetch ⇒ Object



50
51
52
53
54
55
56
57
58
59
# File 'lib/site-inspector/checks/content.rb', line 50

# Queues one request each for robots.txt, sitemap.xml, humans.txt and
# `random_path` on SiteInspector.hydra, then runs the queue. Does nothing
# when the endpoint is not up.
#
# NOTE(review): presumably this warms a response cache for the later
# `path_exists?` checks — confirm against the request/caching layer.
def prefetch
  return unless endpoint.up?

  request_options = SiteInspector.typhoeus_defaults.merge(followlocation: true)
  paths = ['robots.txt', 'sitemap.xml', 'humans.txt', random_path]

  paths.each do |relative|
    target = URI.join(endpoint.uri, relative)
    SiteInspector.hydra.queue(Typhoeus::Request.new(target, request_options))
  end

  SiteInspector.hydra.run
end

#proper_404s? ⇒ Boolean

Returns:

  • (Boolean)


61
62
63
# File 'lib/site-inspector/checks/content.rb', line 61

# True when `path_exists?(random_path)` is falsy, i.e. the endpoint does
# not report a random path as existing.
#
# The result is cached with `defined?` rather than `||=`, because `||=`
# would repeat the probe on every call whenever the answer is false.
#
# @return [Boolean]
def proper_404s?
  return @proper_404s if defined?(@proper_404s)

  @proper_404s = !path_exists?(random_path)
end

#response ⇒ Object

The default Check#response method is from a HEAD request. The content check has a special response which includes the body from a GET request.



13
14
15
# File 'lib/site-inspector/checks/content.rb', line 13

# Returns the endpoint's response to a request made with `method: :get`,
# reusing the stored response once a truthy one has been obtained.
def response
  return @response if @response

  @response = endpoint.request(method: :get)
end

#robots_txt? ⇒ Boolean

Returns:

  • (Boolean)


27
28
29
# File 'lib/site-inspector/checks/content.rb', line 27

# Whether 'robots.txt' exists according to `path_exists?`. Returns nil
# without looking when `proper_404s?` is false.
#
# The result is cached in @robots_txt (previously misspelled
# @bodts_txt). It is cached with `defined?` rather than `||=`, because
# `||=` would repeat the lookup on every call whenever the answer is false.
#
# @return [Boolean, nil]
def robots_txt?
  return unless proper_404s?
  return @robots_txt if defined?(@robots_txt)

  @robots_txt = path_exists?('robots.txt')
end

#sitemap_xml? ⇒ Boolean

Returns:

  • (Boolean)


31
32
33
# File 'lib/site-inspector/checks/content.rb', line 31

# Whether 'sitemap.xml' exists according to `path_exists?`. Returns nil
# without looking when `proper_404s?` is false.
#
# The result is cached with `defined?` rather than `||=`, because `||=`
# would repeat the lookup on every call whenever the answer is false.
#
# @return [Boolean, nil]
def sitemap_xml?
  return unless proper_404s?
  return @sitemap_xml if defined?(@sitemap_xml)

  @sitemap_xml = path_exists?('sitemap.xml')
end

#to_h ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
# File 'lib/site-inspector/checks/content.rb', line 65

# Builds the summary hash for this check. `prefetch` runs first; each
# value is then read in the same order as its key appears in the result.
#
# @return [Hash] with keys :doctype, :generator, :sitemap_xml,
#   :robots_txt, :humans_txt and :proper_404s
def to_h
  prefetch

  summary = {}
  summary[:doctype] = doctype
  summary[:generator] = generator
  summary[:sitemap_xml] = sitemap_xml?
  summary[:robots_txt] = robots_txt?
  summary[:humans_txt] = humans_txt?
  summary[:proper_404s] = proper_404s?
  summary
end