Class: SiteHealth::Check

Inherits: Object
Defined in:
lib/site_health.rb

Defined Under Namespace

Classes: HTTPCodeJournal

Constant Summary

BrokenLinkJournal =
KeyStruct.new(:url, :exists_on)
ChecksJournal =
KeyStruct.new(
  :missing_html_title,
  :broken_urls,
  :http_error_urls,
  :html_error_urls,
  :html_warning_urls,
  :xml_error_urls,
  :css_error_urls,
  :css_warning_urls
)
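
Both constants are KeyStruct-based value objects; judging from the calls in #broken_links and #map_http_error_urls below, they are built with keyword arguments, much like a Struct with keyword_init. A minimal sketch (the URLs are placeholders, not from the gem):

# Hypothetical illustration of the journal value objects.
journal = BrokenLinkJournal.new(
  url: "https://example.com/missing",  # the unreachable URL
  exists_on: ["https://example.com/"]  # pages linking to it
)
journal.url # => "https://example.com/missing"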

Instance Attribute Summary

Class Method Summary

Instance Method Summary

Constructor Details

#initialize(site:) ⇒ Check

Returns a new instance of Check.



# File 'lib/site_health.rb', line 48

def initialize(site:)
  @site = site
end

Instance Attribute Details

#site ⇒ Object (readonly)

Returns the value of attribute site.



# File 'lib/site_health.rb', line 46

def site
  @site
end

Class Method Details

.call(**args) ⇒ Object



# File 'lib/site_health.rb', line 22

def self.call(**args)
  new(**args).call
end
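
.call is a convenience wrapper: it builds an instance and immediately runs the crawl. A minimal usage sketch (the site URL is a placeholder, not from the gem):

# Hypothetical example: crawl a site and list its broken links.
journal = SiteHealth::Check.call(site: "https://example.com")
journal.broken_urls.each do |broken|
  puts "#{broken.url} is linked from #{broken.exists_on.join(', ')}"
end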

Instance Method Details

#broken_links(spider, url_map) ⇒ Object

Finds all pages that have broken links.



# File 'lib/site_health.rb', line 120

def broken_links(spider, url_map)
  # FIXME: spider#failures only returns timeout errors etc. and not HTTP
  #        error status codes, so we need two types of "failed" URLs.
  spider.failures.map do |failed_url|
    BrokenLinkJournal.new(url: failed_url, exists_on: url_map[failed_url])
  end
end
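
As the FIXME above notes, Spidr's #failures only reports connection-level problems (timeouts and the like), while HTTP error statuses are collected separately in #call via HTTPCodeJournal. A sketch of how the two sets merge into one list, mirroring the code in #call (variable names are illustrative):

# Illustration only: both failure types become BrokenLinkJournal entries.
connection_failures = broken_links(spider, url_map)            # from spider.failures
status_failures = map_http_error_urls(code_journals, url_map)  # from 4xx/5xx pages
broken_urls = connection_failures + status_failures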

#call ⇒ Object



# File 'lib/site_health.rb', line 52

def call
  url_map = Hash.new { |hash, key| hash[key] = [] }

  missing_html_title = []
  http_error_urls = []
  html_error_urls = []
  html_warning_urls = []
  xml_error_urls = []
  css_error_urls = []
  css_warning_urls = []

  spider = Spidr.site(site) do |spider|
    spider.every_link do |origin, destination|
      url_map[destination] << origin
    end

    spider.every_page do |page|
      code_journal = HTTPCodeJournal.new(url: page.url, code: page.code)
      http_error_urls << code_journal if code_journal.error?

      if page.css?
        result = Checkers::CSSPage.check(page)
        css_error_urls << result if result.errors?
      end

      if page.xml?
        result = Checkers::XMLPage.check(page)
        xml_error_urls << result if result.errors?
      end

      if page.html?
        result = Checkers::HTMLPage.check(page)
        missing_html_title << result if result.missing_title?
        html_error_urls << result if result.errors?
      end
    end
  end

  http_error_urls = map_http_error_urls(http_error_urls, url_map)
  broken_urls = broken_links(spider, url_map) + http_error_urls

  ChecksJournal.new(
    missing_html_title: missing_html_title,
    broken_urls: broken_urls,
    http_error_urls: http_error_urls,
    html_error_urls: html_error_urls,
    html_warning_urls: html_warning_urls,
    xml_error_urls: xml_error_urls,
    css_error_urls: css_error_urls,
    css_warning_urls: css_warning_urls
  )
end
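
The returned ChecksJournal exposes one array per check. A hedged sketch of reading a few of the fields (the reporting code is illustrative, not part of the gem):

# Hypothetical example: summarize a finished crawl.
journal = SiteHealth::Check.new(site: "https://example.com").call
puts "Pages missing an HTML <title>: #{journal.missing_html_title.size}"
puts "HTML validation errors: #{journal.html_error_urls.size}"
puts "XML parse errors: #{journal.xml_error_urls.size}"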

#map_http_error_urls(urls, url_map) ⇒ Object



# File 'lib/site_health.rb', line 113

def map_http_error_urls(urls, url_map)
  # urls holds the HTTPCodeJournal entries collected in #call
  urls.map do |code_journal|
    BrokenLinkJournal.new(url: code_journal.url, exists_on: url_map[code_journal.url])
  end
end

#validate_css_page(page, errors) ⇒ Object



# File 'lib/site_health.rb', line 105

def validate_css_page(page, errors)
  css_checker = Checkers::CSSPage.new(page)
  result = css_checker.check
  return unless result.errors?

  result
end

#validate_html(html_url) ⇒ W3CValidators::Results

Returns:

  • (W3CValidators::Results)

Raises:

  • (W3CValidators::ValidatorUnavailable)

    the service is offline or returns 400 Bad Request




# File 'lib/site_health.rb', line 131

def validate_html(html_url)
  validator = W3CValidators::NuValidator.new
  validator.validate_uri(html_url)
end