Class: SiteHealth::Check

Inherits: Object
Defined in:
lib/site_health.rb

Defined Under Namespace

Classes: HTTPCodeJournal

Constant Summary

BrokenLinkJournal =
KeyStruct.new(:url, :exists_on)
ChecksJournal =
KeyStruct.new(
  :missing_html_title,
  :broken_urls,
  :http_error_urls,
  :html_error_urls,
  :html_warning_urls,
  :xml_error_urls,
  :css_error_urls,
  :css_warning_urls
)
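
Both constants are KeyStruct-based value objects; judging from the calls in #broken_links and #map_http_error_urls below, they are built with keyword arguments, much like a Struct with keyword_init. A minimal sketch (the URLs are placeholders, not from the gem):

# Hypothetical illustration of the journal value objects.
journal = BrokenLinkJournal.new(
  url: "https://example.com/missing",  # the unreachable URL
  exists_on: ["https://example.com/"]  # pages linking to it
)
journal.url # => "https://example.com/missing"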

Instance Attribute Summary

Class Method Summary

Instance Method Summary

Constructor Details

#initialize(site:) ⇒ Check

Returns a new instance of Check.



# File 'lib/site_health.rb', line 48

def initialize(site:)
  @site = site
end

Instance Attribute Details

#site ⇒ Object (readonly)

Returns the value of attribute site.



# File 'lib/site_health.rb', line 46

def site
  @site
end

Class Method Details

.call(**args) ⇒ Object



# File 'lib/site_health.rb', line 22

def self.call(**args)
  new(**args).call
end
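
.call is a convenience wrapper: it builds an instance and immediately runs the crawl. A minimal usage sketch (the site URL is a placeholder, not from the gem):

# Hypothetical example: crawl a site and list its broken links.
journal = SiteHealth::Check.call(site: "https://example.com")
journal.broken_urls.each do |broken|
  puts "#{broken.url} is linked from #{broken.exists_on.join(', ')}"
end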

Instance Method Details

#broken_links(spider, url_map) ⇒ Object

Finds all pages that have broken links.



# File 'lib/site_health.rb', line 120

def broken_links(spider, url_map)
  # FIXME: spider#failures only returns timeout errors etc. and not HTTP
  #        error status codes, so we need two types of "failed" URLs.
  spider.failures.map do |failed_url|
    BrokenLinkJournal.new(url: failed_url, exists_on: url_map[failed_url])
  end
end
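
As the FIXME above notes, Spidr's #failures only reports connection-level problems (timeouts and the like), while HTTP error statuses are collected separately in #call via HTTPCodeJournal. A sketch of how the two sets merge into one list, mirroring the code in #call (variable names are illustrative):

# Illustration only: both failure types become BrokenLinkJournal entries.
connection_failures = broken_links(spider, url_map)            # from spider.failures
status_failures = map_http_error_urls(code_journals, url_map)  # from 4xx/5xx pages
broken_urls = connection_failures + status_failures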

#call ⇒ Object



# File 'lib/site_health.rb', line 52

def call
  url_map = Hash.new { |hash, key| hash[key] = [] }

  missing_html_title = []
  http_error_urls = []
  html_error_urls = []
  html_warning_urls = []
  xml_error_urls = []
  css_error_urls = []
  css_warning_urls = []

  spider = Spidr.site(site) do |spider|
    spider.every_link do |origin, destination|
      url_map[destination] << origin
    end

    spider.every_page do |page|
      code_journal = HTTPCodeJournal.new(url: page.url, code: page.code)
      http_error_urls << code_journal if code_journal.error?

      if page.css?
        result = Checkers::CSSPage.check(page)
        css_error_urls << result if result.errors?
      end

      if page.xml?
        result = Checkers::XMLPage.check(page)
        xml_error_urls << result if result.errors?
      end

      if page.html?
        result = Checkers::HTMLPage.check(page)
        missing_html_title << result if result.missing_title?
        html_error_urls << result if result.errors?
      end
    end
  end

  http_error_urls = map_http_error_urls(http_error_urls, url_map)
  broken_urls = broken_links(spider, url_map) + http_error_urls

  ChecksJournal.new(
    missing_html_title: missing_html_title,
    broken_urls: broken_urls,
    http_error_urls: http_error_urls,
    html_error_urls: html_error_urls,
    html_warning_urls: html_warning_urls,
    xml_error_urls: xml_error_urls,
    css_error_urls: css_error_urls,
    css_warning_urls: css_warning_urls
  )
end
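
The returned ChecksJournal exposes one array per check. A hedged sketch of reading a few of the fields (the reporting code is illustrative, not part of the gem):

# Hypothetical example: summarize a finished crawl.
journal = SiteHealth::Check.new(site: "https://example.com").call
puts "Pages missing an HTML <title>: #{journal.missing_html_title.size}"
puts "HTML validation errors: #{journal.html_error_urls.size}"
puts "XML parse errors: #{journal.xml_error_urls.size}"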

#map_http_error_urls(urls, url_map) ⇒ Object



# File 'lib/site_health.rb', line 113

def map_http_error_urls(urls, url_map)
  # urls holds the HTTPCodeJournal entries collected in #call
  urls.map do |code_journal|
    BrokenLinkJournal.new(url: code_journal.url, exists_on: url_map[code_journal.url])
  end
end

#validate_css_page(page, errors) ⇒ Object



# File 'lib/site_health.rb', line 105

def validate_css_page(page, errors)
  css_checker = Checkers::CSSPage.new(page)
  result = css_checker.check
  return unless result.errors?

  result
end

#validate_html(html_url) ⇒ W3CValidators::Results

Returns:

  • (W3CValidators::Results)

Raises:

  • (W3CValidators::ValidatorUnavailable)

    the service is offline or returns 400 Bad Request




# File 'lib/site_health.rb', line 131

def validate_html(html_url)
  validator = W3CValidators::NuValidator.new
  validator.validate_uri(html_url)
end