Class: Nanoc::Checking::Checks::ExternalLinks Private

Inherits:
Nanoc::Checking::Check show all
Defined in:
lib/nanoc/checking/checks/external_links.rb

Overview

This class is part of a private API. You should avoid using this class if possible, as it may be removed or be changed in the future.

A validator that verifies that all external links point to a location that exists.

Defined Under Namespace

Classes: Result

Instance Attribute Summary

Attributes inherited from Nanoc::Checking::Check

#issues

Instance Method Summary collapse

Methods inherited from Nanoc::Checking::Check

#add_issue, create, define, #excluded_patterns, #initialize, #output_filenames, #output_html_filenames

Constructor Details

This class inherits a constructor from Nanoc::Checking::Check

Instance Method Details

#excluded?(href) ⇒ Boolean

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

Returns:

  • (Boolean)


155
156
157
158
# File 'lib/nanoc/checking/checks/external_links.rb', line 155

# Tells whether an href matches any exclusion pattern configured for the
# external links check.
#
# Patterns are looked up under the :checks -> :external_links -> :exclude
# keys of @config (each level falls back to an empty default when missing)
# and every pattern is compiled with Regexp.new before matching.
#
# @param href the link target to test against the patterns
# @return [Boolean] true when at least one pattern matches
def excluded?(href)
  check_config = @config.fetch(:checks, {}).fetch(:external_links, {})
  patterns = check_config.fetch(:exclude, [])

  patterns.any? do |source|
    Regexp.new(source).match?(href)
  end
end

#excluded_file?(file) ⇒ Boolean

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

Returns:

  • (Boolean)


160
161
162
163
# File 'lib/nanoc/checking/checks/external_links.rb', line 160

# Tells whether an output file matches any file-exclusion pattern configured
# for the external links check.
#
# Patterns are looked up under the :checks -> :external_links ->
# :exclude_files keys of @config (each level falls back to an empty default
# when missing) and every pattern is compiled with Regexp.new before matching.
#
# @param file the output filename to test against the patterns
# @return [Boolean] true when at least one pattern matches
def excluded_file?(file)
  check_config = @config.fetch(:checks, {}).fetch(:external_links, {})
  patterns = check_config.fetch(:exclude_files, [])

  patterns.any? do |source|
    Regexp.new(source).match?(file)
  end
end

#extract_location(res, url) ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/nanoc/checking/checks/external_links.rb', line 112

# Determines the absolute URL that a redirect response points to.
#
# An absolute http(s) `Location` value is returned untouched. Any other value
# is treated as a relative reference and resolved against the URL that was
# requested, so path-relative (`other`), root-relative (`/other`) and
# scheme-relative (`//host/other`) targets each yield the URL a client would
# actually follow.
#
# @param res [#[]] the response; only its 'Location' entry is read
# @param url [URI::Generic] the URL whose request produced `res`; not mutated
# @return [String, nil] the redirect target, or nil when `res` carries no
#   'Location' value
def extract_location(res, url)
  location = res['Location']
  return nil if location.nil?
  return location if %r{^https?://}.match?(location)

  begin
    # Reference resolution handles relative paths, '..' segments and
    # scheme-relative targets, and drops the base URL's query and fragment.
    url.merge(location).to_s
  rescue URI::Error
    # The header is not a parseable URI reference. Fall back to appending it
    # to the site root so the caller still receives a string to work with.
    root = url.dup
    root.path = location.start_with?('/') ? '' : '/'
    root.query = nil
    root.fragment = nil
    root.to_s + location
  end
end

#path_for_url(url) ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/nanoc/checking/checks/external_links.rb', line 129

# Builds the request target for a URL: its path, followed by '?' and the
# query string when the URL has a query component.
#
# A nil or empty path is replaced by '/'.
#
# @param url [#path, #query] the URL to derive the request target from
# @return [String] the path, with the query appended when present
def path_for_url(url)
  request_path = url.path
  request_path = '/' if request_path.nil? || request_path.empty?

  return request_path unless url.query

  "#{request_path}?#{url.query}"
end

#request_url_once(url) ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



144
145
146
147
148
149
150
151
152
153
# File 'lib/nanoc/checking/checks/external_links.rb', line 144

# Performs a single GET request for the given URL and returns the response.
#
# The request target comes from path_for_url and the User-Agent header from
# user_agent. For URI::HTTPS URLs the connection uses TLS with certificate
# verification turned off (VERIFY_NONE).
#
# @param url [URI::Generic] the URL to request; host, port and class are read
# @return the object returned by Net::HTTP#request
def request_url_once(url)
  request = Net::HTTP::Get.new(path_for_url(url)).tap do |get|
    get['User-Agent'] = user_agent
  end

  connection = Net::HTTP.new(url.host, url.port)
  if url.instance_of?(URI::HTTPS)
    connection.use_ssl = true
    connection.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end

  connection.request(request)
end

#run ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/nanoc/checking/checks/external_links.rb', line 12

# Runs the check: gathers the external hrefs found in the output HTML files
# that are not excluded, validates them (in shuffled order), and records one
# issue per file for every href that failed validation.
def run
  # Find all broken external hrefs
  # TODO: de-duplicate this (duplicated in internal links check)
  checked_files = output_html_filenames.reject { |name| excluded_file?(name) }
  collector = ::Nanoc::Checking::LinkCollector.new(checked_files, :external)
  files_by_href = collector.filenames_per_href

  # Report each failure once for every file that contains the href
  select_invalid(files_by_href.keys.shuffle).each do |failure|
    files_by_href[failure.href].each do |containing_file|
      add_issue(
        "broken reference to <#{failure.href}>: #{failure.explanation}",
        subject: containing_file,
      )
    end
  end
end

#select_invalid(hrefs) ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



41
42
43
# File 'lib/nanoc/checking/checks/external_links.rb', line 41

# Validates every href through Parallel.map (called with in_threads: 10) and
# keeps only the non-nil outcomes, i.e. the hrefs for which validate returned
# a result.
#
# @param hrefs [Array] the hrefs to validate
# @return [Array] the non-nil values returned by validate
def select_invalid(hrefs)
  outcomes = ::Parallel.map(hrefs, in_threads: 10) do |href|
    validate(href)
  end

  outcomes.compact
end

#user_agent ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



165
166
167
# File 'lib/nanoc/checking/checks/external_links.rb', line 165

# Returns the User-Agent value to send with requests.
#
# The value is computed on first use as custom_user_agent, falling back to
# default_user_agent when that is nil or false, and is then cached in
# @_user_agent.
def user_agent
  return @_user_agent if @_user_agent

  @_user_agent = custom_user_agent || default_user_agent
end

#validate(href) ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/nanoc/checking/checks/external_links.rb', line 45

# Checks a single external href by requesting it, following redirects.
#
# Returns nil when the link is acceptable or deliberately skipped
# (javascript: URLs, excluded hrefs, non-HTTP(S) schemes, a 200 response, or
# a redirect back onto the href itself). Otherwise returns a Result pairing
# the href with an explanation.
#
# Failed requests and followed redirects draw from the same budget: one entry
# of the timeout list is consumed per loop iteration, whichever of the two
# happened.
#
# @param href [String] the link target to check
# @return [Result, nil] nil if nothing is wrong with the link, otherwise a
#   Result describing the problem
def validate(href)
  # Skip javascript: URLs
  #
  # This needs to be handled explicitly, because URI.parse does not
  # like `javascript:` URLs -- presumably because those are not
  # technically valid URLs.
  return nil if href.start_with?('javascript:')

  # Parse
  begin
    url = URI.parse(href)
  rescue URI::Error
    return Result.new(href, 'invalid URI')
  end

  # Skip excluded URLs
  return nil if excluded?(href)

  # Skip non-HTTP URLs
  return nil if url.scheme !~ /^https?$/

  # Get status; each attempt gets a longer timeout (in seconds) than the last
  last_err = nil
  timeouts = [3, 5, 10, 30, 60]
  final_attempt = timeouts.size - 1
  timeouts.each_with_index do |timeout, attempt|
    begin
      res = Timeout.timeout(timeout) { request_url_once(url) }
    rescue => e
      # Remember the failure and retry with the next timeout
      last_err = e
      next
    end

    case res.code
    when /^3..$/
      return Result.new(href, 'too many redirects') if attempt == final_attempt

      location = extract_location(res, url)
      return Result.new(href, 'redirection without a target location') if location.nil?

      # ignore redirects back onto self (misused to set HTTP cookies)
      return nil if href == location

      if /^30[18]$/.match?(res.code)
        return Result.new(href, "link has moved permanently to '#{location}'")
      end

      # A server may send a Location that is not a parseable URI; report the
      # link as broken rather than letting the parse error escape this method.
      begin
        url = URI.parse(location)
      rescue URI::Error
        return Result.new(href, "invalid redirect location '#{location}'")
      end
    when '200'
      return nil
    else
      return Result.new(href, res.code)
    end
  end

  # The loop only runs to completion when the final attempt raised, because
  # every other outcome of the final attempt returns from the method.
  unless last_err
    raise Nanoc::Core::Errors::InternalInconsistency, 'last_err cannot be nil'
  end

  Result.new(href, last_err.message)
end