Class: Nanoc::Extra::Checking::Checks::ExternalLinks Private

Inherits:
Nanoc::Extra::Checking::Check show all
Defined in:
lib/nanoc/extra/checking/checks/external_links.rb

Overview

This class is part of a private API. You should avoid using this class if possible, as it may be removed or be changed in the future.

A validator that verifies that all external links point to a location that exists.

Defined Under Namespace

Classes: ArrayEnumerator, Result

Instance Attribute Summary

Attributes inherited from Nanoc::Extra::Checking::Check

#issues

Instance Method Summary collapse

Methods inherited from Nanoc::Extra::Checking::Check

#add_issue, create, #initialize

Methods included from Int::PluginRegistry::PluginMethods

#all, #identifier, #identifiers, #named, #register

Methods inherited from Int::Context

#get_binding, #initialize

Constructor Details

This class inherits a constructor from Nanoc::Extra::Checking::Check

Instance Method Details

#excluded?(href) ⇒ Boolean

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

Returns:

  • (Boolean)


170
171
172
173
# File 'lib/nanoc/extra/checking/checks/external_links.rb', line 170

# Tells whether a link target matches one of the configured exclusion patterns.
#
# Patterns are read from `@config[:checks][:external_links][:exclude]`; a
# missing key at any level yields an empty pattern list. Each pattern is
# compiled with `Regexp.new` and matched against the href.
#
# @param href [String] the link target to test
# @return [Boolean] true if at least one pattern matches
def excluded?(href)
  link_settings = @config.fetch(:checks, {}).fetch(:external_links, {})
  link_settings.fetch(:exclude, []).any? do |pattern|
    Regexp.new(pattern).match(href)
  end
end

#excluded_file?(file) ⇒ Boolean

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

Returns:

  • (Boolean)


175
176
177
178
# File 'lib/nanoc/extra/checking/checks/external_links.rb', line 175

# Tells whether an output file matches one of the configured file exclusion
# patterns.
#
# Patterns are read from `@config[:checks][:external_links][:exclude_files]`;
# a missing key at any level yields an empty pattern list. Each pattern is
# compiled with `Regexp.new` and matched against the filename.
#
# @param file [String] the filename to test
# @return [Boolean] true if at least one pattern matches
def excluded_file?(file)
  link_settings = @config.fetch(:checks, {}).fetch(:external_links, {})
  link_settings.fetch(:exclude_files, []).any? do |pattern|
    Regexp.new(pattern).match(file)
  end
end

#path_for_url(url) ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/nanoc/extra/checking/checks/external_links.rb', line 145

# Builds the request target (path plus optional query string) for a URL.
#
# The URL object is never modified, so calling this repeatedly for the same
# URL (e.g. a HEAD request followed by a GET retry) yields the same result.
#
# @param url [#path, #query] the URL to build the request target for
# @return [String] the URL's path, or '/' when the path is nil or empty,
#   followed by "?" and the query string when a query is present
def path_for_url(url)
  path = (url.path.nil? || url.path.empty?) ? '/' : url.path

  # Interpolate into a new string rather than appending with <<: `path` may be
  # the very string object held by `url`, and appending in place would corrupt
  # the URL (and fail outright on a frozen string).
  url.query ? "#{path}?#{url.query}" : path
end

#request_url_once(url, req_method = Net::HTTP::Head) ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



160
161
162
163
164
165
166
167
168
# File 'lib/nanoc/extra/checking/checks/external_links.rb', line 160

# Performs a single HTTP request against the given URL and returns the
# response.
#
# The request target is built with #path_for_url. When the URL is exactly a
# URI::HTTPS instance, SSL is enabled with certificate verification turned
# off (OpenSSL::SSL::VERIFY_NONE).
#
# @param url [URI::Generic] the URL to request; its host and port are used
#   for the connection
# @param req_method [Class] the request class to instantiate with the
#   request target (defaults to Net::HTTP::Head)
# @return [Object] whatever Net::HTTP#request returns for the request
def request_url_once(url, req_method = Net::HTTP::Head)
  request = req_method.new(path_for_url(url))

  connection = Net::HTTP.new(url.host, url.port)
  if url.instance_of?(URI::HTTPS)
    connection.use_ssl = true
    connection.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end

  connection.request(request)
end

#run ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/nanoc/extra/checking/checks/external_links.rb', line 14

# Runs the check: collects external hrefs from the HTML output files that are
# not excluded, validates them, and records one issue per (broken href, file
# referencing it) pair via #add_issue.
#
# @return [Object] the collection returned by #select_invalid
def run
  # TODO: de-duplicate this (duplicated in internal links check)
  # Keep only HTML output files that are not excluded by configuration.
  html_files = output_filenames.reject do |f|
    File.extname(f) != '.html' || excluded_file?(f)
  end
  files_by_href = ::Nanoc::Extra::LinkCollector.new(html_files, :external).filenames_per_href

  # Report every broken href once for each file that references it.
  select_invalid(files_by_href.keys).each do |failure|
    files_by_href[failure.href].each do |filename|
      add_issue(
        "broken reference to #{failure.href}: #{failure.explanation}",
        subject: filename,
      )
    end
  end
end

#select_invalid(hrefs) ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/nanoc/extra/checking/checks/external_links.rb', line 58

# Validates the given hrefs using ten worker threads and collects the
# results for the ones that fail validation.
#
# The hrefs are sorted and handed to an ArrayEnumerator shared by all
# workers; each worker keeps taking hrefs until the enumerator returns nil.
# Truthy results of #validate are added to a shared set under a mutex.
#
# @param hrefs [Array<String>] the hrefs to validate
# @return [Set] the truthy values returned by #validate
def select_invalid(hrefs)
  pending = ArrayEnumerator.new(hrefs.sort)
  lock = Mutex.new
  failures = Set.new

  workers = Array.new(10) do
    Thread.new do
      loop do
        href = pending.next
        break if href.nil?

        outcome = validate(href)
        lock.synchronize { failures << outcome } if outcome
      end
    end
  end
  workers.each(&:join)

  failures
end

#validate(href) ⇒ Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/nanoc/extra/checking/checks/external_links.rb', line 84

# Checks whether a single external href can be fetched successfully.
#
# Up to five attempts are made, with timeouts of 3, 5, 10, 30 and 60 seconds.
# Each attempt issues a HEAD request via #request_url_once and retries with
# GET when the answer is 405. An attempt that raises counts as failed and the
# next one is tried; an attempt that is answered with a redirect makes the
# next attempt follow the redirect target.
#
# @param href [String] the link target as found in the document
# @return [Result, nil] nil when the href is fine or cannot be checked
#   (excluded, not HTTP(S), or redirected to a non-HTTP(S) URL); otherwise a
#   Result holding the href and an explanation (status code, error message,
#   or a description of the redirect problem)
def validate(href)
  # Parse
  url = nil
  begin
    url = URI.parse(href)
  rescue URI::InvalidURIError
    return Result.new(href, 'invalid URI')
  end

  # Skip excluded URLs
  return nil if excluded?(href)

  # Skip non-HTTP URLs
  return nil if url.scheme !~ /\Ahttps?\z/

  # Get status
  res = nil
  last_err = nil
  timeouts = [3, 5, 10, 30, 60]
  last_attempt = timeouts.size - 1
  timeouts.each_with_index do |timeout, i|
    begin
      Timeout.timeout(timeout) do
        res = request_url_once(url)
        res = request_url_once(url, Net::HTTP::Get) if res.code == '405'
      end
    rescue => e
      # Remember the failure and retry with the next (longer) timeout.
      last_err = e
      next
    end

    case res.code
    when /\A3..\z/
      return Result.new(href, 'too many redirects') if i == last_attempt

      # A redirect without a target cannot be followed; report it instead of
      # failing on the missing header.
      location = res['Location']
      if location.nil? || location.empty?
        return Result.new(href, 'redirection without a target location')
      end

      # Resolve the target against the URL that was just requested, so that
      # relative, absolute-path and scheme-relative targets are all handled.
      begin
        url = url.merge(location)
      rescue URI::Error
        return Result.new(href, "invalid redirect location: #{location}")
      end

      # Same policy as for the href itself: non-HTTP URLs are not checked.
      return nil if url.scheme !~ /\Ahttps?\z/
    when '200'
      return nil
    else
      return Result.new(href, res.code)
    end
  end

  # Every attempt either returned above or failed, so the loop can only be
  # left after the final attempt raised.
  raise 'should not have gotten here' unless last_err

  Result.new(href, last_err.message)
end