Class: Utterson::HtmlCheck

Inherits:
Object
  • Object
show all
Defined in:
lib/utterson/html_check.rb

Constant Summary collapse

# Mutex shared by every HtmlCheck instance; it guards @@checked_urls and is
# also held while a result handler is being called.
@@semaphore =
Mutex.new
# Hash shared by every HtmlCheck instance that records which links have
# already been picked up for checking (the value is true once seen).
@@checked_urls =
{}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ HtmlCheck

Returns a new instance of HtmlCheck.



14
15
16
17
18
# File 'lib/utterson/html_check.rb', line 14

# Builds a checker for one HTML file, starting with an empty error table.
#
# @param opts [Hash] construction options
# @option opts [Object] :file path of the HTML file to scan (kept in @file)
# @option opts [Object] :root directory kept in @root
def initialize(opts = {})
  @errors = {}
  @file, @root = opts.values_at(:file, :root)
end

Instance Attribute Details

#errors ⇒ Object (readonly)

Returns the value of attribute errors.



9
10
11
# File 'lib/utterson/html_check.rb', line 9

# Reader for the error table built up by add_error.
#
# @return [Hash] the @errors hash, shaped as file => { url => response }
def errors
  @errors
end

Instance Method Details

#add_error(file, url, response) ⇒ Object



93
94
95
96
# File 'lib/utterson/html_check.rb', line 93

# Records a failed link check in @errors, creating the per-file table the
# first time a file is seen.
#
# @param file [Object] file the link was found in (outer key)
# @param url [Object] the link that failed (inner key)
# @param response [Object] description of the failure, stored as given
# @return [Object] the stored response
def add_error(file, url, response)
  (@errors[file] ||= {})[url] = response
end

#check_local_uri(url, file) ⇒ Object



83
84
85
86
87
88
89
90
91
# File 'lib/utterson/html_check.rb', line 83

# Checks that a local link points at an existing file and records a
# "File not found" error when it does not.
#
# Links starting with "/" are resolved against @root; every other link is
# resolved against the directory of the file that contains it. A query
# string ("?...") is dropped before the lookup, and the error is recorded
# under the link without its query string.
#
# NOTE(review): fragments ("#anchor") are not stripped, so "page.html#top"
# is looked up literally - confirm whether that is intended.
#
# @param url [String] link as written in the document; left unmodified
# @param file [String] path of the file containing the link
# @return [Object, nil] result of add_error when the file is missing,
#   nil otherwise
def check_local_uri(url, file)
  # Strip the query on a copy: gsub! would edit the caller's string in place
  # and fails on frozen strings.
  target = url.gsub(/\?.*$/, '')

  path = if target.start_with?('/')
           File.expand_path(".#{target}", @root)
         else
           File.expand_path(target, File.dirname(file))
         end

  # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
  add_error(file, target, "File not found") unless File.exist?(path)
end

#check_remote_uri(url, file) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/utterson/html_check.rb', line 63

# Sends an HTTP(S) HEAD request for a remote link and records an error when
# the link cannot be parsed, the request raises, or the status code does not
# start with 2 or 3.
#
# Protocol-relative links ("//host/path") are requested over plain HTTP.
#
# @param url [String] remote link as written in the document
# @param file [String] path of the file containing the link
# @return [Object, nil] result of add_error when an error was recorded,
#   nil otherwise
def check_remote_uri(url, file)
  begin
    uri = URI(url.gsub(/^\/\//, 'http://'))
  rescue URI::InvalidURIError => e
    # uri was never assigned on this path, so key the error by the link
    # itself instead of nil.to_s (an empty string).
    return add_error(file, url, e.message)
  end

  begin
    response = Net::HTTP.start(uri.host, uri.port,
                               :use_ssl => uri.scheme == 'https') do |http|
      # request_uri is the path plus any query string ("/" for an empty
      # path), so links that depend on their query are checked as written.
      http.head(uri.request_uri)
    end
    add_error(file, uri.to_s, response) if response.code =~ /^[^23]/
  rescue => e
    add_error(file, uri.to_s, e.message)
  end
end

#check_uri(url, file) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/utterson/html_check.rb', line 47

# Checks one link, at most once per distinct target across all HtmlCheck
# instances, dispatching to the remote or the local checker.
#
# Remote links (http://, https:// or protocol-relative //) and links
# starting with "/" are de-duplicated by their text alone. Any other local
# link is resolved against the directory of the file containing it, so it is
# de-duplicated per directory; keying it by text alone would let "img.png"
# checked from one directory suppress the check of a different "img.png"
# linked from another directory.
#
# @param url [String] link as written in the document
# @param file [String] path of the file containing the link
# @return [Object, nil] nil when the target was already seen, otherwise the
#   result of the checker that was called
def check_uri(url, file)
  remote = url =~ /^(https?:)?\/\//

  seen_key = if remote || url.start_with?('/')
               url
             else
               # Copy the string so the key stays stable even if url is
               # modified in place after it has been stored.
               [File.dirname(file), url.dup.freeze]
             end

  first_visit = @@semaphore.synchronize do
    next false if @@checked_urls[seen_key]
    @@checked_urls[seen_key] = true
  end
  return unless first_visit

  if remote
    check_remote_uri url, file
  else
    check_local_uri url, file
  end
end

#collect_uris_from(f) ⇒ Object



37
38
39
40
41
42
43
44
45
# File 'lib/utterson/html_check.rb', line 37

# Parses an HTML file with Nokogiri and gathers every src and href attribute
# value found while traversing the document.
#
# @param f [String] path of the HTML file to read
# @return [Array] attribute values in traversal order; for a node carrying
#   both attributes, src comes before href
def collect_uris_from(f)
  document = Nokogiri::HTML(File.read(f))
  links = []
  document.traverse do |node|
    %w[src href].each do |attribute|
      value = node[attribute]
      links << value unless value.nil?
    end
  end
  links
end

#run ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/utterson/html_check.rb', line 24

# Starts a background thread that checks every link collected from @file.
# When a handler has been registered with when_done, it is then called with
# errors: (this checker's error table) and urls: (the size of the shared
# checked-link table) while the class-wide mutex is held.
#
# @return [Thread] the thread doing the work
def run
  Thread.new do
    collect_uris_from(@file).each { |link| check_uri(link, @file) }
    next if @result_handler.nil?

    @@semaphore.synchronize do
      @result_handler.call(errors: @errors, urls: @@checked_urls.count)
    end
  end
end

#when_done(&handler) ⇒ Object



20
21
22
# File 'lib/utterson/html_check.rb', line 20

# Registers the block that run calls once all links have been checked.
# Calling it again replaces the previous block; calling it without a block
# clears it.
#
# @yield [result] called by run with a hash holding :errors and :urls
# @return [Proc, nil] the registered block
def when_done(&callback)
  @result_handler = callback
end