Class: Benchmark::HTTP::Spider

Inherits:
Object
Includes:
Async::Await
Defined in:
lib/benchmark/http/spider.rb

Instance Method Summary

Constructor Details

#initialize(depth: nil, ignore: nil) ⇒ Spider

Returns a new instance of Spider.



23
24
25
26
# File 'lib/benchmark/http/spider.rb', line 23

# Create a spider, remembering the supplied options on the instance.
#
# Both options are stored verbatim; nothing is validated or converted here.
#
# @param depth [Object, nil] saved as `@depth`. NOTE(review): presumably a limit
#   on how many links deep to crawl — confirm against the code that reads it.
# @param ignore [Object, nil] saved as `@ignore`. NOTE(review): presumably a
#   pattern for URLs to skip — confirm against the code that reads it.
def initialize(depth: nil, ignore: nil)
  @depth, @ignore = depth, ignore
end

Instance Method Details



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/benchmark/http/spider.rb', line 28

# Extract the links found in a response body and yield every one that is an
# HTTP(S) URL on the same host as `url`.
#
# The body is parsed with `LinksFilter.parse`. Each href is resolved against
# `url`, or against `url + filter.base` when the filter reports a base. A base
# that cannot be resolved is logged and ignored, so that one malformed base
# does not discard every link in the document.
#
# @param url [URI::Generic] the URL the response came from; used as the
#   default base and as the host that resolved links must match.
# @param response [#read] object whose `read` returns the body to parse.
# @yield [full_url] called once per accepted link.
# @yieldparam full_url [URI::HTTP] the resolved, absolute link.
# @return [Array] the non-nil values returned by the block, in link order;
#   an empty array if the body could not be parsed.
def extract_links(url, response)
  body = response.read
  
  begin
    filter = LinksFilter.parse(body)
  rescue => error
    # Best effort: an unparseable body simply contributes no links.
    Console.logger.error(self) {error}
    return []
  end
  
  base = url
  
  if filter.base
    begin
      base = url + filter.base
    rescue ArgumentError, URI::InvalidURIError
      # Treat a bad base like a bad href: report it and carry on, resolving
      # links relative to the original URL instead.
      Console.logger.warn(self) {"Ignoring invalid base #{filter.base.inspect}, relative to #{url}!"}
    end
  end
  
  results = filter.links.map do |href|
    next if href.nil? || href.empty?
    
    begin
      full_url = base + href
      
      # Only follow links that stay on the original host and speak HTTP(S).
      if full_url.host == url.host && full_url.kind_of?(URI::HTTP)
        yield full_url
      end
    rescue ArgumentError, URI::InvalidURIError
      Console.logger.warn(self) {"Could not fetch #{href}, relative to #{base}!"}
      next # Don't accumulate an item into the resulting array.
    end
  end
  
  # Skipped and rejected links produced nil above; drop them.
  results.compact
end