Class: Aquatone::Collectors::Netcraft

Inherits:
Aquatone::Collector show all
Defined in:
lib/aquatone/collectors/netcraft.rb

Constant Summary collapse

BASE_URI =
"http://searchdns.netcraft.com/".freeze
HOSTNAME_REGEX =
/<a href="http:\/\/(.*?)\/" rel="nofollow">/.freeze
RESULTS_PER_PAGE =
20.freeze
DEFAULT_PAGES_TO_PROCESS =
10.freeze

Constants inherited from Aquatone::Collector

Aquatone::Collector::DEFAULT_PRIORITY

Instance Attribute Summary

Attributes inherited from Aquatone::Collector

#domain, #hosts

Instance Method Summary collapse

Methods inherited from Aquatone::Collector

cli_options, descendants, #execute!, #initialize, meta, meta=, priority, sluggified_name

Constructor Details

This class inherits a constructor from Aquatone::Collector

Instance Method Details

#run ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/aquatone/collectors/netcraft.rb', line 18

# Walks the Netcraft DNS search result pages for the target domain and hands
# every hostname found to add_host.
#
# At most pages_to_process pages are requested. The first page uses the
# initial search query; each later page is requested with the last hostname
# of the previous page (`last`) and the 1-based offset of the next result
# (`from`). Pagination stops early as soon as a page yields a number of
# hostnames different from RESULTS_PER_PAGE, which is treated as the final
# page of results.
#
# Returns whatever Integer#times returns when all pages are processed, or nil
# when pagination stops early via break.
def run
  # BASE_URI already ends with a slash; strip it so the query URIs below do
  # not contain a double slash ("...netcraft.com//?...").
  base_uri     = BASE_URI.chomp("/")
  escaped_name = url_escape(domain.name)
  page_limit   = pages_to_process
  last_host    = nil
  seen_count   = 0

  page_limit.times do |index|
    page_number = index + 1
    uri =
      if page_number == 1
        "#{base_uri}/?restriction=site+contains&host=*.#{escaped_name}&lookup=wait..&position=limited"
      else
        "#{base_uri}/?host=*.#{escaped_name}&last=#{url_escape(last_host)}&from=#{seen_count + 1}&restriction=site%20contains&position=limited"
      end

    response = get_request(uri, { :headers => { "Referer" => BASE_URI } })

    # Named page_hosts (not hosts) to avoid shadowing the collector's own
    # #hosts attribute reader.
    page_hosts  = extract_hostnames_from_response(response.body)
    last_host   = page_hosts.last
    seen_count += page_hosts.count
    page_hosts.each { |host| add_host(host) }

    # A page that is not completely full means there are no further pages.
    break if page_hosts.count != RESULTS_PER_PAGE

    # Pause between consecutive requests only; sleeping after the final
    # configured page would just delay the collector for no benefit.
    random_sleep(5) if page_number < page_limit
  end
end