Class: WebMole::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/webmole/crawler.rb

Instance Method Summary

Constructor Details

#initialize(options) ⇒ Crawler

Returns a new instance of Crawler.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/webmole/crawler.rb', line 9

# Builds a crawler from an options collection indexed by symbol keys.
#
# Keys read from +options+:
#   :url              - start URL; its host is stored as the initial domain
#   :depth            - falls back to 3 when nil/false
#   :delay            - falls back to 1.0 when nil/false
#                       (presumably seconds -- confirm where it is consumed)
#   :threads          - falls back to 1 when nil/false
#   :timeout          - falls back to 300 when nil/false
#                       (presumably seconds -- confirm where it is consumed)
#   :verbose, :restrict_domain, :output, :format, :save_source_url
#                     - stored exactly as given
#   :gathering_mode   - stored, passed to Scraper.new, and when truthy
#                       triggers GatheringMode.setup
#   :scrape_option, :pattern
#                     - forwarded to Scraper.new
#
# @param options [Hash] crawler configuration (assumed to be a Hash; only
#   +#[]+ with symbol keys is used here)
# @raise [StandardError] whatever +URI()+ raises when :url cannot be
#   converted to a URI
def initialize(options)
  # Crawl target; the host of the start URL is kept separately.
  @url = options[:url]
  @initial_domain = URI(@url).host
  @restrict_domain = options[:restrict_domain]

  # Tunables: a nil/false value is replaced by the default on the right.
  @depth = options[:depth] || 3
  @delay = options[:delay] || 1.0
  @threads = options[:threads] || 1
  @timeout = options[:timeout] || 300

  # Output and reporting settings, stored as given.
  @verbose = options[:verbose]
  @output = options[:output]
  @format = options[:format]
  @save_source_url = options[:save_source_url]

  # Bookkeeping containers plus a mutex created alongside them.
  @urls_to_scrape = Set.new
  @visited = Set.new
  @mutex = Mutex.new

  # Collaborators, constructed in the same order as before.
  @user_agent_switcher = UserAgentSwitcher.new
  @gathering_mode = options[:gathering_mode]
  @scraper = Scraper.new(
    options[:scrape_option],
    options[:pattern],
    @gathering_mode
  )

  # Gathering mode needs its own setup step, run only when requested.
  if @gathering_mode
    GatheringMode.setup
  end
end

Instance Method Details

#crawl ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/webmole/crawler.rb', line 30

# Runs the crawl in two phases and then reports a summary.
#
# Each phase prints a cyan banner and then invokes its step method:
# first +discover_urls+, then +process_urls+. The wall-clock times taken
# before the first phase and after the last one are handed to
# +print_summary+.
#
# @return [Object] whatever +print_summary+ returns
def crawl
  began_at = Time.now

  # Banner text paired with the step it announces, in execution order.
  phases = [
    ["Phase 1: Discovering URLs to scrape...", :discover_urls],
    ["\nPhase 2: Scraping discovered URLs...", :process_urls]
  ]

  phases.each do |banner, step|
    puts banner.colorize(:cyan)
    send(step)
  end

  print_summary(began_at, Time.now)
end