Class: Macaron::Spawner

Inherits:
Object
  • Object
show all
Defined in:
lib/macaron/spawner.rb

Instance Method Summary

Constructor Details

#initialize(url, options = {}) ⇒ Spawner

Returns a new instance of Spawner.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/macaron/spawner.rb', line 8

# Builds the spawner and immediately runs the crawl, starting from +url+.
#
# The constructor does not return until @awaiting_counter reaches zero.
# Each URL taken from the @tasks queue is wrapped in a Crawler job, this
# object is registered as the job's observer, and the job is handed to the
# thread pool.
# NOTE(review): jobs presumably report back through #update (Observable
# convention) -- confirm against Crawler.
#
# @param url the first URL to crawl; it seeds both @tasks and @parsed_urls
# @param options [Hash] stored in @options; this method itself only reads
#   +:with_watir+, which switches on a shared Watir browser for the jobs
def initialize(url, options={})
  @options = options

  # threadpool(init workers, max workers, job timeout)
  threadpool = Threadpool.new(10, 10, job_timeout)

  # URLs waiting to be turned into jobs.
  @tasks = Queue.new
  @tasks << url

  # URLs already seen, consulted to prevent loop crawling.
  @parsed_urls = [url]

  # Number of URLs queued or in flight; the crawl ends when it hits zero.
  @awaiting_counter = 1

  # Optional webdriver, passed to every job. It is extended with MonitorMixin
  # so that users of the browser can synchronize on it.
  if @options[:with_watir]
    bot = Watir::Browser.new
    bot.extend(MonitorMixin)
  end

  until @awaiting_counter == 0
    begin
      # Wait a bounded time for work so the counter is re-checked regularly.
      Timeout::timeout(task_timeout) { url = @tasks.shift }
    rescue Timeout::Error
      # Only an expired wait is retried; any other error now propagates
      # instead of silently spinning this loop.
      next
    end

    job = Crawler.new(url, bot)
    job.add_observer(self)

    threadpool.load(job)
  end
ensure
  # Close the browser even when the crawl aborts with an exception.
  # bot is nil when :with_watir is off or the browser failed to start.
  bot.close unless bot.nil?
end

Instance Method Details

#update(links) ⇒ Object



48
49
50
51
52
53
54
55
56
57
# File 'lib/macaron/spawner.rb', line 48

# Records that one job has finished and queues the links it discovered.
#
# Links already present in @parsed_urls are ignored. Every other link is
# remembered exactly once in @parsed_urls and pushed onto @tasks.
# @awaiting_counter ends up changed by (number of new links - 1).
#
# NOTE(review): presumably called by Crawler jobs from thread-pool workers
# while the loop in #initialize polls @awaiting_counter; no locking is done
# here -- confirm whether the caller serializes these calls.
#
# @param links [Array, nil] links found by the finished job; nil is treated
#   as an empty list so the finished job is still counted
# @return the +links+ argument, unchanged
def update(links)
  fresh = []
  Array(links).each do |link|
    next if @parsed_urls.include?(link)

    # Record only unseen links so the seen-list does not collect duplicates.
    @parsed_urls << link
    fresh << link
  end

  # Apply the net change in one step, before queueing, so the counter never
  # reads zero while this job's new links are still to be processed.
  @awaiting_counter += fresh.size - 1
  fresh.each { |link| @tasks << link }

  links
end