Class: DaimonSkycrawlers::Processor::Spider

Inherits:
Base
  • Object
show all
Defined in:
lib/daimon_skycrawlers/processor/spider.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#before_process, #process, #storage

Methods included from LoggerMixin

included

Constructor Details

#initialize ⇒ Spider

Returns a new instance of Spider.



9
10
11
12
13
14
15
# File 'lib/daimon_skycrawlers/processor/spider.rb', line 9

# Sets up a fresh spider: an empty list of link filters, no parsed
# document, no links, and the enqueue flag turned on.
def initialize
  super
  @link_filters = []
  @doc, @links = nil, nil
  @enqueue = true
end

Instance Attribute Details

#enqueue ⇒ Object

Returns the value of attribute enqueue.



7
8
9
# File 'lib/daimon_skycrawlers/processor/spider.rb', line 7

# Reader for the @enqueue flag, which the constructor initializes to true.
# NOTE(review): presumably controls whether discovered links are enqueued;
# the code that consumes this flag is not shown here — confirm.
#
# @return [Object] the current value of @enqueue
def enqueue
  @enqueue
end

Instance Method Details



17
18
19
20
21
22
23
# File 'lib/daimon_skycrawlers/processor/spider.rb', line 17

# Registers a link filter at the end of @link_filters.
#
# A block, when supplied, is stored and any +filter+ argument is ignored.
# Without a block, +filter+ is stored only if it responds to #call;
# anything else is silently dropped.
#
# @param filter [#call, nil] callable to register when no block is given
# @yield optional block to register as the filter
# @return [Array, nil] @link_filters after appending, or nil when nothing
#   was registered
def append_link_filter(filter = nil, &block)
  candidate = block || filter
  return unless candidate.respond_to?(:call)

  @link_filters << candidate
end

#call(message) ⇒ Object

Parameters:

  • message (Hash)

    Must have the key :url; :depth is optional and defaults to 2 when absent



28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/daimon_skycrawlers/processor/spider.rb', line 28

# Handles one message: loads the stored page for the message's URL, parses
# it into @doc, and passes every entry of +links+ to +enqueue_url+ with the
# depth reduced by one.
#
# Nothing is done for heartbeat messages or when the depth is 1 or less.
#
# @param message [Hash] must have key :url; :depth is optional and defaults
#   to 2; a truthy :heartbeat causes the message to be skipped
# @return [Object, nil] nil when the message is skipped, otherwise whatever
#   +links.each+ returns
# @raise [ArgumentError, TypeError] when :depth is present but cannot be
#   converted by Integer()
def call(message)
  # Check for heartbeats before reading any other key, so a heartbeat is
  # never rejected because of a malformed :depth.
  return if message[:heartbeat]

  depth = Integer(message[:depth] || 2)
  return if depth <= 1

  # NOTE(review): assumes storage.find returns a page for this URL; the
  # not-found behavior is not visible here — confirm.
  page = storage.find(message[:url])
  @doc = Nokogiri::HTML(page.body)

  # The same hash instance is handed to every enqueue_url call.
  new_message = { depth: depth - 1 }
  links.each { |url| enqueue_url(url, new_message) }
end