Class: Apollo::Agent::CrawlerAgent

Inherits:
BaseAgent
  • Object
show all
Defined in:
lib/apollo_crawler/agent/crawler_agent.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from BaseAgent

#run

Constructor Details

#initialize(amqp, opts = {}) ⇒ CrawlerAgent

Returns a new instance of CrawlerAgent.



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/apollo_crawler/agent/crawler_agent.rb', line 33

# Sets up the crawler agent: opens a channel on the given AMQP connection,
# declares the shared entities, binds "crawler.queue" to the "crawler"
# exchange and subscribes to it.
#
# For every delivered message the JSON payload is parsed, the response body
# is turned into a Nokogiri HTML document, the crawler class named in the
# request is instantiated, and its extracted data and links are published
# (as JSON) to the exchange named by the message's :reply_to property.
#
# @param amqp [Object] connection object responding to #create_channel
#   (presumably a Bunny session -- confirm against the caller)
# @param opts [Hash] options; :verbose enables progress output, and the whole
#   hash is forwarded to Apollo::Agent.declare_entities
# @return [CrawlerAgent] a new instance of CrawlerAgent
def initialize(amqp, opts={})
  puts "Initializing crawler agent..." if opts[:verbose]

  # Declarations
  channel = amqp.create_channel
  self.declarations = Apollo::Agent.declare_entities(channel, opts)

  # Binding
  queue = declarations[:queues]["crawler.queue"]
  exchange = declarations[:exchanges]["crawler"]

  # The second block argument carries the message properties; it is needed
  # both for logging and for routing the reply.
  queue.bind(exchange).subscribe do |_delivery_info, metadata, payload|
    msg = JSON.parse(payload)

    request = msg['request']
    response = msg['response']
    url = request["url"]

    puts "CrawlerAgent: Received - '#{url}', metadata #{metadata.inspect}" if opts[:verbose]

    doc = Nokogiri::HTML(response['body'])
    # NOTE(review): the class name comes straight from the message payload and
    # is resolved with constantize; confirm that publishers are trusted or
    # restrict the lookup to known crawler classes.
    crawler = request['crawler_name'].constantize.new
    data = crawler.extract_data(doc)
    links = crawler.extract_links(doc)

    # Only messages that name a reply exchange get an answer.
    reply_to = metadata[:reply_to]
    next if reply_to.nil?

    # NOTE(review): assumes the reply exchange was declared up front; an
    # unknown name yields nil here and the publish below will raise.
    reply_exchange = declarations[:exchanges][reply_to]

    reply = {
      :request => request,
      :response => response,
      :data => data,
      :links => links
    }

    reply_exchange.publish(reply.to_json)
  end
end

Instance Attribute Details

#declarations ⇒ Object

Returns the value of attribute declarations.



31
32
33
# File 'lib/apollo_crawler/agent/crawler_agent.rb', line 31

# Reader for the agent's declarations attribute.
#
# The constructor assigns this from Apollo::Agent.declare_entities and then
# indexes it with :queues and :exchanges.
#
# @return [Object] the current value of @declarations
def declarations
  @declarations
end