Class: Apollo::Agent::CrawlerAgent

Inherits:
BaseAgent
  • Object
show all
Defined in:
lib/apollo_crawler/agent/crawler_agent.rb

Instance Attribute Summary

Instance Method Summary

Methods inherited from BaseAgent

#run

Constructor Details

#initialize(amqp, opts = {}) ⇒ CrawlerAgent

Returns a new instance of CrawlerAgent.



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/apollo_crawler/agent/crawler_agent.rb', line 33

# Creates the crawler agent and subscribes it to the crawler queue.
#
# Opens a channel on the given AMQP connection, declares the messaging
# entities through Apollo::Agent.declare_entities, binds "crawler.queue" to
# the "crawler" exchange and registers a subscriber. For every message the
# subscriber parses the JSON payload, builds an HTML document from the
# response body, runs the crawler named in the request over it and, when the
# message carries a reply_to property, publishes the request, response,
# extracted data and extracted links to the exchange registered under that
# name.
#
# @param amqp [Object] AMQP connection; must respond to +create_channel+
# @param opts [Hash] options; +:verbose+ enables progress output. The hash is
#   also passed through to Apollo::Agent.declare_entities.
# @return [CrawlerAgent] a new instance of CrawlerAgent
def initialize(amqp, opts={})
	puts "Initializing crawler agent..." if opts[:verbose]

	# Declarations
	channel = amqp.create_channel
	self.declarations = Apollo::Agent.declare_entities(channel, opts)

	# Binding
	queue = declarations[:queues]["crawler.queue"]
	exchange = declarations[:exchanges]["crawler"]

	# The subscriber receives (delivery_info, metadata, payload); metadata holds
	# the message properties, including the optional reply_to routing hint.
	queue.bind(exchange).subscribe do |_delivery_info, metadata, payload|
		msg = JSON.parse(payload)

		request = msg['request']
		response = msg['response']
		url = request["url"]

		puts "CrawlerAgent: Received - '#{url}', metadata #{metadata.inspect}" if opts[:verbose]

		doc = Nokogiri::HTML(response['body'])
		# NOTE(review): the crawler class name comes straight from the message
		# payload; confirm the queue is only fed by trusted producers.
		crawler = request['crawler_name'].constantize.new
		data = crawler.extract_data(doc)
		links = crawler.extract_links(doc)

		# Only answer when the sender asked for a reply.
		reply_to = metadata[:reply_to]
		unless reply_to.nil?
			reply_exchange = declarations[:exchanges][reply_to]

			reply = {
				:request => request,
				:response => response,
				:data => data,
				:links => links
			}

			reply_exchange.publish(reply.to_json)
		end
	end
end

Instance Attribute Details

#declarations ⇒ Object

Returns the value of attribute declarations.



31
32
33
# File 'lib/apollo_crawler/agent/crawler_agent.rb', line 31

# Reader for the +declarations+ attribute.
#
# @return [Object] whatever is currently stored in @declarations
def declarations; @declarations; end