Class: Apollo::PlatformProgram

Inherits:
BaseProgram show all
Defined in:
lib/apollo_crawler/program/platform_program.rb

Constant Summary collapse

# Option defaults that the constructor merges into the program options.
# Frozen so that this shared constant cannot be modified in place by accident;
# passing it as the argument of Hash#merge! does not require it to be mutable.
DEFAULT_OPTIONS =
{
	:version => nil
}.freeze

Constants inherited from BaseProgram

BaseProgram::CONFIG_DIR

Instance Attribute Summary

Attributes inherited from BaseProgram

#amqp, #config, #mongo, #options, #optparser

Instance Method Summary collapse

Methods inherited from BaseProgram

get_config_path, #init_amqp, #init_mongo, #init_seeds, #init_seeds_crawlers, #load_config, #load_config_file, #load_configs, #parse_options, #request_exit, require_files

Constructor Details

#initialize ⇒ PlatformProgram

Initializer - Constructor



63
64
65
66
67
# File 'lib/apollo_crawler/program/platform_program.rb', line 63

# Constructor.
#
# Runs the inherited constructor first, then copies every entry of
# DEFAULT_OPTIONS into the program options. Keys present in DEFAULT_OPTIONS
# overwrite values that are already stored under the same key.
def initialize
	super

	# Hash#update is the in-place merge (alias of merge!).
	self.options.update(DEFAULT_OPTIONS)
end

Instance Method Details

#enqueue_crawlers_urls(amqp, crawlers = Apollo::Crawler::BaseCrawler.subclasses, opts = {}) ⇒ Object



100
101
102
103
104
105
# File 'lib/apollo_crawler/program/platform_program.rb', line 100

# Schedules the url of every given crawler class.
#
# Each class in +crawlers+ is instantiated without arguments and the
# instance's +url+ is handed to Apollo::Scheduler::BaseScheduler.schedule
# together with the class itself.
#
# amqp     - accepted for signature compatibility; not used here.
# crawlers - collection of crawler classes; defaults to all subclasses of
#            Apollo::Crawler::BaseCrawler.
# opts     - accepted for signature compatibility; not used here.
#
# Returns whatever +crawlers.each+ returns (the collection itself for an Array).
def enqueue_crawlers_urls(amqp, crawlers=Apollo::Crawler::BaseCrawler.subclasses, opts={})
	crawlers.each do |crawler_class|
		instance = crawler_class.new
		Apollo::Scheduler::BaseScheduler.schedule(instance.url, crawler_class)
	end
end

#init_agents(amqp, opts = {}) ⇒ Object



125
126
127
128
129
130
131
# File 'lib/apollo_crawler/program/platform_program.rb', line 125

# Announces agent initialization on standard output and then runs the three
# agent initializers, each with the same +amqp+ and +opts+ arguments.
#
# Returns the result of the last initializer (init_fetchers).
def init_agents(amqp, opts={})
	puts("Initializing agents")

	# Call order is kept: crawlers, then domainers, then fetchers.
	steps = [:init_crawlers, :init_domainers, :init_fetchers]
	steps.map { |step| send(step, amqp, opts) }.last
end

#init_crawlers(amqp, opts = {}) ⇒ Object



107
108
109
110
# File 'lib/apollo_crawler/program/platform_program.rb', line 107

# Creates one Apollo::Agent::CrawlerAgent for the given +amqp+ object.
#
# Note that the agent receives the program-wide +self.options+; the +opts+
# parameter is accepted but not used here.
#
# Returns a one-element Array holding the new agent.
def init_crawlers(amqp, opts={})
	[Apollo::Agent::CrawlerAgent.new(amqp, self.options)]
end

#init_domainers(amqp, opts = {}) ⇒ Object



112
113
114
115
# File 'lib/apollo_crawler/program/platform_program.rb', line 112

# Creates one Apollo::Agent::DomainerAgent for the given +amqp+ object.
#
# Note that the agent receives the program-wide +self.options+; the +opts+
# parameter is accepted but not used here.
#
# Returns a one-element Array holding the new agent.
def init_domainers(amqp, opts={})
	[Apollo::Agent::DomainerAgent.new(amqp, self.options)]
end

#init_domains(opts = {}) ⇒ Object



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/apollo_crawler/program/platform_program.rb', line 133

# Starts a background import of the domain ranking CSV, if that file exists.
#
# The CSV is looked up at "../../../tmp/top-1m.csv" relative to the directory
# of this source file; the resolved path is printed to standard output.
#
# opts - accepted for signature compatibility; not used here.
#
# Returns 0 when the CSV file does not exist, otherwise the Thread that
# performs the import.
def init_domains(opts={})
	path = File.join(File.dirname(__FILE__), "../../../tmp/top-1m.csv")
	puts path

	# File.exists? was deprecated and later removed from Ruby (3.2);
	# File.exist? is the supported spelling on every Ruby version.
	return 0 unless File.exist?(path)

	Thread::new {
		# NOTE(review): 1000 and false are passed through to csv_bulk_insert
		# unchanged; presumably a batch size and a flag - confirm against the helper.
		Apollo::Helper::Mongo::csv_bulk_insert(path, Apollo::Model::Domain, 1000, false) do |row|
			# First column is converted to an Integer rank, second column is the name.
			{
				:rank => row[0].to_i,
				:name => row[1]
			}
		end
	}
end

#init_fetchers(amqp, opts = {}) ⇒ Object



117
118
119
120
121
122
123
# File 'lib/apollo_crawler/program/platform_program.rb', line 117

# Creates one Apollo::Agent::FetcherAgent for the given +amqp+ object (the
# agent receives the program-wide +self.options+) and then enqueues the urls
# of every Apollo::Crawler::BaseCrawler subclass.
#
# Returns the result of enqueue_crawlers_urls.
def init_fetchers(amqp, opts={})
	Apollo::Agent::FetcherAgent.new(amqp, self.options)

	# TODO: This should not be here!
	crawler_classes = Apollo::Crawler::BaseCrawler.subclasses
	enqueue_crawlers_urls(amqp, crawler_classes, opts)
end

#init_options ⇒ Object



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/apollo_crawler/program/platform_program.rb', line 69

# Builds the command line parser for apollo-platform and stores it in
# +optparser+.
#
# Every switch only records its value in +self.options+:
#   -h / --help               sets :show_help to true
#   -e / --environment [NAME] sets :env to the given name
#   -d / --daemon             sets :daemon to true
#   -v / --verbose            sets :verbose to true
#   -V / --version            sets :version to true
#
# Returns the newly created OptionParser.
def init_options()
	parser = OptionParser.new
	parser.banner = "Usage: apollo-platform [OPTIONS]"

	parser.separator ""
	parser.separator "Specific options:"

	# This displays the help screen, all programs are
	# assumed to have this option.
	parser.on('-h', '--help', 'Display this screen') { self.options[:show_help] = true }

	# The description shows the value of options[:env] at the time the parser is built.
	parser.on('-e', '--environment [NAME]', "Environment used, default '#{options[:env]}'") { |name| self.options[:env] = name }

	parser.on('-d', '--daemon', 'Run Apollo Platform daemon') { self.options[:daemon] = true }
	parser.on('-v', '--verbose', 'Enable verbose output') { self.options[:verbose] = true }
	parser.on('-V', '--version', 'Show version info') { self.options[:version] = true }

	self.optparser = parser
end

#init_program(args) ⇒ Object



155
156
157
158
159
160
161
# File 'lib/apollo_crawler/program/platform_program.rb', line 155

# Runs the inherited program initialization and, when it reports nothing,
# initializes the agents.
#
# args - passed unchanged to the inherited init_program.
#
# Returns the inherited result when it is not nil (agents are then left
# untouched); otherwise returns nil after init_agents has been called with
# the program's amqp object and options.
def init_program(args)
	parent_result = super(args)

	if parent_result.nil?
		init_agents(self.amqp, self.options)
		nil
	else
		parent_result
	end
end

#process_options(args) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/apollo_crawler/program/platform_program.rb', line 163

# Handles the informational switches recorded in +self.options+.
#
# :version takes precedence over :show_help. When :version is set,
# Apollo::VERSION is printed; when :show_help is set, the option parser
# (its usage text) is printed.
#
# args - not used here.
#
# Returns 0 after printing, or nil when neither switch is set, which means
# the program can freely continue.
def process_options(args)
	if self.options[:version]
		puts Apollo::VERSION
		0
	elsif self.options[:show_help]
		puts optparser
		0
	end
end

#requeue_fetching_urls(opts = {}) ⇒ Object



178
179
180
181
182
183
184
185
186
# File 'lib/apollo_crawler/program/platform_program.rb', line 178

# Puts every queued url that is in state :fetching back into state :queued.
#
# Each record returned by Apollo::Model::QueuedUrl.where(:state => :fetching)
# gets its state changed and is then saved.
#
# opts - when opts[:verbose] is truthy, a line is printed for every record
#        before it is changed.
#
# Returns whatever +each+ returns for the query result.
def requeue_fetching_urls(opts={})
	Apollo::Model::QueuedUrl.where(:state => :fetching).each do |stuck|
		if opts[:verbose]
			puts "Requeing '#{stuck.inspect}'"
		end

		stuck.state = :queued
		stuck.save
	end
end

#run(args = ARGV) ⇒ Object

Run Program



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# File 'lib/apollo_crawler/program/platform_program.rb', line 189

# Run Program
#
# Delegates to the inherited +run+ first; a non-nil result from it is returned
# immediately. Otherwise the domain import is started, urls in state
# :fetching are requeued and, when options[:daemon] is set, a
# Apollo::Planner::SmartPlanner is created and run.
#
# args - command line arguments, ARGV by default.
#
# Returns the inherited result when it is not nil, otherwise the result of
# request_exit called with the planner's result (0 when no planner ran).
def run(args = ARGV)
	early_result = super(args)
	return early_result unless early_result.nil?

	init_domains()

	requeue_fetching_urls(self.options)

	# Disabled in the original source:
	# if(ARGV.length < 1)
	# 	puts optparser
	# 	return 0
	# end

	exit_code =
		if self.options[:daemon]
			Apollo::Planner::SmartPlanner.new(self.amqp, self.mongo, self.options).run(self.options)
		else
			0
		end

	request_exit(exit_code)
end