Class: Apollo::PlatformProgram

Inherits:
BaseProgram show all
Defined in:
lib/apollo_crawler/program/platform_program.rb

Constant Summary collapse

# Options specific to the platform program; the constructor merges them
# into the inherited options hash.
DEFAULT_OPTIONS = { :version => nil }

Constants inherited from BaseProgram

BaseProgram::CONFIG_DIR

Instance Attribute Summary

Attributes inherited from BaseProgram

#amqp, #config, #mongo, #options, #optparser

Instance Method Summary collapse

Methods inherited from BaseProgram

get_config_path, #init_amqp, #init_mongo, #init_seeds, #init_seeds_crawlers, #load_config, #load_config_file, #load_configs, #parse_options, #request_exit, require_files

Constructor Details

#initialize ⇒ PlatformProgram

Constructor: runs the BaseProgram initializer, then merges DEFAULT_OPTIONS into the program options.



63
64
65
66
67
# File 'lib/apollo_crawler/program/platform_program.rb', line 63

# Builds the program: runs the parent initializer first, then layers this
# class's DEFAULT_OPTIONS on top of the options hash. Keys present in
# DEFAULT_OPTIONS overwrite any value already stored under the same key.
def initialize
  super

  options.update(DEFAULT_OPTIONS)
end

Instance Method Details

#enqueue_crawlers_urls(amqp, crawlers = Apollo::Crawler::BaseCrawler.subclasses, opts = {}) ⇒ Object



100
101
102
103
104
105
# File 'lib/apollo_crawler/program/platform_program.rb', line 100

# Schedules the start URL of every given crawler class.
#
# Each class is instantiated once; the instance's +url+ is handed to
# Apollo::Scheduler::BaseScheduler.schedule together with the class itself.
#
# @param amqp accepted for interface compatibility; not used here
# @param crawlers [Enumerable<Class>] crawler classes to schedule
# @param opts [Hash] accepted for interface compatibility; not used here
# @return the +crawlers+ collection that was iterated
def enqueue_crawlers_urls(amqp, crawlers=Apollo::Crawler::BaseCrawler.subclasses, opts={})
  crawlers.each { |klass| Apollo::Scheduler::BaseScheduler.schedule(klass.new.url, klass) }
end

#init_agents(amqp, opts = {}) ⇒ Object



125
126
127
128
129
130
131
# File 'lib/apollo_crawler/program/platform_program.rb', line 125

# Starts every agent group, in order: crawlers, domainers, fetchers.
#
# @param amqp connection object forwarded to each initializer
# @param opts [Hash] options forwarded to each initializer
# @return the result of the last initializer (init_fetchers)
def init_agents(amqp, opts={})
  puts "Initializing agents"

  # The order is significant; the value of the final step is returned.
  [:init_crawlers, :init_domainers, :init_fetchers].map do |initializer|
    send(initializer, amqp, opts)
  end.last
end

#init_crawlers(amqp, opts = {}) ⇒ Object



107
108
109
110
# File 'lib/apollo_crawler/program/platform_program.rb', line 107

# Creates the crawler agent.
#
# Note that the agent receives the program-wide +options+, not +opts+.
#
# @param amqp connection object handed to the agent
# @param opts [Hash] accepted for interface compatibility; not used here
# @return [Array] one-element array holding the new CrawlerAgent
def init_crawlers(amqp, opts={})
  [Apollo::Agent::CrawlerAgent.new(amqp, options)]
end

#init_domainers(amqp, opts = {}) ⇒ Object



112
113
114
115
# File 'lib/apollo_crawler/program/platform_program.rb', line 112

# Creates the domainer agent.
#
# Note that the agent receives the program-wide +options+, not +opts+.
#
# @param amqp connection object handed to the agent
# @param opts [Hash] accepted for interface compatibility; not used here
# @return [Array] one-element array holding the new DomainerAgent
def init_domainers(amqp, opts={})
  [Apollo::Agent::DomainerAgent.new(amqp, options)]
end

#init_domains(opts = {}) ⇒ Object



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/apollo_crawler/program/platform_program.rb', line 133

# Seeds the Domain collection from tmp/top-1m.csv, if that file is present.
#
# The path is resolved relative to this source file and printed. When the
# file is missing nothing else happens. Otherwise a background thread reads
# the CSV and stores every domain name (second column) that is not yet in
# the database, printing a dot for each record it creates.
#
# @param opts [Hash] accepted for interface compatibility; not used here
# @return [Integer, Thread] 0 when the CSV file does not exist, otherwise
#   the thread performing the import
def init_domains(opts={})
  path = File.join(File.dirname(__FILE__), "../../../tmp/top-1m.csv")
  puts path

  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return 0 unless File.exist?(path)

  Thread.new do
    CSV.foreach(path) do |row|
      name = row[1]
      # Blank or malformed rows have no second column; never store those.
      next if name.nil? || name.empty?

      next unless Apollo::Model::Domain.where({:name => name}).first.nil?

      Apollo::Model::Domain.new({:name => name}).save
      print "."
    end
  end
end

#init_fetchers(amqp, opts = {}) ⇒ Object



117
118
119
120
121
122
123
# File 'lib/apollo_crawler/program/platform_program.rb', line 117

# Creates the fetcher agent and then enqueues the start URLs of all known
# crawler classes.
#
# The agent receives the program-wide +options+; +opts+ is only forwarded
# to enqueue_crawlers_urls.
#
# @param amqp connection object handed to the agent and to the enqueue step
# @param opts [Hash] options forwarded to enqueue_crawlers_urls
# @return whatever enqueue_crawlers_urls returns
def init_fetchers(amqp, opts={})
  # The agent is created for its side effects; no reference is kept here.
  Apollo::Agent::FetcherAgent.new(amqp, options)

  # TODO: This should not be here!
  all_crawlers = Apollo::Crawler::BaseCrawler.subclasses
  enqueue_crawlers_urls(amqp, all_crawlers, opts)
end

#init_options ⇒ Object



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/apollo_crawler/program/platform_program.rb', line 69

# Builds the command-line parser for apollo-platform and stores it in
# +optparser+.
#
# Every switch only records a value in +options+; nothing is acted upon
# while parsing.
#
# @return [OptionParser] the parser that was assigned to +optparser+
def init_options()
  parser = OptionParser.new
  parser.banner = "Usage: apollo-platform [OPTIONS]"

  parser.separator ""
  parser.separator "Specific options:"

  # Help is expected to be available in every program.
  parser.on('-h', '--help', 'Display this screen') { options[:show_help] = true }

  # The help text shows the environment configured when the parser is built.
  parser.on('-e', '--environment [NAME]', "Environment used, default '#{options[:env]}'") do |env_name|
    options[:env] = env_name
  end

  parser.on('-d', '--daemon', 'Run Apollo Platform daemon') { options[:daemon] = true }

  parser.on('-v', '--verbose', 'Enable verbose output') { options[:verbose] = true }

  parser.on('-V', '--version', 'Show version info') { options[:version] = true }

  self.optparser = parser
end

#init_program(args) ⇒ Object



153
154
155
156
157
158
159
# File 'lib/apollo_crawler/program/platform_program.rb', line 153

# Runs the inherited program initialisation and, when that asks to
# continue (returns nil), starts the agents.
#
# @param args command-line arguments, forwarded to the parent implementation
# @return the parent's non-nil result when it has one, otherwise nil
def init_program(args)
  outcome = super(args)

  if outcome.nil?
    init_agents(amqp, options)
    nil
  else
    # The parent asked to stop; hand its result straight back.
    outcome
  end
end

#process_options(args) ⇒ Object



161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/apollo_crawler/program/platform_program.rb', line 161

# Handles the informational switches after option parsing.
#
# A version request wins over a help request; in either case the requested
# text is printed and 0 is returned.
#
# @param args command-line arguments; not inspected here
# @return [Integer, nil] 0 when version or help output was printed, nil
#   when the program may continue
def process_options(args)
  if options[:version]
    puts Apollo::VERSION
  elsif options[:show_help]
    puts optparser
  else
    # Nothing to report: the caller is free to carry on.
    return nil
  end

  0
end

#requeue_fetching_urls(opts = {}) ⇒ Object



176
177
178
179
180
181
182
183
184
# File 'lib/apollo_crawler/program/platform_program.rb', line 176

# Puts every queued URL that is currently in the :fetching state back into
# the :queued state and saves it.
#
# @param opts [Hash] when opts[:verbose] is truthy each record is logged
# @return the collection returned by the QueuedUrl query
def requeue_fetching_urls(opts={})
  Apollo::Model::QueuedUrl.where(:state => :fetching).each do |queued|
    puts "Requeing '#{queued.inspect}'" if opts[:verbose]

    queued.state = :queued
    queued.save
  end
end

#run(args = ARGV) ⇒ Object

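Runs the program: performs the inherited startup, seeds domains, requeues URLs left in the fetching state and, in daemon mode, runs the planner.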



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/apollo_crawler/program/platform_program.rb', line 187

# Program entry point.
#
# After the inherited run step succeeds (returns nil) this seeds the
# domains, requeues URLs that were left in the fetching state and, when the
# :daemon option is set, runs the SmartPlanner. The resulting exit code is
# passed to request_exit.
#
# @param args [Array<String>] command-line arguments, ARGV by default
# @return the parent's non-nil result, or whatever request_exit returns
def run(args = ARGV)
  early_result = super(args)
  return early_result unless early_result.nil?

  init_domains
  requeue_fetching_urls(options)

  # Outside daemon mode there is nothing more to do, so the exit code is 0.
  exit_code =
    if options[:daemon]
      Apollo::Planner::SmartPlanner.new(amqp, mongo, options).run(options)
    else
      0
    end

  request_exit(exit_code)
end