Class: CobwebCrawlHelper
- Inherits:
-
Object
- Object
- CobwebCrawlHelper
- Defined in:
- lib/cobweb_crawl_helper.rb
Overview
The crawl helper class gives easy access to information about a crawl and provides the ability to stop it.
Constant Summary collapse
- BATCH_SIZE =
200
- FINISHED =
"Finished"
- STARTING =
"Starting"
- CANCELLED =
"Cancelled"
Instance Attribute Summary collapse
-
#id ⇒ Object
Returns the value of attribute id.
Instance Method Summary collapse
- #destroy ⇒ Object
-
#initialize(data) ⇒ CobwebCrawlHelper
constructor
A new instance of CobwebCrawlHelper.
- #statistics ⇒ Object
- #status ⇒ Object
Constructor Details
#initialize(data) ⇒ CobwebCrawlHelper
Returns a new instance of CobwebCrawlHelper.
11 12 13 14 15 16 |
# File 'lib/cobweb_crawl_helper.rb', line 11

# Builds a helper around the data describing a single crawl.
#
# @param data [Object] crawl data; kept in @data and handed unchanged to
#   Stats.new. The destroy method indexes it with symbol keys, so it is
#   presumably a Hash of crawl options - confirm against callers.
# @return [CobwebCrawlHelper] a new instance of CobwebCrawlHelper
def initialize(data)
  @data = data

  # NOTE: this step was flagged as taking a long time to run on the
  # production box.
  @stats = Stats.new(data)
end
Instance Attribute Details
#id ⇒ Object
Returns the value of attribute id.
4 5 6 |
# File 'lib/cobweb_crawl_helper.rb', line 4

# Returns the value of attribute id.
#
# @return [Object] whatever is currently stored in @id (nil when unset)
def id
  @id
end
Instance Method Details
#destroy ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/cobweb_crawl_helper.rb', line 18

# Cancels the crawl and removes its jobs that are still queued.
#
# Steps, in order:
# 1. Fill in default options (queue name and, when Resque is installed, the
#    finished-job class). `options` is the same object as @data, so these
#    defaults are written into the crawl data itself.
# 2. Mark the crawl as ended/cancelled through the statistics object.
# 3. When running on Resque, enqueue the finished job with the crawl
#    statistics merged with crawl_id, crawled_base_url and, when present,
#    redis_options and source_id.
# 4. Wait (up to 200 one-second sleeps) for the status to read CANCELLED.
# 5. When running on Resque, walk the queue from its tail in BATCH_SIZE
#    chunks and dequeue every job whose crawl_id matches this crawl.
#
# @return [nil]
def destroy
  options = @data
  options[:queue_name] = "cobweb_crawl_job" unless options.has_key?(:queue_name)
  if RESQUE_INSTALLED
    options[:finished_resque_queue] = CobwebFinishedJob unless options.has_key?(:finished_resque_queue)
  end

  # set status as cancelled now so that we don't enqueue any further pages
  statistics.end_crawl(@data, true)

  if options[:finished_resque_queue] && options[:queue_system] == :resque && RESQUE_INSTALLED
    additional_stats = {:crawl_id => id, :crawled_base_url => @stats.redis.get("crawled_base_url")}
    additional_stats[:redis_options] = @data[:redis_options] unless @data[:redis_options] == {}
    additional_stats[:source_id] = options[:source_id] unless options[:source_id].nil?

    Resque.enqueue(options[:finished_resque_queue], @stats.get_statistics.merge(additional_stats))
  end

  # poll once a second, giving up after 200 attempts, until the crawl
  # reports itself as cancelled
  200.times do
    break if statistics.get_status == CANCELLED
    sleep 1
  end

  if options[:queue_system] == :resque && RESQUE_INSTALLED
    position = Resque.size(options[:queue_name])
    until position == 0
      # step back one batch at a time, never past the head of the queue;
      # working from the tail means removals do not shift unvisited items
      position = [position - BATCH_SIZE, 0].max
      job_items = Resque.peek(options[:queue_name], position, BATCH_SIZE)
      job_items.each do |item|
        # remove this job from the queue
        Resque.dequeue(CrawlJob, item["args"][0]) if item["args"][0]["crawl_id"] == id
      end
    end
  end
end
#statistics ⇒ Object
61 62 63 |
# File 'lib/cobweb_crawl_helper.rb', line 61

# Returns the statistics object for this crawl.
#
# @return [Object] the object held in @stats, which the constructor sets to
#   Stats.new(data)
def statistics
  @stats
end
#status ⇒ Object
65 66 67 |
# File 'lib/cobweb_crawl_helper.rb', line 65

# Returns the current status of the crawl.
#
# @return [Object] whatever statistics.get_status reports; destroy compares
#   this value against the CANCELLED constant
def status
  statistics.get_status
end