Class: Crawl::Register
- Inherits: Object
- Inheritance hierarchy: Object > Crawl::Register
- Defined in:
- lib/crawl/register.rb
Defined Under Namespace
Classes: Result
Instance Method Summary collapse
- #add(pages) ⇒ Object
- #completed(page) ⇒ Object
- #error_pages ⇒ Object
- #errors? ⇒ Boolean
- #finished? ⇒ Boolean
- #initialize ⇒ Register (constructor): A new instance of Register.
- #next_page ⇒ Object
- #no_links_found? ⇒ Boolean
- #processing_size ⇒ Object
- #retry(page) ⇒ Object
- #summarize ⇒ Object
Constructor Details
#initialize ⇒ Register
Returns a new instance of Register.
5 6 7 8 9 |
# File 'lib/crawl/register.rb', line 5
# Creates a register with three empty sets: pages waiting to be processed,
# pages currently being processed, and pages already processed.
def initialize
  @unprocessed = Set[]
  @processing = Set[]
  @processed = Set[]
end
Instance Method Details
#add(pages) ⇒ Object
11 12 13 14 15 16 17 |
# File 'lib/crawl/register.rb', line 11
# Queues every page that the register has not seen yet.
#
# Pages already present in the processed, processing or unprocessed sets are
# ignored. When the global $verbose is truthy, each newly queued page's url
# is printed.
#
# @param pages [#to_set] candidate pages; each must respond to #url when
#   $verbose is set
# @return [Set] the unprocessed set after the new pages were merged in
def add(pages)
  already_known = @processed | @processing | @unprocessed
  fresh = pages.to_set - already_known
  fresh.each { |candidate| puts " Adding #{candidate.url}" if $verbose }
  @unprocessed.merge(fresh)
end
#completed(page) ⇒ Object
34 35 36 37 |
# File 'lib/crawl/register.rb', line 34
# Records a page as processed and removes it from the processing set.
#
# @param page [Object] the page that finished
# @return [Set] the processing set after removal
def completed(page)
  @processed.add(page)
  @processing.delete(page)
end
#error_pages ⇒ Object
47 48 49 |
# File 'lib/crawl/register.rb', line 47
# Selects the processed pages whose #error is truthy.
#
# @return [Enumerable] the processed pages that report an error
def error_pages
  @processed.select(&:error)
end
#errors? ⇒ Boolean
51 52 53 |
# File 'lib/crawl/register.rb', line 51
# Tells whether any processed page has a truthy #error.
#
# Stops at the first failing page instead of first collecting every failing
# page only to check whether that collection is empty.
#
# @return [Boolean] true when at least one processed page reports an error
def errors?
  @processed.any?(&:error)
end
#finished? ⇒ Boolean
39 40 41 |
# File 'lib/crawl/register.rb', line 39
# Tells whether there is no work left: nothing waiting to be processed and
# nothing currently being processed.
#
# @return [Boolean] true when both the unprocessed and processing sets are empty
def finished?
  @unprocessed.empty? && @processing.empty?
end
#next_page ⇒ Object
19 20 21 22 23 24 25 26 27 |
# File 'lib/crawl/register.rb', line 19
# Hands out the next page to work on, moving it from the unprocessed set to
# the processing set.
#
# After that, prints a warning to standard output whenever the processing set
# holds more pages than EM.threadpool_size reports. The check runs even when
# no page was handed out.
# NOTE(review): EM is presumably EventMachine -- confirm against the file's
# requires.
#
# @return [Object, nil] the first unprocessed page, or nil when none is left
def next_page
  page = @unprocessed.first
  @unprocessed.delete(page)
  @processing << page if page

  in_flight = @processing.size
  pool_size = EM.threadpool_size
  if in_flight > pool_size
    puts "WARNING: #{in_flight} pages are being processed when EM threadpool only has #{pool_size} threads."
  end

  page
end
#no_links_found? ⇒ Boolean
66 67 68 |
# File 'lib/crawl/register.rb', line 66
# Tells whether fewer than two pages have been processed.
#
# @return [Boolean] true when the processed set holds zero or one page
def no_links_found?
  @processed.size < 2
end
#processing_size ⇒ Object
43 44 45 |
# File 'lib/crawl/register.rb', line 43
# Reports how many pages are in the processing set.
#
# @return [Integer] number of pages currently being processed
def processing_size
  @processing.length
end
#retry(page) ⇒ Object
29 30 31 32 |
# File 'lib/crawl/register.rb', line 29
# Puts a page back into the unprocessed set and removes it from the
# processing set.
#
# @param page [Object] the page to queue again
# @return [Set] the processing set after removal
def retry(page)
  @unprocessed.add(page)
  @processing.delete(page)
end
#summarize ⇒ Object
55 56 57 58 59 60 61 62 63 64 |
# File 'lib/crawl/register.rb', line 55
# Prints a crawl summary to standard output: either the list of pages with
# errors, or the number of processed pages when there were no errors.
def summarize
  unless errors?
    puts "\n#{@processed.size} pages crawled without errors."
    return
  end

  puts "\nPages with errors:"
  error_pages.each { |page| puts page.to_s }
end