Class: Spider::VisitQueue
- Inherits: Object
- Object
- Spider::VisitQueue
- Defined in: lib/queue.rb
Constant Summary
- IterationExit = Class.new(Exception)
Instance Attribute Summary
-
#robot_txt ⇒ Object
Returns the value of attribute robot_txt.
-
#visit_count ⇒ Object
Returns the value of attribute visit_count.
Instance Method Summary
- #clear_visited ⇒ Object
- #empty? ⇒ Boolean
-
#initialize(robots = nil, agent = nil, finish = nil) ⇒ VisitQueue
constructor
A new instance of VisitQueue.
- #mark(urls) ⇒ Object
- #push_back(urls) ⇒ Object
- #push_front(urls) ⇒ Object
- #size ⇒ Object
- #stop ⇒ Object
- #url_okay(url) ⇒ Object
- #visit_each ⇒ Object
Constructor Details
#initialize(robots = nil, agent = nil, finish = nil) ⇒ VisitQueue
Returns a new instance of VisitQueue.
16 17 18 19 20 21 22 |
# File 'lib/queue.rb', line 16
#
# Builds an empty visit queue.
#
# @param robots [Object, nil] handed to ExclusionParser.new together with
#   +agent+; when nil/false no parser is created and url_okay skips the
#   exclusion check
# @param agent [Object, nil] second argument for ExclusionParser.new
#   (presumably the crawler's user-agent name; confirm against ExclusionParser)
# @param finish [#call, nil] callable that visit_each invokes once its loop
#   is over; nothing is invoked when nil
def initialize(robots = nil, agent = nil, finish = nil)
  @robot_txt = ExclusionParser.new(robots, agent) if robots
  @finalize = finish
  @visit_count = 0
  clear_visited # starts with a fresh, empty visited filter
  @pending = []
end
Instance Attribute Details
#robot_txt ⇒ Object
Returns the value of attribute robot_txt.
14 15 16 |
# File 'lib/queue.rb', line 14
#
# @return [Object, nil] the ExclusionParser built by the constructor, or nil
#   when the queue was created without robots data
def robot_txt
  @robot_txt
end
#visit_count ⇒ Object
Returns the value of attribute visit_count.
13 14 15 |
# File 'lib/queue.rb', line 13
#
# @return [Integer] number of URLs visit_each has finished processing
#   (starts at 0 and is incremented once per completed visit)
def visit_count
  @visit_count
end
Instance Method Details
#clear_visited ⇒ Object
64 65 66 |
# File 'lib/queue.rb', line 64
#
# Forgets every visited URL by replacing the visited filter with a new,
# empty Bloomer instance.
#
# The defaults reproduce the previously hard-coded sizing, so existing
# zero-argument callers are unaffected.
#
# @param capacity [Integer] first argument passed to Bloomer.new
#   (presumably the expected number of distinct URLs; confirm against Bloomer)
# @param false_positive_rate [Float] second argument passed to Bloomer.new
#   (presumably the tolerated false-positive probability; confirm against Bloomer)
# @return [Bloomer] the new filter
def clear_visited(capacity = 10_000, false_positive_rate = 0.001)
  @visited = Bloomer.new(capacity, false_positive_rate)
end
#empty? ⇒ Boolean
56 57 58 |
# File 'lib/queue.rb', line 56
#
# @return [Boolean] true when no URLs are waiting to be visited
def empty?
  @pending.empty?
end
#mark(urls) ⇒ Object
47 48 49 50 |
# File 'lib/queue.rb', line 47
#
# Records one or more URLs as already visited without visiting them, so
# url_okay will reject them from now on.
#
# Kernel#Array does the coercion: a single URL is wrapped, an Array is used
# as is, other collections are expanded, and nil marks nothing.
#
# @param urls [Object, Array, nil] a single URL or a collection of URLs
# @return [Array] the URLs that were marked
def mark(urls)
  Array(urls).each { |u| @visited.add(u) }
end
#push_back(urls) ⇒ Object
43 44 45 |
# File 'lib/queue.rb', line 43
#
# Queues URLs so that they are visited after everything already pending.
#
# visit_each pops from the tail of @pending, so inserting at the head
# (unshift) puts a URL at the back of the visiting order.
#
# @param urls [Object] passed through to add_url, which yields each URL to
#   enqueue (add_url is defined elsewhere; presumably it filters or
#   normalises its input; confirm there)
# @return [Object] whatever add_url returns
def push_back(urls)
  add_url(urls) { |u| @pending.unshift(u) }
end
#push_front(urls) ⇒ Object
39 40 41 |
# File 'lib/queue.rb', line 39
#
# Queues URLs so that they are visited before everything already pending.
#
# visit_each pops from the tail of @pending, so appending (push) puts a URL
# at the front of the visiting order.
#
# @param urls [Object] passed through to add_url, which yields each URL to
#   enqueue (add_url is defined elsewhere; presumably it filters or
#   normalises its input; confirm there)
# @return [Object] whatever add_url returns
def push_front(urls)
  add_url(urls) { |u| @pending.push(u) }
end
#size ⇒ Object
52 53 54 |
# File 'lib/queue.rb', line 52
#
# @return [Integer] number of URLs currently waiting to be visited
def size
  @pending.size
end
#url_okay(url) ⇒ Object
68 69 70 71 72 |
# File 'lib/queue.rb', line 68
#
# Decides whether a URL may be visited.
#
# A URL is rejected when the visited filter reports it as seen, or when a
# robots parser is configured and reports it as excluded.
#
# NOTE(review): @visited is a Bloomer instance (see clear_visited); if that
# is a Bloom filter, a small share of never-visited URLs may be rejected as
# false positives. Confirm against Bloomer.
#
# @param url [Object] the candidate URL
# @return [Boolean] true when the URL is neither visited nor excluded
def url_okay(url)
  return false if @visited.include?(url)

  # No parser configured means nothing is excluded.
  !(@robot_txt && @robot_txt.excluded?(url))
end
#visit_each ⇒ Object
24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/queue.rb', line 24
#
# Drains the queue, yielding a copy of each acceptable URL to the block.
#
# URLs are taken from the tail of @pending. A URL rejected by url_okay is
# dropped silently. After the block returns, the URL is added to the visited
# filter and visit_count is incremented. The block may enqueue more URLs
# while the loop runs; the loop continues until nothing is pending.
#
# Raising IterationExit inside the block ends the loop early. The URL being
# processed at that moment has already been popped but is neither marked
# visited nor counted. Any other exception propagates to the caller and the
# finalizer is NOT run in that case.
#
# Without a block the queue is still drained and every acceptable URL is
# marked and counted.
#
# @yieldparam url [Object] a clone of the queued URL, so the block cannot
#   mutate the object that gets recorded as visited
# @return [Object, nil] the finalizer's return value, or nil when no
#   finalizer was given to the constructor
def visit_each
  begin
    until @pending.empty?
      url = @pending.pop
      next unless url_okay(url)

      yield url.clone if block_given?
      @visited.add(url)
      @visit_count += 1
    end
  rescue IterationExit
    # Deliberate early exit requested from inside the block; fall through
    # to the finalizer.
  end

  @finalize.call if @finalize
end