Class: Spider::VisitQueue
- Inherits:
-
Object
- Object
- Spider::VisitQueue
- Defined in:
- lib/queue.rb
Defined Under Namespace
Classes: IterationExit
Instance Attribute Summary collapse
-
#robot_txt ⇒ Object
Returns the value of attribute robot_txt.
-
#visit_count ⇒ Object
Returns the value of attribute visit_count.
Instance Method Summary collapse
- #empty? ⇒ Boolean
-
#initialize(robots = nil, agent = nil, finish = nil) ⇒ VisitQueue
constructor
A new instance of VisitQueue.
- #push_back(urls) ⇒ Object
- #push_front(urls) ⇒ Object
- #size ⇒ Object
- #stop ⇒ Object
- #visit_each ⇒ Object
Constructor Details
#initialize(robots = nil, agent = nil, finish = nil) ⇒ VisitQueue
Returns a new instance of VisitQueue.
17 18 19 20 21 22 23 |
# File 'lib/queue.rb', line 17
#
# Builds an empty queue: no pending URLs and a visit counter of zero.
#
# @param robots [Object, nil] passed with +agent+ to ExclusionParser.new;
#   when nil or false no parser is created and @robot_txt is left unset
# @param agent [Object, nil] second argument for ExclusionParser.new
# @param finish [#call, nil] stored as @finalize; #visit_each calls it with
#   no arguments once its loop is over
def initialize(robots = nil, agent = nil, finish = nil)
  @pending = []
  @visit_count = 0
  @finalize = finish
  @visited = BloomFilter.new(size: 10_000, error_rate: 0.001)

  # Kept conditional on purpose: the ivar is only assigned when robots is truthy.
  if robots
    @robot_txt = ExclusionParser.new(robots, agent)
  end
end
Instance Attribute Details
#robot_txt ⇒ Object
Returns the value of attribute robot_txt.
15 16 17 |
# File 'lib/queue.rb', line 15
#
# Reader for @robot_txt.
#
# @return [Object, nil] the ExclusionParser built by the constructor, or nil
#   when the constructor received no +robots+ argument
def robot_txt
  @robot_txt
end
#visit_count ⇒ Object
Returns the value of attribute visit_count.
14 15 16 |
# File 'lib/queue.rb', line 14
#
# Reader for @visit_count.
#
# @return [Integer] starts at 0 and is incremented by #visit_each for every
#   URL that passes its check
def visit_count
  @visit_count
end
Instance Method Details
#empty? ⇒ Boolean
53 54 55 |
# File 'lib/queue.rb', line 53
#
# Tells whether any URLs are still waiting to be visited.
#
# @return [Boolean] true when the pending list holds no entries
def empty?
  @pending.empty?
end
#push_back(urls) ⇒ Object
45 46 47 |
# File 'lib/queue.rb', line 45
#
# Queues URLs behind the entries that are currently pending.
#
# +urls+ is handed to add_url (defined elsewhere in this class); every URL
# that add_url yields is inserted at index 0 of the pending list. Because
# #visit_each pops from the tail, index 0 is the last position to be served.
#
# @param urls [Object] forwarded untouched to add_url
# @return [Object] whatever add_url returns
def push_back(urls)
  add_url(urls) do |url|
    @pending.unshift(url)
  end
end
#push_front(urls) ⇒ Object
41 42 43 |
# File 'lib/queue.rb', line 41
#
# Queues URLs ahead of the entries that are currently pending.
#
# +urls+ is handed to add_url (defined elsewhere in this class); every URL
# that add_url yields is appended to the tail of the pending list, which is
# the end #visit_each pops from, so it is served next.
#
# @param urls [Object] forwarded untouched to add_url
# @return [Object] whatever add_url returns
def push_front(urls)
  add_url(urls) do |url|
    @pending << url
  end
end
#size ⇒ Object
49 50 51 |
# File 'lib/queue.rb', line 49
#
# Number of URLs still waiting to be visited.
#
# @return [Integer] length of the pending list
def size
  @pending.length
end
#visit_each ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/queue.rb', line 25
#
# Drains the pending list, popping URLs from its tail one at a time.
#
# A popped URL is first checked with url_okay (defined elsewhere in this
# class). URLs that fail the check are dropped silently. URLs that pass are
# yielded to the block (when one is given), inserted into @visited and
# counted in @visit_count.
#
# An IterationExit raised inside the loop ends the iteration early without
# propagating; presumably #stop raises it, though that method is not visible
# here. The URL being processed at that moment has already been popped and
# is neither recorded as visited nor counted.
#
# Once the loop is over the finish callback from the constructor, if any,
# is called with no arguments.
#
# @yieldparam url [Object] a URL that passed url_okay
# @return [Object, nil] the finish callback's result, or nil without one
def visit_each
  begin
    until @pending.empty?
      candidate = @pending.pop
      next unless url_okay(candidate)

      yield candidate if block_given?
      @visited.insert(candidate)
      @visit_count += 1
    end
  rescue IterationExit
    # Intentional: this exception is only a signal to leave the loop.
  end

  return unless @finalize

  @finalize.call
end