Class: Spider::VisitQueue

Inherits:
Object
  • Object
show all
Defined in:
lib/queue.rb

Defined Under Namespace

Classes: IterationExit

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(robots = nil, agent = nil, finish = nil) ⇒ VisitQueue

Returns a new instance of VisitQueue.



17
18
19
20
21
22
23
# File 'lib/queue.rb', line 17

# Builds an empty visit queue.
#
# robots - when truthy, handed together with +agent+ to ExclusionParser.new
#          and the result is kept in @robot_txt; when nil or false no
#          parser is created and @robot_txt is left unassigned.
#          (presumably robots.txt content — confirm against ExclusionParser)
# agent  - second argument for ExclusionParser.new; only used when
#          +robots+ is given.
# finish - optional object responding to +call+; visit_each invokes it
#          once its loop has ended.
def initialize(robots = nil, agent = nil, finish = nil)
  # Plain bookkeeping state.
  @pending = []
  @visit_count = 0
  @finalize = finish

  # Record of urls already visited (visit_each inserts into it).
  @visited = BloomFilter.new(size: 10_000, error_rate: 0.001)

  if robots
    @robot_txt = ExclusionParser.new(robots, agent)
  end
end

Instance Attribute Details

#robot_txt ⇒ Object

Returns the value of attribute robot_txt.



15
16
17
# File 'lib/queue.rb', line 15

# Reader for @robot_txt: the ExclusionParser created by the constructor
# when a robots argument was supplied, otherwise nil.
def robot_txt
  @robot_txt
end

#visit_count ⇒ Object

Returns the value of attribute visit_count.



14
15
16
# File 'lib/queue.rb', line 14

# Reader for @visit_count. The counter starts at 0 and visit_each adds one
# for every url that passes url_okay and has been handed to the block.
def visit_count
  @visit_count
end

Instance Method Details

#empty? ⇒ Boolean

Returns:

  • (Boolean)


53
54
55
# File 'lib/queue.rb', line 53

# True when no urls are waiting in the pending list.
def empty?
  @pending.empty?
end

#push_back(urls) ⇒ Object



45
46
47
# File 'lib/queue.rb', line 45

# Queues +urls+ at the back of the line.
#
# Every url that add_url yields is inserted at index 0 of the pending
# list. visit_each takes urls from the opposite end (Array#pop), so these
# are visited after everything already queued.
# Which urls add_url actually yields is decided by add_url itself
# (defined elsewhere in this class).
#
# Returns whatever add_url returns.
def push_back(urls)
  add_url(urls) do |url|
    @pending.unshift(url)
  end
end

#push_front(urls) ⇒ Object



41
42
43
# File 'lib/queue.rb', line 41

# Queues +urls+ at the front of the line.
#
# Every url that add_url yields is appended to the end of the pending
# list, which is the end visit_each pops from, so these are visited
# before anything already queued.
# Which urls add_url actually yields is decided by add_url itself
# (defined elsewhere in this class).
#
# Returns whatever add_url returns.
def push_front(urls)
  add_url(urls) do |url|
    @pending.push(url)
  end
end

#size ⇒ Object



49
50
51
# File 'lib/queue.rb', line 49

# Number of urls currently waiting in the pending list.
def size
  @pending.length
end

#stop ⇒ Object

Raises:

  • (IterationExit)



57
58
59
# File 'lib/queue.rb', line 57

# Ends a running visit_each early by raising IterationExit.
# visit_each rescues that exception, leaves its loop and then still runs
# the finalize callback, so this is meant to be called from inside the
# block given to visit_each.
def stop
  raise IterationExit
end

#visit_each ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/queue.rb', line 25

# Drains the pending list, yielding each acceptable url to the block.
#
# Urls are taken from the end of the pending list. A url rejected by
# url_okay is dropped without being yielded, recorded or counted. An
# accepted url is yielded (when a block was given), then inserted into
# @visited, then counted in @visit_count.
#
# Raising IterationExit from the block (see #stop) ends the loop early;
# the url being processed at that moment is neither recorded nor counted.
# Any other exception propagates and skips the finalize callback.
#
# Returns the result of the finalize callback, or nil when none was set.
def visit_each
  begin
    until @pending.empty?
      candidate = @pending.pop
      next unless url_okay(candidate)

      yield(candidate) if block_given?
      @visited.insert(candidate)
      @visit_count += 1
    end
  rescue IterationExit
    # Deliberate early exit requested via #stop; fall through to finalize.
  end

  return unless @finalize

  @finalize.call
end