Class: Spider::VisitQueue

Inherits:
Object
  • Object
show all
Defined in:
lib/queue.rb

Constant Summary collapse

# Signal raised by #stop and rescued inside #visit_each to end the crawl loop.
# It subclasses Exception directly, so a plain `rescue` / `rescue => e` (which
# only catch StandardError) will not intercept it -- presumably deliberate, so
# that error handling inside a visitor block cannot swallow the stop signal.
class IterationExit < Exception; end

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(robots = nil, agent = nil, finish = nil) ⇒ VisitQueue

Returns a new instance of VisitQueue.



16
17
18
19
20
21
22
# File 'lib/queue.rb', line 16

# Builds an empty visit queue.
#
# @param robots [Object, nil] robots data handed to ExclusionParser.new; when
#   nil/false no parser is created and #url_okay skips the exclusion check
# @param agent [Object, nil] second argument for ExclusionParser.new
#   (presumably the user-agent to match rules against -- confirm)
# @param finish [#call, nil] optional callable that #visit_each invokes once
#   its loop has ended
def initialize(robots = nil, agent = nil, finish = nil)
  # Leave @robot_txt unassigned unless there is robots data to parse.
  if robots
    @robot_txt = ExclusionParser.new(robots, agent)
  end

  @finalize = finish
  @visit_count = 0

  # Start with a fresh visited-filter and nothing waiting to be crawled.
  clear_visited
  @pending = []
end

Instance Attribute Details

#robot_txt ⇒ Object

Returns the value of attribute robot_txt.



14
15
16
# File 'lib/queue.rb', line 14

# Reader for @robot_txt. #initialize assigns an ExclusionParser to it only when
# a `robots` argument is given; while the variable is unset this returns nil.
#
# @return [ExclusionParser, nil]
def robot_txt
  @robot_txt
end

#visit_count ⇒ Object

Returns the value of attribute visit_count.



13
14
15
# File 'lib/queue.rb', line 13

# Reader for @visit_count: set to 0 by #initialize and incremented by
# #visit_each once for every URL that passes #url_okay.
#
# @return [Integer]
def visit_count
  @visit_count
end

Instance Method Details

#clear_visited ⇒ Object



64
65
66
# File 'lib/queue.rb', line 64

# Replaces the visited-URL filter with a brand-new Bloomer instance, so every
# URL recorded so far is forgotten.
#
# Both arguments are passed straight through to Bloomer.new; the defaults are
# the values that were previously hard-coded, so calling this with no
# arguments behaves exactly as before.
#
# @param capacity [Integer] first argument for Bloomer.new (presumably the
#   expected number of entries -- confirm against the Bloomer documentation)
# @param error_rate [Float] second argument for Bloomer.new (presumably the
#   acceptable false-positive probability -- confirm)
# @return [Bloomer] the newly installed filter
def clear_visited(capacity = 10_000, error_rate = 0.001)
  @visited = Bloomer.new(capacity, error_rate)
end

#empty? ⇒ Boolean

Returns:

  • (Boolean)


56
57
58
# File 'lib/queue.rb', line 56

# Reports whether any URLs are still waiting in the pending list.
#
# @return [Boolean] true when @pending holds no entries
def empty?
  @pending.empty?
end

#mark(urls) ⇒ Object



47
48
49
50
# File 'lib/queue.rb', line 47

# Records one or more URLs in the visited filter without visiting them.
#
# @param urls [Object, Array] a single URL or an Array of URLs; anything that
#   is not an Array is treated as one URL
# @return [Array] the list of URLs that were added
def mark(urls)
  batch = urls.is_a?(Array) ? urls : [urls]
  batch.each do |link|
    @visited.add(link)
  end
end

#push_back(urls) ⇒ Object



43
44
45
# File 'lib/queue.rb', line 43

# Queues URLs at the back of the line.
#
# The URLs are handed to add_url (defined elsewhere in the class; presumably
# it normalises and filters them -- confirm). Every URL it yields is inserted
# at index 0 of @pending. #visit_each pops from the end of that array, so
# these URLs are reached only after everything already queued.
#
# @param urls [Object] passed unchanged to add_url
# @return [Object] whatever add_url returns
def push_back(urls)
  add_url(urls) do |link|
    @pending.insert(0, link)
  end
end

#push_front(urls) ⇒ Object



39
40
41
# File 'lib/queue.rb', line 39

# Queues URLs at the front of the line.
#
# The URLs are handed to add_url (defined elsewhere in the class; presumably
# it normalises and filters them -- confirm). Every URL it yields is appended
# to the end of @pending. #visit_each pops from that same end, so the URL
# pushed most recently is the next one visited.
#
# @param urls [Object] passed unchanged to add_url
# @return [Object] whatever add_url returns
def push_front(urls)
  add_url(urls) do |link|
    @pending << link
  end
end

#size ⇒ Object



52
53
54
# File 'lib/queue.rb', line 52

# Number of URLs currently waiting in the pending list.
#
# @return [Integer]
def size
  @pending.length
end

#stop ⇒ Object

Raises:

  • (IterationExit)

60
61
62
# File 'lib/queue.rb', line 60

# Aborts an iteration that is in progress. #visit_each rescues IterationExit
# around its loop, so calling this from inside the block given to #visit_each
# ends the loop early; outside of that the exception simply propagates.
#
# @raise [IterationExit] always
def stop
  raise IterationExit
end

#url_okay(url) ⇒ Object



68
69
70
71
72
# File 'lib/queue.rb', line 68

# Decides whether a URL may be visited.
#
# @param url [Object] the URL to check
# @return [Boolean] false when the visited filter already contains the URL,
#   or when a robots parser is present and reports the URL as excluded;
#   true otherwise
def url_okay(url)
  seen = @visited.include?(url)
  # Consult the robots parser only for URLs that have not been seen yet.
  blocked = !seen && @robot_txt && @robot_txt.excluded?(url)
  !(seen || blocked)
end

#visit_eachObject



24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/queue.rb', line 24

# Drains the pending list, yielding each URL that is allowed to be visited.
#
# URLs are taken from the end of @pending. One that fails #url_okay is
# dropped silently. Otherwise a clone of it is yielded (when a block was
# given), after which the URL is added to the visited filter and the visit
# counter is incremented. An IterationExit raised during the loop (see #stop)
# ends it early; the URL being processed at that moment is neither marked as
# visited nor counted. Once the loop is over the finish callback, if any, is
# called. Other exceptions propagate and skip the callback.
#
# @yieldparam url [Object] a clone of the URL being visited
# @return [Object, nil] the finish callback's result, or nil without one
def visit_each
  begin
    until @pending.empty?
      candidate = @pending.pop
      if url_okay(candidate)
        yield(candidate.clone) if block_given?
        @visited.add(candidate)
        @visit_count += 1
      end
    end
  rescue IterationExit
    # Early exit was requested; fall through to the finish callback.
  end

  return unless @finalize

  @finalize.call
end