Class: Vessel::Engine

Inherits:
Object
  • Object
show all
Defined in:
lib/vessel/engine.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(klass, &block) ⇒ Engine

Returns a new instance of Engine.



11
12
13
14
15
16
17
# File 'lib/vessel/engine.rb', line 11

# Builds an engine around a crawler class.
#
# Reads the settings from +klass+, picks the middleware (an explicit block
# wins over the configured middleware list), and wires a bounded queue to a
# new scheduler.
#
# @param klass [Class] crawler class; must respond to +settings+
# @yield optional block stored as the middleware in place of the one built
#   from +settings[:middleware]+
def initialize(klass, &block)
  @crawler_class = klass
  @settings = @crawler_class.settings
  # Only build the configured middleware when the caller gave no block.
  @middleware = block_given? ? block : Middleware.build(*settings[:middleware])
  # The queue capacity comes from the :max_threads setting.
  @queue = SizedQueue.new(settings[:max_threads])
  @scheduler = Scheduler.new(@queue, settings)
end

Instance Attribute Details

#crawler_class ⇒ Object (readonly)

Returns the value of attribute crawler_class.



9
10
11
# File 'lib/vessel/engine.rb', line 9

# Reader for the crawler class this engine was constructed with.
#
# @return [Object] the value held in @crawler_class
def crawler_class
  @crawler_class
end

#middleware ⇒ Object (readonly)

Returns the value of attribute middleware.



9
10
11
# File 'lib/vessel/engine.rb', line 9

# Reader for the middleware chosen by the constructor: either the block it
# was given or the result of Middleware.build.
#
# @return [Object] the value held in @middleware
def middleware
  @middleware
end

#scheduler ⇒ Object (readonly)

Returns the value of attribute scheduler.



9
10
11
# File 'lib/vessel/engine.rb', line 9

# Reader for the Scheduler instance created by the constructor.
#
# @return [Object] the value held in @scheduler
def scheduler
  @scheduler
end

#settings ⇒ Object (readonly)

Returns the value of attribute settings.



9
10
11
# File 'lib/vessel/engine.rb', line 9

# Reader for the settings the constructor obtained from the crawler class.
#
# @return [Object] the value held in @settings
def settings
  @settings
end

Class Method Details

.run(*args, &block) ⇒ Object



5
6
7
# File 'lib/vessel/engine.rb', line 5

# Convenience entry point: constructs an engine, runs it, and hands the
# engine back to the caller.
#
# @param args [Array] forwarded unchanged to +new+
# @yield forwarded unchanged to +new+
# @return [Object] the engine instance (not the result of its +run+)
def self.run(*args, &block)
  engine = new(*args, &block)
  engine.run
  engine
end

Instance Method Details

#handle(page, request) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/vessel/engine.rb', line 30

# Processes one fetched page with a fresh crawler instance.
#
# Instantiates the crawler class with +page+ and invokes the method named by
# +request.method+ on it. Each group of values the crawler yields is routed:
# a group made up solely of Request objects (including an empty group) is
# posted to the scheduler; anything else is passed to the middleware, if one
# is set.
#
# @param page [Object] passed to the crawler constructor; must respond to +close+
# @param request [Object] must respond to +method+
# @return [Object] whatever the invoked crawler method returns
def handle(page, request)
  instance = @crawler_class.new(page)
  instance.send(request.method) do |*yielded|
    if yielded.any? { |item| !item.is_a?(Request) }
      # At least one non-request value: treat the whole group as output.
      @middleware&.call(*yielded)
    else
      scheduler.post(*yielded)
    end
  end
ensure
  # The page is closed whether or not the crawler raised.
  page.close
end

#idle? ⇒ Boolean

Returns:

  • (Boolean)


47
48
49
50
51
# File 'lib/vessel/engine.rb', line 47

# Reports whether there is nothing left to process.
#
# True only when the message queue is empty, the scheduler reports a queue
# length of zero, and its scheduled and completed task counts are equal.
#
# @return [Boolean]
def idle?
  return false unless @queue.empty?
  return false unless @scheduler.queue_length.zero?

  @scheduler.scheduled_task_count == @scheduler.completed_task_count
end

#run ⇒ Object



19
20
21
22
23
24
25
26
27
28
# File 'lib/vessel/engine.rb', line 19

# Drives the crawl: posts the start requests to the scheduler, then consumes
# messages from the queue until the queue has been closed.
#
# A message that is an Exception is re-raised in the calling thread; any
# other message is splatted into +handle+. After each handled message the
# queue is closed if the engine has become idle, which ends the loop.
#
# @raise [Exception] any exception object received from the queue
# @return [nil]
def run
  scheduler.post(*start_requests)

  until @queue.closed?
    message = @queue.pop

    case message
    when Exception then raise(message)
    else handle(*message)
    end

    @queue.close if idle?
  end
end

#start_requests ⇒ Object



43
44
45
# File 'lib/vessel/engine.rb', line 43

# Builds the initial requests from the +:start_urls+ setting.
#
# The setting is splatted, so a list, a single value, or nil are all
# accepted and forwarded to Request.build as individual arguments.
#
# @return [Object] whatever Request.build returns
def start_requests
  urls = settings[:start_urls]
  Request.build(*urls)
end