Class: Scruber::Core::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/scruber/core/crawler.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Crawler

Returns a new instance of Crawler.



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/scruber/core/crawler.rb', line 6

# Builds a crawler for a named scraper.
#
# Accepts either `(scraper_name, options)` or a single options Hash. When no
# name is given (or it is blank), the name is read from
# ENV['SCRUBER_SCRAPER_NAME'].
#
# @param args [Array] optional scraper name followed by an optional options
#   Hash, or just an options Hash
# @raise [Scruber::ArgumentError] if no scraper name could be resolved
def initialize(*args)
  if args.first.is_a?(Hash)
    scraper_name = nil
    options = args.first
  else
    scraper_name, options = args
    options ||= {}
  end
  @scraper_name = scraper_name.present? ? scraper_name : ENV['SCRUBER_SCRAPER_NAME']
  raise Scruber::ArgumentError, "Scraper name is empty. Pass it to `Scruber.run :name do` or through ENV['SCRUBER_SCRAPER_NAME']" if @scraper_name.blank?
  @scraper_name = @scraper_name.to_sym
  Scruber.configuration.merge_options(options)
  @callbacks_options = {}
  @callbacks = {}
  @on_complete_callbacks = {}
  # Hand the queue the resolved name: the local `scraper_name` is nil when the
  # name came from ENV or when only an options Hash was passed.
  @queue = Scruber::Queue.new(scraper_name: @scraper_name)
  @fetcher = Scruber::Fetcher.new
  load_extenstions
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(method_sym, *arguments, &block) ⇒ Object



51
52
53
54
55
56
57
58
# File 'lib/scruber/core/crawler.rb', line 51

# Dispatches unknown methods to handlers registered on
# Scruber::Core::Crawler.
#
# The method name is scanned against each registered pattern in turn; the
# first pattern that yields scan results wins. Its handler is run on this
# instance with the method symbol, the scan results, and the call arguments
# with the block appended as the last element (nil when no block was given).
# When no pattern matches, the call falls through to the default behaviour.
def method_missing(method_sym, *arguments, &block)
  method_name = method_sym.to_s
  Scruber::Core::Crawler._registered_method_missings.each do |pattern, handler|
    matches = method_name.scan(pattern)
    next unless matches.present?

    return instance_exec(method_sym, matches, arguments + [block], &handler)
  end
  super
end

Instance Attribute Details

#fetcher ⇒ Object (readonly)

Returns the value of attribute fetcher.



4
5
6
# File 'lib/scruber/core/crawler.rb', line 4

# Read-only accessor for the @fetcher instance variable.
#
# @return [Object] the current value of @fetcher
def fetcher
  @fetcher
end

#queue ⇒ Object (readonly)

Returns the value of attribute queue.



4
5
6
# File 'lib/scruber/core/crawler.rb', line 4

# Read-only accessor for the @queue instance variable.
#
# @return [Object] the current value of @queue
def queue
  @queue
end

#scraper_name ⇒ Object (readonly)

Returns the value of attribute scraper_name.



4
5
6
# File 'lib/scruber/core/crawler.rb', line 4

# Read-only accessor for the @scraper_name instance variable.
#
# @return [Object] the current value of @scraper_name
def scraper_name
  @scraper_name
end

Class Method Details

._registered_method_missings ⇒ Object



75
76
77
# File 'lib/scruber/core/crawler.rb', line 75

# Returns the registry Hash held in @registered_method_missings, creating an
# empty one on first access so later calls share the same object.
#
# @return [Hash] the memoized registry
def _registered_method_missings
  @registered_method_missings = {} unless @registered_method_missings
  @registered_method_missings
end

.register_method_missing(pattern, &block) ⇒ Object



71
72
73
# File 'lib/scruber/core/crawler.rb', line 71

# Stores the given block in the registry returned by
# _registered_method_missings under `pattern`, replacing any handler already
# stored under the same pattern.
#
# @param pattern the key the handler is stored under
# @param block [Proc, nil] the handler to store
# @return [Proc, nil] the stored block
def register_method_missing(pattern, &block)
  _registered_method_missings.store(pattern, block)
end

Instance Method Details

#parser(page_type, options = {}, &block) ⇒ Object



47
48
49
# File 'lib/scruber/core/crawler.rb', line 47

# Registers a parser block for a page type by forwarding all arguments,
# unchanged, to register_callback.
#
# @param page_type the page type the block is registered for
# @param options [Hash] passed through to register_callback
# @param block [Proc] the parser body
# @return [Object] whatever register_callback returns
def parser(page_type, options = {}, &block)
  register_callback(page_type, options, &block)
end

#respond_to?(method_sym, include_private = false) ⇒ Boolean

Returns:

  • (Boolean)


60
61
62
63
64
65
66
67
68
# File 'lib/scruber/core/crawler.rb', line 60

# Reports whether this object responds to `method_sym`.
#
# Answers true when the method name matches any pattern registered on
# Scruber::Core::Crawler; otherwise defers to the default lookup.
#
# @param method_sym [Symbol, String] the method name to check
# @param include_private [Boolean] forwarded to the default lookup
# @return [Boolean]
def respond_to?(method_sym, include_private = false)
  method_name = method_sym.to_s
  dynamically_handled = Scruber::Core::Crawler._registered_method_missings.any? { |pattern, _handler| method_name =~ pattern }
  dynamically_handled || super(method_sym, include_private)
end

#run(&block) ⇒ Object

Run crawling.

Parameters:

  • block (Proc)

    crawler body



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/scruber/core/crawler.rb', line 30

# Run crawling.
#
# Evaluates the crawler body on this instance, then loops while the queue
# reports work: the fetcher is run against the queue and every downloaded page
# whose page type has a registered callback is processed and passed, together
# with its processed form, to that callback. A page is marked processed unless
# it was sent to redownload. Pages without a callback are skipped untouched.
# Once the queue has no work left, every on-complete callback is run on this
# instance.
#
# @param block [Proc] crawler body
def run(&block)
  instance_eval(&block)
  while @queue.has_work?
    @fetcher.run(@queue)
    while (page = @queue.fetch_downloaded)
      page_type = page.page_type.to_sym
      callback = @callbacks[page_type]
      next unless callback

      processed_page = process_page(page, page_type)
      instance_exec(page, processed_page, &callback)
      page.processed! unless page.sent_to_redownload?
    end
  end
  @on_complete_callbacks.each do |_key, callback|
    instance_exec(&callback)
  end
end