Class: Scruber::Core::Crawler
- Inherits:
-
Object
- Object
- Scruber::Core::Crawler
- Defined in:
- lib/scruber/core/crawler.rb
Instance Attribute Summary collapse
-
#fetcher ⇒ Object
readonly
Returns the value of attribute fetcher.
-
#queue ⇒ Object
readonly
Returns the value of attribute queue.
-
#scraper_name ⇒ Object
readonly
Returns the value of attribute scraper_name.
Class Method Summary collapse
Instance Method Summary collapse
-
#initialize(*args) ⇒ Crawler
constructor
A new instance of Crawler.
- #method_missing(method_sym, *arguments, &block) ⇒ Object
- #parser(page_type, options = {}, &block) ⇒ Object
- #respond_to?(method_sym, include_private = false) ⇒ Boolean
-
#run(&block) ⇒ Object
Run crawling.
Constructor Details
#initialize(*args) ⇒ Crawler
Returns a new instance of Crawler.
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/scruber/core/crawler.rb', line 6
#
# Builds a crawler for one scraper.
#
# Accepts either `(scraper_name, options)` or just `(options)`. When the first
# argument is a Hash it is treated as the options and the scraper name is
# taken from ENV['SCRUBER_SCRAPER_NAME'] instead.
#
# @param args [Array] optional scraper name, optionally followed by an options Hash
# @raise [Scruber::ArgumentError] when no scraper name is supplied by argument or ENV
def initialize(*args)
  if args.first.is_a?(Hash)
    scraper_name = nil
    options = args.first
  else
    scraper_name, options = args
    options ||= {}
  end

  @scraper_name = scraper_name.present? ? scraper_name : ENV['SCRUBER_SCRAPER_NAME']
  if @scraper_name.blank?
    raise Scruber::ArgumentError,
          "Scraper name is empty. Pass it to `Scruber.run :name do` or through ENV['SCRUBER_SCRAPER_NAME']"
  end
  @scraper_name = @scraper_name.to_sym

  # NOTE(review): the method name on this call was lost in the rendered
  # listing (it read `Scruber.configuration.()`); `merge_options(options)` is
  # a reconstruction -- confirm against lib/scruber/core/configuration.rb.
  Scruber.configuration.merge_options(options)

  @callbacks_options = {}
  @callbacks = {}
  @on_complete_callbacks = {}

  # Use the resolved name: the local `scraper_name` is nil whenever the name
  # came from ENV, which would leave the queue without a scraper name.
  @queue = Scruber::Queue.new(scraper_name: @scraper_name)
  @fetcher = Scruber::Fetcher.new
  load_extenstions
end
Dynamic Method Handling
This class handles dynamic methods through the `method_missing` method.
#method_missing(method_sym, *arguments, &block) ⇒ Object
51 52 53 54 55 56 57 58 |
# File 'lib/scruber/core/crawler.rb', line 51
#
# Dispatches unknown method calls to handlers registered on
# Scruber::Core::Crawler. The first registered pattern that yields a
# non-empty `String#scan` result against the method name wins; its handler
# is run in the context of this instance.
#
# The handler receives the method symbol, the scan results, and the call
# arguments with the caller's block appended as the last element (nil when
# no block was given).
#
# Falls through to `super` when no pattern matches.
def method_missing(method_sym, *arguments, &block)
  method_name = method_sym.to_s

  Scruber::Core::Crawler._registered_method_missings.each do |pattern, handler|
    matches = method_name.scan(pattern)
    next unless matches.present?

    return instance_exec(method_sym, matches, arguments + [block], &handler)
  end

  super
end
Instance Attribute Details
#fetcher ⇒ Object (readonly)
Returns the value of attribute fetcher.
4 5 6 |
# File 'lib/scruber/core/crawler.rb', line 4
#
# Read-only accessor for the @fetcher instance variable.
#
# @return [Object] the current value of @fetcher
def fetcher
  @fetcher
end
#queue ⇒ Object (readonly)
Returns the value of attribute queue.
4 5 6 |
# File 'lib/scruber/core/crawler.rb', line 4
#
# Read-only accessor for the @queue instance variable.
#
# @return [Object] the current value of @queue
def queue
  @queue
end
#scraper_name ⇒ Object (readonly)
Returns the value of attribute scraper_name.
4 5 6 |
# File 'lib/scruber/core/crawler.rb', line 4
#
# Read-only accessor for the @scraper_name instance variable.
#
# @return [Object] the current value of @scraper_name
def scraper_name
  @scraper_name
end
Class Method Details
._registered_method_missings ⇒ Object
75 76 77 |
# File 'lib/scruber/core/crawler.rb', line 75
#
# Registry of dynamic-method handlers, keyed by pattern. Created as an empty
# Hash on first access and memoized in @registered_method_missings, so every
# call returns the same object.
#
# @return [Hash] pattern => handler block
def _registered_method_missings
  @registered_method_missings || (@registered_method_missings = {})
end
.register_method_missing(pattern, &block) ⇒ Object
71 72 73 |
# File 'lib/scruber/core/crawler.rb', line 71
#
# Registers a handler block under the given pattern in the dynamic-method
# registry. Registering the same pattern again replaces the earlier handler.
#
# @param pattern [Object] key matched against method names by method_missing
# @return [Proc, nil] the block that was stored
def register_method_missing(pattern, &block)
  _registered_method_missings.store(pattern, block)
end
Instance Method Details
#parser(page_type, options = {}, &block) ⇒ Object
47 48 49 |
# File 'lib/scruber/core/crawler.rb', line 47
#
# Registers a parsing callback for pages of the given type by forwarding all
# arguments to register_callback.
#
# @param page_type [Object] page type the callback handles
# @param options [Hash] callback options, passed through unchanged
# @return [Object] whatever register_callback returns
def parser(page_type, options = {}, &block)
  register_callback(page_type, options, &block)
end
#respond_to?(method_sym, include_private = false) ⇒ Boolean
60 61 62 63 64 65 66 67 68 |
# File 'lib/scruber/core/crawler.rb', line 60
#
# Reports whether this object responds to the given method, counting methods
# that method_missing would dispatch: true when any pattern registered on
# Scruber::Core::Crawler matches the method name, otherwise the inherited
# answer.
#
# @param method_sym [Symbol, String] method name to check
# @param include_private [Boolean] passed through to the inherited check
# @return [Boolean]
def respond_to?(method_sym, include_private = false)
  handled_dynamically = Scruber::Core::Crawler._registered_method_missings.any? do |pattern, _handler|
    method_sym.to_s =~ pattern
  end

  handled_dynamically || super(method_sym, include_private)
end
#run(&block) ⇒ Object
Run crawling.
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/scruber/core/crawler.rb', line 30
#
# Run crawling.
#
# Evaluates the given block in the context of this crawler, then loops while
# the queue reports work: the fetcher is run against the queue, and each
# downloaded page that has a callback registered for its page type is passed
# through process_page and handed to that callback together with the
# processed result. Pages with no registered callback are skipped. Once the
# queue has no work left, every on-complete callback is run.
def run(&block)
  instance_eval(&block)

  while @queue.has_work?
    @fetcher.run(@queue)

    while (page = @queue.fetch_downloaded)
      page_type = page.page_type.to_sym
      next unless @callbacks[page_type]

      processed_page = process_page(page, page_type)
      instance_exec(page, processed_page, &@callbacks[page_type])
      # A callback may send the page back for another download; only mark it
      # processed when it did not.
      page.processed! unless page.sent_to_redownload?
    end
  end

  @on_complete_callbacks.each do |_key, callback|
    instance_exec(&callback)
  end
end