Class: Biblionet::Crawlers::PublisherCrawler

Inherits:

Base

Object
Base
Biblionet::Crawlers::PublisherCrawler

show all

Defined in: lib/bookshark/crawlers/publisher_crawler.rb

Instance Method Summary collapse

#crawl_and_save ⇒ Object
#initialize(options = {}) ⇒ PublisherCrawler constructor

A new instance of PublisherCrawler.

Methods inherited from Base

#spider

Constructor Details

#initialize(options = {}) ⇒ `PublisherCrawler`

Returns a new instance of PublisherCrawler.

# File 'lib/bookshark/crawlers/publisher_crawler.rb', line 7

# Sets up a crawler preconfigured for publisher pages.
#
# Every setting that is absent from +options+ (or present but nil/false)
# falls back to the publisher-specific default listed below. The completed
# settings are then passed to the superclass constructor.
#
# The caller's hash is never modified: defaults are merged into a new hash,
# so a shared or frozen options hash can be passed safely.
#
# @param options [Hash] crawler settings. Keys with a default here:
#   :folder, :base_url, :page_type, :extension, :start, :finish, :step.
#   Any other keys are forwarded unchanged.
def initialize(options = {})
  defaults = {
    folder:    'lib/bookshark/storage/html_publisher_pages',
    base_url:  'http://www.biblionet.gr/com/',
    page_type: 'publisher',
    extension: '.html',
    start:     1,
    finish:    800,
    step:      100
  }

  # The merge block only runs for keys present on both sides; `given || default`
  # keeps the original `||=` behaviour where an explicit nil/false still
  # yields the default.
  super(defaults.merge(options) { |_key, default, given| given || default })
end

Instance Method Details

#crawl_and_save ⇒ `Object`

# File 'lib/bookshark/crawlers/publisher_crawler.rb', line 18

# Downloads each page produced by #spider and writes it to disk.
#
# For every (url, file) pair yielded by #spider the page is loaded and the
# destination directory is created if it does not exist yet. The directory is
# created before the page is inspected, so it exists even when the page ends
# up being skipped. A page is skipped when it is nil or its length is below
# 1024.
def crawl_and_save
  fetcher = Extractors::Base.new

  spider do |source_url, target_file|
    fetcher.load_page(source_url)

    # Ensure the destination directory is in place before any write.
    target_dir = File.dirname(target_file)
    FileUtils.mkdir_p(target_dir) unless File.directory?(target_dir)

    # Nothing loaded, or the page is too short to keep: move on.
    next if fetcher.page.nil? || fetcher.page.length < 1024

    fetcher.save_page(target_file)
  end
end

Class: Biblionet::Crawlers::PublisherCrawler

Instance Method Summary collapse

Methods inherited from Base

Constructor Details

#initialize(options = {}) ⇒ PublisherCrawler

Instance Method Details

#crawl_and_save ⇒ Object

#initialize(options = {}) ⇒ `PublisherCrawler`

#crawl_and_save ⇒ `Object`