Method: OutriderTools::Scrape.page

Defined in:
lib/outrider/tools.rb

.page(url, operate) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/outrider/tools.rb', line 75

# Fetches a single page, hands the parsed document to a caller-supplied
# callable and collects the links found on that page.
#
# @param url [String] address of the page to fetch
# @param operate [#call] callable invoked as +operate.call(doc, page_uri)+,
#   where +doc+ is the parsed Nokogiri document and +page_uri+ is the
#   parsed URI of +url+
# @return [Array] +[data, clean_uris]+ on success: +data+ is whatever
#   +operate+ returned, +clean_uris+ is the result of
#   +OutriderTools::Clean.tidy_urls+ applied to the page's hrefs
# @return [Hash] +{ :status => 'rejected' }+ when the fetch or processing
#   failed with an HTTP error, an ArgumentError or a RuntimeError
# @raise [StandardError] any other error is logged and re-raised unchanged
def self.page(url, operate)
  # NOTE(review): a new Logger is created on every call; kept as-is because
  # other methods may rely on @log being (re)assigned here.
  @log  = Logger.new('log/logfile.log', 'daily')
  files = OutriderTools::Clean.file_types

  begin
    page_uri = URI.parse(url)

    # URI.open replaces the Kernel#open hook that open-uri no longer provides
    # on Ruby 3.0+; the block form closes the underlying IO once parsing is
    # finished instead of leaking it.
    doc = URI.open(page_uri) { |io| Nokogiri.HTML(io) }

    # Hand the parsed page and its URI to the caller-supplied callable.
    data = operate.call(doc, page_uri)

    # Find all the links on the page.
    hrefs = doc.css('a[href]').map { |a| a['href'] }

    clean_uris = OutriderTools::Clean.tidy_urls(hrefs, page_uri, page_uri, files)
    return data, clean_uris
  rescue OpenURI::HTTPError # HTTP error responses, e.g. 404s
    @log.error "Skipping invalid link #{page_uri}"
  rescue ArgumentError => e
    @log.error "Skipping page that causes argument error: #{e}"
  rescue RuntimeError => e
    @log.error "Invalid Redirection: #{e}"
  rescue StandardError => e
    # Anything unexpected is logged and then propagated to the caller.
    # StandardError (not Exception) so signals and SystemExit pass straight
    # through without being intercepted.
    @log.error "Error #{e}"
    raise
  end

  # Reached only when one of the handled errors above was logged.
  { :status => 'rejected' }
end