75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
# File 'lib/outrider/tools.rb', line 75
# Fetches a page, parses it as HTML, hands the parsed document to +operate+
# and collects the links found on the page.
#
# @param url [String] address of the page; passed to URI.parse
# @param operate [#call] callable invoked as operate.call(doc, page_uri);
#   whatever it returns is passed back to the caller untouched
# @return [Array, Hash] a two-element array [data, clean_uris] on success,
#   or { :status => 'rejected' } when one of the handled errors was logged
# @raise [Exception] any error other than OpenURI::HTTPError, ArgumentError
#   or RuntimeError is logged and then re-raised
def self.page(url, operate)
  # Reuse one logger across calls instead of opening the log file every time.
  @log ||= Logger.new('log/logfile.log', 'daily')
  files = OutriderTools::Clean.file_types

  begin
    page_uri = URI.parse(url)
    # OpenURI::OpenRead#open works on every Ruby version that ships open-uri
    # (Kernel#open no longer accepts URIs since Ruby 3.0); the block form
    # closes the underlying IO as soon as the document has been parsed.
    doc = page_uri.open { |io| Nokogiri.HTML(io) }
    data = operate.call(doc, page_uri)
    hrefs = doc.css('a[href]').map { |a| a['href'] }
    # page_uri is deliberately passed twice, exactly as the original call did.
    clean_uris = OutriderTools::Clean.tidy_urls(hrefs, page_uri, page_uri, files)
    return [data, clean_uris]
  rescue OpenURI::HTTPError
    @log.error "Skipping invalid link #{page_uri}"
  rescue ArgumentError => e
    @log.error "Skipping page that causes argument error: #{e}"
  rescue RuntimeError => e
    @log.error "Invalid Redirection: #{e}"
  rescue Exception => e # rubocop:disable Lint/RescueException
    # Log-and-propagate only: nothing is swallowed here, the error is re-raised.
    @log.error "Error #{e}"
    raise
  end

  # Reached only after one of the handled (logged) errors above.
  { :status => 'rejected' }
end
|