8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
# File 'app/jobs/pageflow/chart/scrape_site_job.rb', line 8
# Downloads the page at +scraped_site.url+ together with the assets the
# scraper finds in it, and assigns each download to the matching file
# attribute of +scraped_site+. Afterwards the CSV at
# +scraped_site.csv_url+ is downloaded as well.
#
# @param scraped_site record providing +url+ and +csv_url+ and accepting
#   +html_file+, +javascript_file+, +javascript_body_file+,
#   +stylesheet_file+ and +csv_file+ assignments
# @param _options [Hash] accepted but not used by this method
# @param downloader object used for all requests; defaults to
#   +downloader_for(scraped_site)+
# @return [Symbol] +:ok+ when every step finished, +:error+ when a
#   +Downloader::HTTPError+ was raised anywhere in the method body
def perform_with_result(scraped_site,
                        _options = {},
                        downloader: downloader_for(scraped_site))
  page_url = scraped_site.url

  downloader.load_following_refresh_tags(page_url, raise_on_http_error: true) do |page|
    scraper = Scraper.new(page.read, Chart.config.scraper_options)

    # Store the scraper's HTML output (not the raw download) as the HTML file.
    html_upload = StringIOWithContentType.new(scraper.html,
                                              file_name: 'file.html',
                                              content_type: 'text/html')
    scraped_site.html_file = html_upload

    # Scripts from <head> and <body> are kept in separate attributes.
    # NOTE(review): before_each/after_each presumably wrap every script in a
    # try/catch so one failing script cannot break the others - confirm
    # against the Downloader implementation.
    head_script_urls = scraper.javascript_urls_in_head
    downloader.load_all(head_script_urls,
                        extension: '.js',
                        before_each: begin_try_catch,
                        after_each: end_try_catch) { |bundle| scraped_site.javascript_file = bundle }

    body_script_urls = scraper.javascript_urls_in_body
    downloader.load_all(body_script_urls,
                        extension: '.js',
                        before_each: begin_try_catch,
                        after_each: end_try_catch) { |bundle| scraped_site.javascript_body_file = bundle }

    stylesheet_urls = scraper.stylesheet_urls
    downloader.load_all(stylesheet_urls,
                        extension: '.css',
                        separator: "\n;") { |bundle| scraped_site.stylesheet_file = bundle }
  end

  # The CSV is fetched only after the page scrape has completed.
  csv_source = scraped_site.csv_url
  downloader.load(csv_source) { |csv| scraped_site.csv_file = csv }

  :ok
rescue Downloader::HTTPError
  :error
end
|