23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
|
# File 'lib/sitemap_gen/xml_crawler.rb', line 23
# Reads a sitemap XML file, requests every URL found in its <loc> elements,
# and writes one CSV row per URL (blank ID, page title, URL) to
# "sitemap_only_link_title.csv" inside +save_path+.
#
# @param xml_path [String] path of the sitemap XML file to read
# @param save_path [String, nil] directory that receives the CSV file;
#   the current working directory is used when this is nil
def self.execute(xml_path, save_path)
  save_path ||= Dir.pwd
  xml = File.open(xml_path) { |f| Nokogiri::XML(f) }
  links = xml.css('loc').map(&:content)

  # Serializes appends to the shared CSV handle so rows written by different
  # workers cannot interleave.
  # NOTE(review): with_multithread is defined elsewhere; presumably it runs
  # the block on 8 threads — confirm against its implementation.
  csv_lock = Mutex.new

  ::CSV.open(File.join(save_path, 'sitemap_only_link_title.csv'), 'wb') do |csv|
    csv << ['ID', 'Page title', 'URL']
    links.with_multithread(8) do |link|
      p link # echoes each URL to stdout before it is requested
      res = Net::HTTP.get_response(URI(link))
      # at_css yields the first matching node or nil, so a page without a
      # <title> produces a row with a nil title instead of raising.
      title = Nokogiri::HTML(res.body).at_css('head title')&.content
      csv_lock.synchronize { csv << ['', title, link] }
    end
  end
end
|