Class: SitemapGen::XMLCrawler

Inherits:
Object
  • Object
  • SitemapGen::XMLCrawler
Defined in:
lib/sitemap_gen/xml_crawler.rb

Class Method Summary

  • .execute(xml_path, save_path) ⇒ Object

Class Method Details

.execute(xml_path, save_path) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/sitemap_gen/xml_crawler.rb', line 23

# Reads a sitemap XML file, requests every URL found in its <loc> elements and
# writes one CSV row per URL (blank ID, page title, URL) to
# "sitemap_only_link_title.csv" inside +save_path+.
#
# @param xml_path [String] path of the sitemap XML file to read
# @param save_path [String, nil] directory that receives the CSV file; the
#   current working directory is used when nil
# @return [Object] the value of the CSV.open block
def self.execute(xml_path, save_path)
  save_path ||= Dir.pwd
  xml = File.open(xml_path) { |f| Nokogiri::XML(f) }
  links = xml.css('loc').map(&:content)
  output_path = File.join(save_path, 'sitemap_only_link_title.csv')
  # The CSV writer is shared by every worker block below; appends go through
  # this lock so that rows written concurrently cannot interleave.
  csv_lock = Mutex.new
  ::CSV.open(output_path, 'wb') do |csv|
    csv << ['ID', 'Page title', 'URL']
    # NOTE(review): with_multithread is defined outside this method; presumably
    # it runs the block for each link on up to 8 threads — confirm.
    links.with_multithread(8) do |link|
      # Echo each URL to stdout as it is processed.
      p link
      res = Net::HTTP.get_response(URI(link))
      html = Nokogiri::HTML(res.body)
      # css returns a (possibly empty) node set, so only `first` can be nil.
      title = html.css('head title').first&.content
      csv_lock.synchronize { csv << ['', title, link] }
    end
  end
end