Class: DaimonSkycrawlers::SitemapParser

Inherits:
Object
  • Object
show all
Defined in:
lib/daimon_skycrawlers/sitemap_parser.rb

Overview

Instance Method Summary collapse

Constructor Details

#initialize(urls, options = {}) ⇒ SitemapParser



9
10
11
# File 'lib/daimon_skycrawlers/sitemap_parser.rb', line 9

# Creates a parser for one or more sitemap sources.
#
# @param urls [Array<String>, String, nil] sitemap locations; a single
#   value is wrapped in an array and nil becomes an empty list, so the
#   stored collection can always be iterated
# @param options [Hash] accepted for interface compatibility; not read
#   by this constructor
def initialize(urls, options = {})
  # Array() leaves an existing array untouched and wraps scalars.
  @urls = Array(urls)
end

Instance Method Details

#parse ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/daimon_skycrawlers/sitemap_parser.rb', line 13

# Collects the URLs produced by every sitemap source held in @urls.
#
# A source whose URI scheme starts with "http" is requested through a
# Typhoeus hydra (following redirects) and its response is handed to
# on_complete. Any other source is treated as a local file path: when the
# file exists, its contents are handed to extract_urls; when it does not,
# the source is skipped.
#
# @return [Array] the URLs gathered from all sources; local-file results
#   are appended while iterating, HTTP results once the hydra has run
def parse
  hydra = Typhoeus::Hydra.new(max_concurrency: 1)
  sitemap_urls = []
  @urls.each do |url|
    # A bare file path has no scheme (nil); to_s keeps the prefix test from
    # raising NoMethodError so that local paths reach the file branch.
    if URI(url).scheme.to_s.start_with?("http")
      request = Typhoeus::Request.new(url, followlocation: true)
      request.on_complete do |response|
        sitemap_urls.concat(on_complete(response))
      end
      hydra.queue(request)
    elsif File.exist?(url)
      # Keep the extracted URLs instead of discarding them.
      # NOTE(review): assumes extract_urls returns an Array, as on_complete
      # does — confirm against its definition.
      sitemap_urls.concat(extract_urls(File.read(url)))
    end
  end
  hydra.run
  sitemap_urls
end