Class: SitemapParser
- Inherits: Object
  - Object
    - SitemapParser
- Defined in:
- lib/sitemap-parser.rb
Instance Method Summary
- #initialize(url, opts = {}) ⇒ SitemapParser (constructor): A new instance of SitemapParser.
- #raw_sitemap ⇒ Object
- #sitemap ⇒ Object
- #to_a ⇒ Object
- #urls ⇒ Object
Constructor Details
#initialize(url, opts = {}) ⇒ SitemapParser
Returns a new instance of SitemapParser.
6 7 8 9 |
# File 'lib/sitemap-parser.rb', line 6
#
# Stores the sitemap location and the effective options.
#
# @param url [String] sitemap location; stored as-is in @url
# @param opts [Hash] overrides merged on top of the defaults
#   (:followlocation => true, :recurse => false)
def initialize(url, opts = {})
  defaults = { followlocation: true, recurse: false }

  @url = url
  # Caller-supplied keys win over the defaults.
  @options = defaults.merge(opts)
end
Instance Method Details
#raw_sitemap ⇒ Object
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/sitemap-parser.rb', line 11
#
# Fetches the raw sitemap document and memoizes it in @raw_sitemap.
#
# When @url starts with "http" (case-insensitive) the document is requested
# through Typhoeus, passing the :followlocation option along. Otherwise, when
# @url names an existing file whose path ends in "/sitemap.xml" or
# "\sitemap.xml", the file is read from disk.
#
# @return [String, nil] the response body or the file contents; nil when @url
#   is neither an HTTP(S) URL nor an existing sitemap.xml path
# @raise [RuntimeError] when the HTTP request does not succeed
def raw_sitemap
  @raw_sitemap ||=
    if @url =~ /\Ahttp/i
      request = Typhoeus::Request.new(@url, followlocation: @options[:followlocation])
      # Use the response returned by #run instead of a `return` inside an
      # on_complete callback: that non-local return left the method before
      # the ||= assignment ran, so the body was never cached and every call
      # issued a new request.
      response = request.run
      raise "HTTP request to #{@url} failed" unless response.success?

      response.body
    elsif File.exist?(@url) && @url =~ /[\\\/]sitemap\.xml\Z/i
      # File.read rather than Kernel#open, which on older Rubies treats a
      # path beginning with "|" as a command to run.
      File.read(@url)
    end
end
#sitemap ⇒ Object
29 30 31 |
# File 'lib/sitemap-parser.rb', line 29
#
# Parses the raw sitemap with Nokogiri::XML and caches the result in @sitemap,
# so later calls reuse the already-parsed document.
#
# @return [Object] whatever Nokogiri::XML returns for raw_sitemap
def sitemap
  return @sitemap if @sitemap

  @sitemap = Nokogiri::XML(raw_sitemap)
end
#to_a ⇒ Object
50 51 52 53 54 |
# File 'lib/sitemap-parser.rb', line 50
#
# Collects the text of the <loc> child of every entry returned by #urls.
#
# @return [Array] one location per entry, in the order #urls yields them
# @raise [RuntimeError] 'Malformed sitemap, url without loc' whenever a
#   NoMethodError surfaces while collecting (e.g. an entry has no <loc>)
def to_a
  begin
    urls.map do |entry|
      location = entry.at("loc")
      location.content
    end
  rescue NoMethodError
    raise 'Malformed sitemap, url without loc'
  end
end
#urls ⇒ Object
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/sitemap-parser.rb', line 33
#
# Returns the <url> entries of the sitemap.
#
# * For a <urlset> document: the result of searching it for "url".
# * For a <sitemapindex> document: an empty array unless the :recurse option
#   is set; with :recurse, the combined entries of every child sitemap listed
#   in the index. Children are always created with :recurse => false, so an
#   index nested inside an index contributes nothing.
#
# @return [Object, Array] the search result for a urlset, or an Array of
#   entries gathered from child sitemaps
# @raise [RuntimeError] 'Malformed sitemap, no urlset' when the document has
#   neither a <urlset> nor a <sitemapindex>
def urls
  urlset = sitemap.at('urlset')
  return urlset.search('url') if urlset

  index = sitemap.at('sitemapindex')
  raise 'Malformed sitemap, no urlset' unless index
  return [] unless @options[:recurse]

  # Pass the caller's options (e.g. :followlocation) on to the children
  # instead of dropping them; only recursion is switched off.
  child_options = @options.merge(:recurse => false)

  index.search('sitemap').flat_map do |entry|
    child_location = entry.at('loc').content
    self.class.new(child_location, child_options).urls.to_a
  end
end