Class: SitemapReader

Inherits:
Object
  • Object
show all
Defined in:
lib/sitemap_reader.rb

Overview

Parses a sitemap, given either a URL or a local file path, and returns its URL entries.

Example:

>> sm = SitemapReader.new('http://example.com/sitemap.xml').get_urls
=> [{:loc=>"http://example.com/page1", :lastmod=>"2013-08-18", :changefreq=>nil, :priority=>nil},{:loc=>"http://example.com/page2", :lastmod=>nil, :changefreq=>nil, :priority=>nil}]

… or read from file like this:

>> sm = SitemapReader.new('./sitemap.xml').get_urls
=> [{:loc=>"http://example.com/page1", :lastmod=>"2013-08-18", :changefreq=>nil, :priority=>nil},{:loc=>"http://example.com/page2", :lastmod=>nil, :changefreq=>nil, :priority=>nil}]

Instance Method Summary collapse

Constructor Details

#initialize(file_or_url) ⇒ SitemapReader

Arguments:

file_or_url: (String)


17
18
19
# File 'lib/sitemap_reader.rb', line 17

# Loads the sitemap identified by +file_or_url+ and parses it as XML.
#
# The raw source is obtained through #get_sitemap and the parsed document
# is kept in @doc for the reader methods.
#
# @param file_or_url [String] sitemap location (local path or URL)
def initialize(file_or_url)
  sitemap_source = get_sitemap(file_or_url)
  @doc = Nokogiri::XML(sitemap_source)
end

Instance Method Details

#get_sitemap(file_or_url) ⇒ Object



32
33
34
35
36
37
38
39
# File 'lib/sitemap_reader.rb', line 32

# Opens the sitemap source and returns it as a readable IO-like object.
#
# An argument naming an existing local file is opened with File.open;
# anything else is passed to open-uri's URI.open.
#
# The returned object is not closed here; closing it is left to the caller.
#
# @param file_or_url [String] local path or URL of the sitemap
# @return [Object] a File for an existing local path, otherwise whatever
#   URI.open returns for the argument
def get_sitemap(file_or_url)
  return File.open(file_or_url) if File.exist?(file_or_url)

  # Only loaded when the argument is not an existing local file.
  require 'open-uri'
  # URI.open instead of a bare `open`: the Kernel#open override that open-uri
  # used to install was deprecated in Ruby 2.7 and removed in 3.0, so a bare
  # `open(url)` no longer fetches URLs there.
  URI.open(file_or_url)
end

#get_urls ⇒ Object



21
22
23
24
25
26
27
28
29
30
# File 'lib/sitemap_reader.rb', line 21

# Collects every <url> element of the parsed sitemap into a hash.
#
# Each hash has the keys :loc, :lastmod, :changefreq and :priority. The
# last three are produced by #url_lastmod, #url_changefreq and
# #url_priority from the first matching child element (or nil when the
# child is absent).
#
# @return [Array<Hash>] one hash per <url> element
def get_urls
  @doc.css('url').map do |entry|
    {
      loc: entry.at_css('loc').content,
      lastmod: url_lastmod(entry.at_css('lastmod')),
      changefreq: url_changefreq(entry.at_css('changefreq')),
      priority: url_priority(entry.at_css('priority'))
    }
  end
end

#url_changefreq(changefreq) ⇒ Object



41
42
43
# File 'lib/sitemap_reader.rb', line 41

# Extracts the text of a <changefreq> node.
#
# @param changefreq [#content, nil] the node, or nil when the element is absent
# @return [Object, nil] the node's content, or nil when no node was given
def url_changefreq(changefreq)
  return nil if changefreq.nil?

  changefreq.content
end

#url_lastmod(lastmod) ⇒ Object



49
50
51
52
53
54
# File 'lib/sitemap_reader.rb', line 49

# Parses the text of a <lastmod> node with W3cDatetime.parse.
#
# An ArgumentError raised while doing so is swallowed and yields nil.
#
# @param lastmod [#content, nil] the node, or nil when the element is absent
# @return [Object, nil] the result of W3cDatetime.parse, or nil when no node
#   was given or an ArgumentError was raised
def url_lastmod(lastmod)
  return nil if lastmod.nil?

  W3cDatetime.parse(lastmod.content)
rescue ArgumentError
  nil
end

#url_priority(priority) ⇒ Object



45
46
47
# File 'lib/sitemap_reader.rb', line 45

# Converts the text of a <priority> node to a Float via String#to_f.
#
# @param priority [#content, nil] the node, or nil when the element is absent
# @return [Float, nil] the converted content, or nil when no node was given
def url_priority(priority)
  priority.nil? ? nil : priority.content.to_f
end