Class: Spix::FeedDiscovery::Feed

Inherits:
Hash show all
Defined in:
lib/spix_parser/tools/feed_discovery/feed.rb

Instance Method Summary collapse

Constructor Details

#initialize(url) ⇒ Feed

Returns a new instance of Feed.



5
6
7
8
9
10
11
12
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 5

# Builds a feed record for +url+.
#
# Stores the stringified +url+, starts +similars+ and +exceptions+ as empty
# arrays, then yields the new instance to the caller's block when one is given.
# Any StandardError raised by those steps (including inside the block) is
# captured and stored as a one-element array in +errors+ instead of
# propagating.
#
# NOTE(review): +exceptions+ is initialised here but failures are written to
# +errors+ -- confirm which of the two attributes callers actually read.
#
# @param url [#to_s] location of the feed
# @yieldparam feed [Feed] the instance being initialised
def initialize(url)
  self.url = url.to_s
  self.similars = []
  self.exceptions = []
  yield(self) if block_given?
rescue StandardError => failure
  self.errors = [failure]
end

Instance Method Details

#fetch(uri, limit = 10) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 55

# Downloads +uri+ and returns the response body, following redirects.
#
# Two kinds of redirect are followed: HTTP redirection responses (using the
# +location+ header) and HTML meta-refresh tags whose +content+ attribute
# carries an absolute http:// or https:// URL. Each target is merged into
# +base_uri+, recorded in +url+, and fetched recursively with +limit+
# decremented by one.
#
# Every StandardError raised along the way -- including the ArgumentError
# for an exhausted +limit+ -- is caught by the method-level rescue, so the
# caller receives an empty String rather than an exception.
#
# NOTE(review): the class documentation lists Hash as the superclass, so this
# definition shadows Hash#fetch -- confirm that is intended.
# NOTE(review): the selector compares http-equiv against the literal
# `REFRESH`; confirm whether differently-cased values should match as well.
#
# @param uri [Object] passed straight to Net::HTTP.get_response
# @param limit [Integer] remaining number of redirect hops
# @return [String] the final response body, or an empty String on any failure
def fetch(uri, limit = 10)
  raise ArgumentError, 'HTTP redirect too deep' if limit == 0

  resp = Net::HTTP.get_response uri
  # The body is only parsed for meta-refresh tags when the response is not
  # already an HTTP redirect; in that case refresh_metatags stays nil.
  if resp.kind_of?(Net::HTTPRedirection) || (refresh_metatags = Nokogiri::HTML(resp.body).search('meta[@http-equiv=REFRESH]')).any?
    # Meta-refresh content looks like "0; url=<target>". Accept https as well
    # as http: a nil match here would make the merge below raise and the whole
    # fetch silently collapse to an empty String.
    path = resp['location'] || refresh_metatags.first.get_attribute('content')[%r{https?://.*}i]
    from_redirect = base_uri.merge path
    self.url = from_redirect.to_s
    fetch from_redirect, limit - 1
  else
    resp.body
  end
rescue
  # Best-effort contract: any failure yields an empty body.
  String.new
end

#fetch_html(uri) ⇒ Object



74
75
76
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 74

# Fetches +uri+ via +fetch+ and parses the returned body with Nokogiri::HTML.
#
# @param uri [Object] forwarded unchanged to +fetch+
# @return [Object] whatever Nokogiri::HTML returns for the fetched body
def fetch_html(uri)
  Nokogiri::HTML(fetch(uri))
end

#fetch_xml(uri) ⇒ Object



70
71
72
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 70

# Fetches +uri+ via +fetch+ and parses the returned body with Nokogiri::XML.
#
# @param uri [Object] forwarded unchanged to +fetch+
# @return [Object] whatever Nokogiri::XML returns for the fetched body
def fetch_xml(uri)
  Nokogiri::XML(fetch(uri))
end

#find_shortcut_in(doc) ⇒ Object



47
48
49
50
51
52
53
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 47

def find_shortcut_in doc
  doc.xpath(
    '//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "shortcut")]', 
    '//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "icon")]',
    '//link[contains(translate(@rel, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"), "ico")]'
  ).map { |node| node.get_attribute "href" }
end

#parse_uri(path) ⇒ Object



98
99
100
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 98

# Percent-escapes +path+ and parses the result into a URI object.
#
# URI.encode no longer exists on Ruby 3.0 and later, so the escaping is done
# through URI::DEFAULT_PARSER.escape, which applies the same escaping rules
# and is also available on older Rubies.
#
# @param path [String] raw, possibly unescaped, URI text
# @return [URI::Generic] the parsed URI
# @raise [URI::InvalidURIError] when the escaped text still is not a valid URI
def parse_uri(path)
  URI.parse(URI::DEFAULT_PARSER.escape(path))
end

#set_favicon ⇒ Object



29
30
31
32
33
34
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 29

# Derives the favicon from the first +link+ node found in +content+.
#
# The node's stripped text is parsed as a URI, reduced to "scheme://host",
# parsed again, and handed to +shortcut_from+; the result is assigned to
# +favicon+. Nothing is done when +content+ has no +link+ node.
#
# NOTE(review): the trailing `rescue nil` only guards the favicon line; an
# error from the first +parse_uri+ call still propagates. Whether a failure
# assigns nil to +favicon+ or skips the assignment altogether depends on how
# the rescue modifier binds to this paren-less call -- confirm against the
# Ruby version in use before restructuring.
def set_favicon
  if node = content.search('link').first
    # Uses the node's text content (not an attribute) as the site address.
    path = parse_uri node.content.strip
    # Keep only scheme and host, joined as "scheme://host", before the lookup.
    self.favicon = shortcut_from parse_uri path.select(:scheme, :host).join("://") rescue nil
  end
end

#set_title ⇒ Object



24
25
26
27
# File 'lib/spix_parser/tools/feed_discovery/feed.rb', line 24

# Copies the text of the first +title+ node in +content+ into +title+.
#
# Leaves +title+ untouched and returns nil when no such node exists.
#
# @return [Object, nil] the assigned title text, or nil when nothing was found
def set_title
  title_node = content.search('title').first
  return unless title_node

  self.title = title_node.content
end