Class: SimpleRSS

Inherits:
Object
  • Object
show all
Defined in:
lib/parsers/simple-rss.rb

Overview

Monkey patches for outstanding issues logged in the simple-rss project.

* Add support for the `issued` time field:
  http://rubyforge.org/tracker/index.php?func=detail&aid=13980&group_id=893&atid=3517
* Fix the '+' symbol being lost when escaping fields:
  http://rubyforge.org/tracker/index.php?func=detail&aid=10852&group_id=893&atid=3517

Instance Method Summary collapse

Instance Method Details

#clean_content(tag, attrs, content) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
# File 'lib/parsers/simple-rss.rb', line 13

# Turns the raw content of a feed element into its cleaned value.
#
# tag     - Symbol naming the element (e.g. :pubDate, :author).
# attrs   - the element's attribute text; only interpolated into a string
#           here to look for an href attribute.
# content - the element's body; coerced with #to_s before use.
#
# Returns a Time for date-like tags when the content parses, otherwise a
# String produced by #unescape (or the href value, see below).
def clean_content(tag, attrs, content)
  text = content.to_s

  case tag
  when :pubDate, :lastBuildDate, :published, :updated, :expirationDate, :modified, :'dc:date', :issued
    # Date-like tags: fall back to the unescaped text if parsing raises.
    begin
      Time.parse(text)
    rescue
      unescape(text)
    end
  when :author, :contributor, :skipHours, :skipDays
    # Strip anything that looks like markup before unescaping.
    unescape(text.gsub(/<.*?>/,''))
  else
    # An element with no body but an href attribute yields the href value.
    if text.empty? && "#{attrs} " =~ /href=['"]?([^'"]*)['" ]/mi
      Regexp.last_match(1).strip
    else
      unescape(text)
    end
  end
end

#unescape(s) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/parsers/simple-rss.rb', line 26

# Returns +s+ in HTML form.
#
# Three cases are checked in order:
# 1. A line begins (after optional whitespace) with a CDATA opening or
#    closing marker: the content is taken to be raw HTML, so every CDATA
#    marker is removed and the rest is returned as-is.
# 2. The text contains '<' or '>': it is taken to be HTML already and is
#    returned untouched.
# 3. Otherwise the text is passed to FeedNormalizer::HtmlCleaner.unescapeHTML.
def unescape(s)
  # Raw HTML is inside the CDATA, so just remove the CDATA wrapper.
  return s.gsub(/(<!\[CDATA\[|\]\]>)/,'') if s =~ /^\s*(<!\[CDATA\[|\]\]>)/

  # Already looks like HTML.
  return s if s =~ /[<>]/

  # Make it HTML.
  FeedNormalizer::HtmlCleaner.unescapeHTML(s)
end