Class: SimpleRss

Inherits:
Object
  • Object
show all
Defined in:
lib/simple_rss.rb

Constant Summary collapse

# Version string for SimpleRss.
VERSION =
"1.2.3"
# Tag names that #parse looks up at the feed (channel) level. Each tag that is
# found becomes an instance variable with a reader; colons in the name are
# replaced by underscores (see #clean_tag). The list can be replaced through
# the .feed_tags= class method.
@@feed_tags =
[
  :id,
  :title, :subtitle, :link,
  :description, 
  :author, :webMaster, :managingEditor, :contributor,
  :pubDate, :lastBuildDate, :updated, :'dc:date',
  :generator, :language, :docs, :cloud,
  :ttl, :skipHours, :skipDays,
  :image, :logo, :icon, :rating,
  :rights, :copyright,
  :textInput, :'feedburner:browserFriendly',
  :'itunes:author', :'itunes:category'
]
# Tag names that #parse looks up inside every item/entry element.
#
# Two special notations are interpreted by #parse:
# * 'name+rel'  - matches the <name> element whose rel attribute equals +rel+
#                 (e.g. :'link+alternate').
# * 'name#attr' - extracts the value of attribute +attr+ from the <name>
#                 element (e.g. :'media:content#url'); the value is stored
#                 under the key name_attr.
#
# The list can be replaced through the .item_tags= class method.
@@item_tags =
[
  :id,
  :title, :link, :'link+alternate', :'link+self', :'link+edit', :'link+replies',
  :author, :contributor,
  :description, :summary, :content, :'content:encoded', :comments,
  :pubDate, :published, :updated, :expirationDate, :modified, :'dc:date',
  :category, :guid,
  :'trackback:ping', :'trackback:about',
  :'dc:creator', :'dc:title', :'dc:subject', :'dc:rights', :'dc:publisher',
  :'feedburner:origLink',
  :'media:content#url', :'media:content#type', :'media:content#height', :'media:content#width',
  :'media:title', :'media:thumbnail#url', :'media:thumbnail#height', :'media:thumbnail#width',
  :'media:credit', :'media:credit#role',
  :'media:category', :'media:category#scheme'
]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, options = {}) ⇒ SimpleRss

Returns a new instance of SimpleRss.



43
44
45
46
47
48
49
# File 'lib/simple_rss.rb', line 43

# Stores the feed text and a copy of the options, then parses immediately.
#
# @param source [#read, #to_s] when the object responds to +read+ its +read+
#   result is used as the feed text; otherwise +to_s+ is used
# @param options [Hash] merged into a fresh Hash kept in @options
def initialize(source, options = {})
  @source = if source.respond_to?(:read)
              source.read
            else
              source.to_s
            end
  @items = []
  @options = {}.update(options)

  parse
end

Instance Attribute Details

#items ⇒ Object (readonly) Also known as: entries

Returns the value of attribute items.



10
11
12
# File 'lib/simple_rss.rb', line 10

# Reader for @items, the array that #parse appends each item/entry hash to.
def items; @items; end

#source ⇒ Object (readonly)

Returns the value of attribute source.



10
11
12
# File 'lib/simple_rss.rb', line 10

# Reader for @source, the feed text captured by the constructor.
def source; @source; end

Class Method Details

.feed_tags ⇒ Object



55
56
57
# File 'lib/simple_rss.rb', line 55

# Class-level reader for @@feed_tags, the list of feed-level tag names.
def feed_tags; @@feed_tags; end

.feed_tags=(ft) ⇒ Object



58
59
60
# File 'lib/simple_rss.rb', line 58

# Class-level writer: replaces the @@feed_tags list with +ft+.
def feed_tags=(ft); @@feed_tags = ft; end

.item_tags ⇒ Object



62
63
64
# File 'lib/simple_rss.rb', line 62

# Class-level reader for @@item_tags, the list of per-item tag names.
def item_tags; @@item_tags; end

.item_tags=(it) ⇒ Object



65
66
67
# File 'lib/simple_rss.rb', line 65

# Class-level writer: replaces the @@item_tags list with +it+.
def item_tags=(it); @@item_tags = it; end

.parse(source, options = {}) ⇒ Object

The strict attribute is for compatibility with Ruby’s standard RSS parser



70
71
72
# File 'lib/simple_rss.rb', line 70

# Convenience constructor: builds a new instance from +source+ and +options+
# and returns it (the constructor performs the parsing).
def parse(source, options = {})
  new(source, options)
end

Instance Method Details

#channel ⇒ Object Also known as: feed



51
# File 'lib/simple_rss.rb', line 51

# Returns the receiver itself; feed-level values are read from this object.
def channel
  self
end

#clean_content(tag, attrs, content) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
# File 'lib/simple_rss.rb', line 140

# Normalises the raw text captured for +tag+.
#
# * Date-like tags are parsed with Time.parse; if parsing raises, the
#   unescaped text is returned instead.
# * :author, :contributor, :skipHours and :skipDays have anything that looks
#   like markup (<...>) removed before unescaping.
# * For every other tag, when the content is empty and +attrs+ contains an
#   href attribute, the stripped href value is returned; otherwise the
#   unescaped content is returned.
#
# @param tag [Symbol, String] tag name being cleaned
# @param attrs [String, nil] raw attribute text of the element
# @param content [Object] raw element content (converted with +to_s+)
# @return [Time, String]
def clean_content(tag, attrs, content)
  text = content.to_s

  case tag
  when :pubDate, :lastBuildDate, :published, :updated, :expirationDate, :modified, :'dc:date'
    begin
      Time.parse(text)
    rescue StandardError
      unescape(text)
    end
  when :author, :contributor, :skipHours, :skipDays
    unescape(text.gsub(/<.*?>/, ''))
  else
    # A trailing space is appended so an unquoted href at the very end of the
    # attribute text still has a terminator to match against.
    href = text.empty? ? "#{attrs} "[/href=['"]?([^'"]*)['" ]/mi, 1] : nil
    href ? href.strip : unescape(text)
  end
end

#clean_tag(tag) ⇒ Object



152
153
154
# File 'lib/simple_rss.rb', line 152

# Converts a tag name into a Symbol usable as a hash key or reader name by
# replacing every ':' with '_' (e.g. :'dc:date' becomes :dc_date).
#
# @param tag [Symbol, String]
# @return [Symbol]
def clean_tag(tag)
  name = tag.to_s
  name.tr(':', '_').to_sym
end

#parse ⇒ Object

Raises:

  • (SimpleRSSError)



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/simple_rss.rb', line 76

# Extracts feed-level values and items from @source using regular expressions.
#
# For every tag in @@feed_tags that is found, an instance variable named after
# the cleaned tag is set and a reader for it is defined on the class. For every
# item/entry element, a Hash keyed by cleaned tag names is appended to @items.
#
# Raises SimpleRSSError when @source contains no channel/feed element.
# NOTE(review): SimpleRSSError is not defined in the visible part of the file;
# confirm it exists under that exact spelling.
#
# The if/elsif chains below have empty (nil) bodies on purpose: the conditions
# are evaluated only for their side effect of populating the regexp capture
# globals ($2, $3, $4), which are read right after each chain.
def parse
  raise SimpleRSSError, "Poorly formatted feed" unless @source =~ %r{<(channel|feed).*?>.*?</(channel|feed)>}mi
  
  # Feed's title and link
  # feed_content is the text that precedes the first item/entry element
  # (nil when the source has no complete item/entry).
  feed_content = $1 if @source =~ %r{(.*?)<(rss:|atom:)?(item|entry).*?>.*?</(rss:|atom:)?(item|entry)>}mi
  
  @@feed_tags.each do |tag|
    # Lookup order: paired tag in feed_content, self-closing tag in
    # feed_content, then the same two patterns against the whole source.
    if feed_content && feed_content =~ %r{<(rss:|atom:)?#{tag}(.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
      nil
    elsif feed_content && feed_content =~ %r{<(rss:|atom:)?#{tag}(.*?)\/\s*>}mi
      nil
    elsif @source =~ %r{<(rss:|atom:)?#{tag}(.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
      nil
    elsif @source =~ %r{<(rss:|atom:)?#{tag}(.*?)\/\s*>}mi
      nil
    end
    
    # $2 is the attribute text, $3 the element content (nil for the
    # self-closing patterns). Both are nil when nothing matched.
    if $2 || $3
       tag_cleaned = clean_tag(tag)
       instance_variable_set("@#{ tag_cleaned }", clean_content(tag, $2, $3))
       # attr_reader is private, hence send; the reader is defined on the
       # class, not only on this instance.
       self.class.send(:attr_reader, tag_cleaned)
    end
  end

  # RSS items' title, link, and description
  # match[3] is the fourth capture group: the body of the item/entry element.
  @source.scan( %r{<(rss:|atom:)?(item|entry)([\s][^>]*)?>(.*?)</(rss:|atom:)?(item|entry)>}mi ) do |match|
    item = Hash.new
    @@item_tags.each do |tag|
      if tag.to_s.include?("+")
        # "name+rel": pick the <name> element whose rel attribute equals rel.
        tag_data = tag.to_s.split("+")
        tag = tag_data[0]
        rel = tag_data[1]
        
        if match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)rel=['"]#{rel}['"](.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
           nil
        elsif match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)rel=['"]#{rel}['"](.*?)/\s*>}mi
          nil
        end
        # $3 is the attribute text following rel=..., $4 the element content.
        item[clean_tag("#{tag}+#{rel}")] = clean_content(tag, $3, $4) if $3 || $4
      elsif tag.to_s.include?("#")
        # "name#attr": extract the value of attribute attr from <name>.
        tag_data = tag.to_s.split("#")
        tag = tag_data[0]
        attrib = tag_data[1]
        if match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)#{attrib}=['"](.*?)['"](.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
           nil
        elsif match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)#{attrib}=['"](.*?)['"](.*?)/\s*>}mi
          nil
        end
        # $3 is the attribute value; it is stored under the key name_attr.
        item[clean_tag("#{tag}_#{attrib}")] = clean_content(tag, attrib, $3) if $3
      else
        # Plain tag: paired form first, then self-closing form.
        if match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)>(.*?)</(rss:|atom:)?#{tag}>}mi
          nil
        elsif match[3] =~ %r{<(rss:|atom:)?#{tag}(.*?)/\s*>}mi
          nil
        end
        item[clean_tag(tag)] = clean_content(tag, $2, $3) if $2 || $3
      end
    end
    # Lets callers read values as item.title etc.: any unknown method name is
    # used as a hash key (missing keys therefore yield nil, not NoMethodError).
    def item.method_missing(name, *args) self[name] end
    @items << item
  end

end

#test ⇒ Object



7
8
9
# File 'lib/simple_rss.rb', line 7

# Returns the fixed string "DSC".
def test
  'DSC'
end

#unescape(content) ⇒ Object



156
157
158
159
160
161
162
# File 'lib/simple_rss.rb', line 156

# Strips CDATA markers and surrounding whitespace from +content+.
#
# When the text contains a '%' directly preceded by a character outside the
# listed URL-safe set, it is first passed through CGI.unescape.
#
# @param content [String]
# @return [String]
def unescape(content)
  percent_after_unsafe = content =~ /([^-_.!~*'()a-zA-Z\d;\/?:@&=+$,\[\]]%)/n
  decoded = percent_after_unsafe ? CGI.unescape(content) : content
  decoded.gsub(/(<!\[CDATA\[|\]\]>)/, '').strip
end