Class: FeedParser::Feed

Inherits:
Object
  • Object
show all
Defined in:
lib/feedparser/feedparser.rb,
lib/feedparser/html-output.rb,
lib/feedparser/text-output.rb

Overview

an RSS/Atom feed

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str = nil, uri = nil) ⇒ Feed

parse str to build a Feed



53
54
55
56
# File 'lib/feedparser/feedparser.rb', line 53

# Builds a Feed, optionally filling it in right away.
#
# @param str [String, nil] XML text of the feed; handed to #parse when given
# @param uri [String, nil] URI of the feed; handed to #parse_origin when given
def initialize(str = nil, uri = nil)
  # Both steps are optional: with no arguments the feed starts out empty.
  parse(str) if str
  parse_origin(uri) if uri
end

Instance Attribute Details

#creator ⇒ Object (readonly)

Returns the value of attribute creator.



47
48
49
# File 'lib/feedparser/feedparser.rb', line 47

# Reader for @creator. #parse resets it to nil and then fills it from the
# RSS channel's dc:creator or author element when one carries text; the
# Atom branch of #parse never assigns it.
def creator
  @creator
end

#description ⇒ Object (readonly)

Returns the value of attribute description.



47
48
49
# File 'lib/feedparser/feedparser.rb', line 47

# Reader for @description. #parse fills it from the RSS channel description
# text, or for Atom from the serialized /feed/info element (its div child
# when present); nil when the feed has neither.
def description
  @description
end

#encoding ⇒ Object (readonly)

Returns the value of attribute encoding.



47
48
49
# File 'lib/feedparser/feedparser.rb', line 47

# Reader for @encoding, which #parse takes from the parsed REXML document
# (doc.encoding).
def encoding
  @encoding
end

#items ⇒ Object (readonly)

Returns the value of attribute items.



47
48
49
# File 'lib/feedparser/feedparser.rb', line 47

# Reader for @items. #parse resets it to an empty Array and appends one
# RSSItem per RSS item element or one AtomItem per Atom entry.
def items
  @items
end

#link ⇒ Object (readonly)

Returns the value of attribute link.



47
48
49
# File 'lib/feedparser/feedparser.rb', line 47

# Reader for @link. #parse fills it from the RSS channel link text, or for
# Atom from the href of a /feed/link whose type is text/html,
# application/xhtml or application/xhtml+xml; nil when none is found.
def link
  @link
end

#origin ⇒ Object (readonly)

Returns the value of attribute origin.



160
161
162
# File 'lib/feedparser/feedparser.rb', line 160

# Reader for @origin, which #parse_origin builds from the URI it is given.
# It is only assigned when that URI has both a scheme and a host.
def origin
  @origin
end

#title ⇒ Object (readonly)

Returns the value of attribute title.



47
48
49
# File 'lib/feedparser/feedparser.rb', line 47

# Reader for @title. #parse defaults it to the empty string and replaces
# it with the feed's title text when the document provides one.
def title
  @title
end

#type ⇒ Object (readonly)

Returns the value of attribute type.



47
48
49
# File 'lib/feedparser/feedparser.rb', line 47

# Reader for @type, which #parse sets to "rss" or "atom" depending on the
# kind of document it recognised.
def type
  @type
end

#xml ⇒ Object (readonly)

REXML::Element for this feed.



50
51
52
# File 'lib/feedparser/feedparser.rb', line 50

# Reader for @xml: the root element of the REXML document built by #parse.
def xml
  @xml
end

Instance Method Details

#parse(str) ⇒ Object

Determines all the fields using a string containing an XML document



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/feedparser/feedparser.rb', line 60

# Populates the feed's fields from a string holding an XML document.
#
# Assigns @xml, @encoding, @type, @title, @link, @description, @creator and
# @items. A document is treated as RSS when its root has a channel (or
# rss:channel) child, and as Atom when /feed matches.
#
# @param str [String] raw XML text of the feed
# @raise [UnknownFeedTypeException] when the document has no root element
#   or is neither RSS nor Atom
def parse(str)
  str = FeedParser.recode(str)

  # Dirty hack: some feeds contain a bare "&" followed by whitespace, which
  # is not well-formed XML. It must be changed to "&amp;" before parsing.
  # Non-mutating gsub on purpose: recode may hand back the caller's own
  # (possibly frozen) string -- TODO confirm -- and that must not be altered.
  str = str.gsub(/&(\s+)/, '&amp;\1')
  doc = REXML::Document.new(str)
  root = doc.root
  @xml = root
  # get feed info
  @encoding = doc.encoding
  @link = @description = @creator = nil
  @title = ""
  @items = []

  # A document without a root element cannot be a feed; report it like any
  # other unrecognised input instead of failing on nil further down.
  raise UnknownFeedTypeException if root.nil?

  if root.elements['channel'] || root.elements['rss:channel']
    # We have a RSS feed!
    @type = "rss"
    # Title
    if (e = root.elements['channel/title'] ||
        root.elements['rss:channel/rss:title']) && e.text
      @title = e.text.unescape_html.toUTF8(@encoding).rmWhiteSpace!
    end
    # Link
    if (e = root.elements['channel/link'] ||
        root.elements['rss:channel/rss:link']) && e.text
      @link = e.text.rmWhiteSpace!
    end
    # Description
    if (e = root.elements['channel/description'] ||
        root.elements['rss:channel/rss:description']) && e.text
      @description = e.text.toUTF8(@encoding).rmWhiteSpace!
    end
    # Creator: dc:creator wins over author when both carry text
    if ((e = root.elements['channel/dc:creator']) && e.text) ||
        ((e = root.elements['channel/author'] ||
        root.elements['rss:channel/rss:author']) && e.text)
      @creator = e.text.unescape_html.toUTF8(@encoding).rmWhiteSpace!
    end
    # Items: use the first location that actually holds an item, falling
    # back to the namespaced top-level form.
    candidates = ['channel/item', 'item', 'rss:channel/rss:item']
    query = candidates.find { |path| root.elements[path] } || 'rss:item'
    root.each_element(query) { |item| @items << RSSItem.new(item, self) }

  elsif root.elements['/feed']
    # We have an ATOM feed!
    @type = "atom"
    # Title
    if (e = root.elements['/feed/title']) && e.text
      @title = e.text.unescape_html.toUTF8(@encoding).rmWhiteSpace!
    end
    # Link: only links advertised as HTML/XHTML qualify; when several do,
    # the last one with an href is kept.
    html_types = ['text/html', 'application/xhtml', 'application/xhtml+xml']
    root.each_element('/feed/link') do |link_el|
      type = link_el.attribute('type')
      next unless type && html_types.include?(type.value)

      href = link_el.attribute('href')
      @link = href.value.rmWhiteSpace! if href
    end
    # Description: prefer the div wrapped inside info, else info itself
    if (info = root.elements['/feed/info'])
      info = info.elements['div'] || info
      @description = info.to_s.toUTF8(@encoding).rmWhiteSpace!
    end
    # Items
    root.each_element('/feed/entry') do |entry|
      @items << AtomItem.new(entry, self)
    end
  else
    raise UnknownFeedTypeException
  end
end

#parse_origin(uri) ⇒ Object



153
154
155
156
157
158
# File 'lib/feedparser/feedparser.rb', line 153

# Derives the feed's origin ("scheme://host", plus ":port" when the port is
# not the scheme's default) from its URI and stores it in @origin.
#
# @origin is left untouched when the URI has no scheme or no host, e.g. a
# relative reference.
#
# @param uri [String] URI the feed was fetched from
# @return [String, nil] the origin that was stored, or nil when none was set
# @raise [URI::InvalidURIError] when uri cannot be parsed
def parse_origin(uri)
  parsed = URI.parse(uri)
  return unless parsed.host && parsed.scheme

  # URI#host keeps the brackets around an IPv6 literal; URI#hostname strips
  # them, which would yield an unusable origin such as "http://::1".
  port = parsed.port
  port_suffix = port && port != parsed.default_port ? ":#{port}" : ""
  @origin = "#{parsed.scheme}://#{parsed.host}#{port_suffix}"
end

#to_html(localtime = true) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/feedparser/html-output.rb', line 47

# Renders the whole feed as a standalone HTML document: a header table
# (title, type, encoding, creator), the feed description, then every item
# separated by a horizontal rule.
#
# @param localtime [Boolean] passed through to each item's #to_html
# @return [String] the HTML document
def to_html(localtime = true)
  # #parse defaults @title to "", and a feed that was never parsed has nil;
  # treat both as "no title" so the fallbacks below can actually be reached.
  title = @title.to_s

  s = ''
  s += "<!doctype html>\n"
  s += "<html lang=en>\n"
  s += "<head>\n"
  s += "<meta charset=\"utf-8\"/>\n"
  s += "<title>#{title.escape_html}</title>\n"
  s += FeedParser::STYLESHEET
  s += "</head>\n"
  s += "<body>\n"

  s += "<table class=\"feed-header\">\n"

  # Headline cell: title (or link, or a placeholder), wrapped in an anchor
  # when the feed has a link.
  label =
    if !title.empty?
      title.escape_html
    elsif @link
      @link.escape_html
    else
      "Unnamed feed"
    end
  r = ""
  # The link comes from the feed, so escape it before it lands inside the
  # href attribute (assumes escape_html covers attribute context -- verify).
  r += "<a href=\"#{@link.escape_html}\">\n" if @link
  r += label
  r += "</a>\n" if @link

  headline = "<tr><th>%s</th>\n<td>%s</td></tr>"
  s += (headline % ["Feed title:", r])
  s += (headline % ["Type:", @type])
  s += (headline % ["Encoding:", @encoding])
  s += (headline % ["Creator:", @creator.escape_html]) if @creator
  s += "</table>\n"

  # A description that does not open with a tag gets a line break so it
  # does not run into the table.
  if @description && @description !~ /\A\s*</m
    s += "<br/>\n"
  end
  # NOTE(review): the description is emitted as raw markup from the feed;
  # sanitising it is outside the scope of this method.
  s += "#{@description}" if @description

  @items.each do |item|
    s += "\n<hr/><!-- *********************************** -->\n"
    s += item.to_html(localtime)
  end
  s += "\n</body></html>\n"
  s
end

#to_s(localtime = true) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/feedparser/feedparser.rb', line 140

# Plain-text dump of the feed: one "Name: value" line per header field, a
# blank line, then the #to_s output of every item appended back to back.
#
# @param localtime [Boolean] passed through to each item's #to_s
# @return [String]
def to_s(localtime = true)
  header = "Type: #{@type}\n" \
           "Encoding: #{@encoding}\n" \
           "Title: #{@title}\n" \
           "Link: #{link}\n" \
           "Description: #{@description}\n" \
           "Creator: #{@creator}\n" \
           "\n"
  @items.inject(header) { |text, item| text + item.to_s(localtime) }
end

#to_text(localtime = true, wrapto = false) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/feedparser/text-output.rb', line 36

# Text rendering of the feed: header fields (the description converted with
# html2text), a blank line, then each item's #to_text preceded by a row of
# 40 asterisks.
#
# @param localtime [Boolean] passed through to each item's #to_text
# @param wrapto [Integer, false] passed through to each item's #to_text
# @return [String]
def to_text(localtime = true, wrapto = false)
  separator = ('*' * 40) + "\n"
  header = [
    "Type: #{@type}\n",
    "Encoding: #{@encoding}\n",
    "Title: #{@title}\n",
    "Link: #{@link}\n",
    @description ? "Description: #{@description.html2text}\n" : "Description:\n",
    "Creator: #{@creator}\n",
    "\n"
  ].join
  @items.inject(header) do |text, item|
    text + separator + item.to_text(localtime, wrapto)
  end
end