Class: FeedParser::AtomFeedBuilder

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging
Defined in:
lib/feedparser/builder/atom.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(atom_feed) ⇒ AtomFeedBuilder

Returns a new instance of AtomFeedBuilder.



15
16
17
# File 'lib/feedparser/builder/atom.rb', line 15

## Builds the feed right away: passes atom_feed on to build_feed
##   and keeps the result in @feed (handed out again by to_feed).
def initialize( atom_feed )
  @feed = build_feed( atom_feed )
end

Class Method Details

.build(atom_feed) ⇒ Object



10
11
12
13
# File 'lib/feedparser/builder/atom.rb', line 10

## Convenience shortcut: creates a new builder for atom_feed
##   and returns whatever its to_feed returns.
def self.build( atom_feed )
  new( atom_feed ).to_feed
end

Instance Method Details

#build_author(atom_author) ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
# File 'lib/feedparser/builder/atom.rb', line 117

## Converts an atom author (person) into an Author.
##
##  atom_author - object answering name, uri and email; each is either nil
##                or an element answering content
##
##  Returns a new Author; name, url and email are only assigned if the
##    matching element is present AND has content.
def build_author( atom_author )
  ## pp atom_author
  author = Author.new

  name  = atom_author.name
  uri   = atom_author.uri
  email = atom_author.email

  ## note: always strip leading n trailing spaces (from content)
  ## note: content might be nil if element is empty e.g. <name></name>
  ##         - skip instead of crashing on nil.strip
  author.name  = name.content.strip    if name  && name.content
  author.url   = uri.content.strip     if uri   && uri.content
  author.email = email.content.strip   if email && email.content

  author
end

#build_feed(atom_feed) ⇒ Object

fix/todo: rename atom_feed to atom or wire or xml or in ???



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/feedparser/builder/atom.rb', line 25

## Converts a parsed atom feed into a Feed (format 'atom').
##
##  atom_feed - parsed atom feed; the code reads title, id, links, updated,
##              generator, subtitle, authors, categories and items from it
##
##  Returns the new Feed with title, feed_url, url, updated, generator,
##    summary, authors, tags and items filled in (where present).
def build_feed( atom_feed )    ## fix/todo: rename atom_feed to atom or wire or xml or in ???
  feed = Feed.new
  feed.format = 'atom'

  feed.title  = handle_content( atom_feed.title, 'feed.title' )

  ## note: <id> element (or its content) might be missing;
  ##   read it once (nil-safe) instead of crashing on nil.content / nil.strip
  feed_id = atom_feed.id && atom_feed.id.content
  logger.debug "  atom | feed.id.content  >#{feed_id}< : #{feed_id.class.name}"


  feed.url = nil

  ## note: use links (plural to allow multiple links e.g. self,alternate,etc.)
  ##   single pass: first rel=self link      => feed.feed_url
  ##                first alternate/no rel   => feed.url
  atom_feed.links.each_with_index do |link,i|
    logger.debug "  atom | feed.link[#{i+1}]  rel=>#{link.rel}< : #{link.rel.class.name} type=>#{link.type}< href=>#{link.href}<"

    ## try to find self link if present
    feed.feed_url = link.href   if feed.feed_url.nil? && link.rel == 'self'

    ## for now assume alternate is link or no rel specified (assumes alternate)
    ##   note: only set if feed.url is NOT already set
    feed.url = link.href   if feed.url.nil? && (link.rel == 'alternate' || link.rel.nil?)
  end

  ### todo/fix: issue warning if feed.url.nil? - no link found!!!!

  ## note: as fallback try id if still no url found - why?? why not??
  ##   use url only if starts_with http
  ##     might not be link e.g blogger uses for ids =>
  ##    <id>tag:blogger.com,1999:blog-4704664917418794835</id>
  ##
  ##  note: id might actually be link to feed NOT to site  (remove fallback - why - why not???)
  ##
  ## Note: remove (strip) leading and trailing spaces and newlines
  if feed.url.nil? && feed_id
    id_text  = feed_id.strip
    feed.url = id_text   if id_text.start_with?( 'http' )
  end


  if atom_feed.updated && atom_feed.updated.content    ## note: content might be nil if <updated></updated> empty
    feed.updated_local = handle_date( atom_feed.updated, 'feed.updated' )
    feed.updated       = feed.updated_local.utc
  end

  if atom_feed.generator
    generator_text = atom_feed.generator.content

    ## Note: remove (strip) leading and trailing spaces and newlines
    ##   note: content might be nil (generator without text) - skip name then
    feed.generator.name = generator_text.strip   if generator_text
    logger.debug "  atom | feed.generator.content  >#{generator_text}< : #{generator_text.class.name}"

    # pp atom_feed.generator
    feed.generator.version = atom_feed.generator.version
    feed.generator.url     = atom_feed.generator.uri
    logger.debug "  atom | feed.generator.version  >#{atom_feed.generator.version}< : #{atom_feed.generator.version.class.name}"
    logger.debug "  atom | feed.generator.uri      >#{atom_feed.generator.uri}< : #{atom_feed.generator.uri.class.name}"
  end

  if atom_feed.subtitle
    feed.summary =  handle_content( atom_feed.subtitle, 'feed.subtitle => summary' )
  end


  ## check for authors
  atom_feed.authors.each do |atom_author|
    feed.authors << build_author( atom_author )
  end

  ## check for categories/tags
  atom_feed.categories.each do |atom_cat|
    feed.tags << build_tag( atom_cat )
  end


  atom_feed.items.each do |atom_item|
    feed.items << build_item( atom_item )
  end

  feed # return new feed
end

#build_item(atom_item) ⇒ Object



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/feedparser/builder/atom.rb', line 143

## Converts an atom entry into an Item.
##
##  atom_item - parsed atom entry; the code reads title, links/link, updated,
##              published, id, content, summary, authors and categories
##
##  Returns the new Item with title, url, updated, published, guid, content,
##    summary, authors, tags and (first) enclosure attachment filled in
##    (where present).
def build_item( atom_item )
  item = Item.new

  item.title     = handle_content( atom_item.title, 'item.title' )

  ## Note: item might have many links
  ##   e.g. see blogger (headius)
  ##   <link rel='replies' type='application/atom+xml' href='http://blog.headius.com/feeds/3430080308857860963/comments/default' title='Post Comments'/>
  ##   <link rel='replies' type='text/html' href='http://blog.headius.com/2014/05/jrubyconfeu-2014.html#comment-form' title='0 Comments'/>
  ##   <link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/4704664917418794835/posts/default/3430080308857860963'/>
  ##   <link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/4704664917418794835/posts/default/3430080308857860963'/>
  ##   <link rel='alternate' type='text/html' href='http://blog.headius.com/2014/05/jrubyconfeu-2014.html'

  item.url = nil

  if atom_item.links.size == 1
    ## note: a single link is used as is (rel is NOT checked)
    item.url       = atom_item.link.href
    logger.debug "  atom | item.link.href  >#{atom_item.link.href}< : #{atom_item.link.href.class.name}"
  else
    ## note: use links (plural to allow multiple links e.g. self,alternate,etc.)
    atom_item.links.each_with_index do |link,i|
      logger.debug "  atom | item.link[#{i+1}]  rel=>#{link.rel}< : #{link.rel.class.name} type=>#{link.type}< href=>#{link.href}<"
      ## for now assume alternate is link or no rel specified (assumes alternate)
      ##   note: only set if item.url is NOT already set
      item.url = link.href   if item.url.nil? && (link.rel == 'alternate' || link.rel.nil?)
    end
  end


  if atom_item.updated && atom_item.updated.content
    item.updated_local  = handle_date( atom_item.updated, 'item.updated' )
    item.updated        = item.updated_local.utc
  end

  if atom_item.published && atom_item.published.content
    item.published_local  = handle_date( atom_item.published, 'item.published' )
    item.published        = item.published_local.utc
  end


  ## note: <id> element might be missing - guid stays nil then (no crash)
  item_id    = atom_item.id && atom_item.id.content
  item.guid  = item_id
  logger.debug "  atom | item.id.content  >#{item_id}< : #{item_id.class.name}"

  item.content = atom_item.content.content   if atom_item.content

  item.summary = handle_content( atom_item.summary, 'item.summary' )   if atom_item.summary

  ## check for authors
  atom_item.authors.each do |atom_author|
    item.authors << build_author( atom_author )
  end

  ## check for categories/tags
  atom_item.categories.each do |atom_cat|
    item.tags << build_tag( atom_cat )
  end


  ## check for attachments / media enclosures
  ###  todo/fix: allow more than one attachment/enclosure
  ##     (for now only the first rel=enclosure link gets added)
  if atom_item.links
    enclosure = atom_item.links.detect { |link| link.rel == 'enclosure' }
    if enclosure
      attachment = Attachment.new
      attachment.url    = enclosure.href
      attachment.length = enclosure.length
      attachment.type   = enclosure.type
      item.attachments << attachment
    end
  end

  item
end

#build_tag(atom_cat) ⇒ Object



130
131
132
133
134
135
136
137
138
139
140
# File 'lib/feedparser/builder/atom.rb', line 130

## Converts an atom category into a Tag.
##
##  atom_cat - object answering term and scheme (string or nil)
##
##  Returns a new Tag; name and scheme are only assigned if the stripped
##    value is not blank.
def build_tag( atom_cat )
  ## pp atom_cat
  tag = Tag.new

  ## note: always strip leading n trailing spaces
  ##         and add if present (not blank/empty e.g. not nil or "")
  ##   note: strip first, then check - otherwise whitespace-only values
  ##         would slip through as ""
  term   = atom_cat.term.to_s.strip
  scheme = atom_cat.scheme.to_s.strip

  tag.name     = term     unless term.empty?
  tag.scheme   = scheme   unless scheme.empty?

  tag
end

#handle_content(el, name) ⇒ Object

rename to handle_plain_vanilla_text_content - why? why not?



249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/feedparser/builder/atom.rb', line 249

## Returns the stripped text content of an atom text element.
##
##  el   - element answering type and content (or nil if element is missing)
##  name - label used in the debug log only e.g. 'feed.title'
##
##  Returns the content with leading and trailing whitespace removed
##    or nil if the element itself or its content is nil.
def handle_content( el, name )   ## rename to handle_plain_vanilla_text_content - why? why not?
  ### todo/fix: if type html ?? strip html tags n attributes
  ##    always strip html tags n attributes?? why? why not?

  ## note: element might be missing or empty e.g. <title></title>
  ##   - return nil instead of crashing on nil[0..30] / nil.strip
  content = el && el.content
  if content.nil?
    logger.debug "  atom | #{name}.content  >< : NilClass"
    return nil
  end

  ## note: dump head (first 30 chars)
  logger.debug "  atom | #{name}.content[0..30] (type=>#{el.type}<)  >#{content[0..30]}< : #{content.class.name}"

  ## note: always strip leading and trailing whitespaces (spaces/tabs/newlines)
  content.strip
end

#handle_date(el, name) ⇒ Object



225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'lib/feedparser/builder/atom.rb', line 225

## Converts the content of an atom date element (e.g. updated, published)
##   via to_datetime.
##
##  el   - element answering content (time-like value or nil)
##  name - label used in the debug log only e.g. 'feed.updated'
##
##  Returns el.content.to_datetime or nil if content is nil.
def handle_date( el, name )
  ## change time to utc if present? why? why not?
  #  --  .utc.strftime( "%Y-%m-%d %H:%M" )

  ###############
  # examples:
  #  2015-01-02 01:56:06 +0100

  value = el.content
  logger.debug "  atom | #{name}.content  >#{value}< : #{value.class.name}"

  # NOTE: empty updated.content possible e.g.  used by google groups feed (e.g. <updated></updated>)
  #   will return nil : NilClass
  return nil if value.nil?

  ## convert from time to to_datetime  (avoid errors on windows w/ builtin rss lib)
  value.to_datetime
end

#to_feed ⇒ Object



19
20
21
# File 'lib/feedparser/builder/atom.rb', line 19

## Returns the feed kept in @feed
##   (assigned in the constructor from the result of build_feed).
def to_feed
  @feed
end