Class: Pluto::Model::Feed

Inherits:
ActiveRecord::Base
  • Object
show all
Includes:
TextUtils::HypertextHelper
Defined in:
lib/pluto/models/feed.rb

Defined Under Namespace

Classes: Data

Constant Summary collapse

FIX_DATE_SLUG_RE =

Tries to get the date from the slug in the URL.

e.g. /news/2019-10-17-growing-ruby-together
# Matches a YYYY-MM-DD date between word boundaries (\b);
#  named captures: year (4 digits), month (2 digits), day (2 digits).
# note: the digits are NOT range checked (e.g. month 13 matches too)
# note: x (extended) flag - whitespace inside the pattern gets ignored
%r{\b
  (?<year>[0-9]{4})
     -
  (?<month>[0-9]{2})
     -
  (?<day>[0-9]{2})   
\b}x

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.latestObject



32
33
34
35
36
37
38
39
# File 'lib/pluto/models/feed.rb', line 32

# Orders the feeds by the cached items_last_updated column, newest first.
#
# note: coalesce returns the first non-null value
#   - supported by sqlite (yes), postgres (yes)
# note: feeds without items_last_updated sort as if dated
#   1970-01-01 (hardcoded for now)
#
# was: order( "coalesce(updated,published,'1970-01-01') desc" )
#
# Returns whatever order returns for the (raw) sql order clause.
def self.latest
  newest_first = Arel.sql( "coalesce(feeds.items_last_updated,'1970-01-01') desc" )
  order( newest_first )
end

Instance Method Details

#dataObject

use a different name for data - why? why not?

e.g. inner, internal, readonly or r, raw, table, direct, or ???


123
# File 'lib/pluto/models/feed.rb', line 123

# Returns the Data wrapper built around this feed record.
#  The wrapper gets created on first use and cached in @data.
def data
  @data ||= Data.new( self )
end

#dateObject

add convenience date attribute helpers / readers

- what to return if date is nil? - return nil or empty string or 'n/a' or '?' - why? why not?

Readers: date, date_iso (alias: date_iso8601), date_822 (aliases: date_rfc2822, date_rfc822)



93
# File 'lib/pluto/models/feed.rb', line 93

# Convenience date reader - returns the same value as updated.
def date
  updated
end

#date_822Object Also known as: date_rfc2822, date_rfc822



98
# File 'lib/pluto/models/feed.rb', line 98

# Returns date formatted via rfc822 or an empty string if there is no date.
def date_822
  return '' unless date

  date.rfc822
end

#date_isoObject Also known as: date_iso8601



95
# File 'lib/pluto/models/feed.rb', line 95

# Returns date formatted via iso8601 or an empty string if there is no date.
def date_iso
  return '' unless date

  date.iso8601
end

#debug?Boolean

Returns:

  • (Boolean)


16
# File 'lib/pluto/models/feed.rb', line 16

# Returns the debug? flag of the (global) Pluto configuration.
def debug?
  Pluto.config.debug?
end

#deep_update_from_struct!(data) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/pluto/models/feed.rb', line 126

# Updates this feed record AND its item records from a (parsed) feed struct.
#
# If the feed has any items:
#   - applies the date fix-ups (see fix_dates)
#   - creates or updates an item record for every feed item
#      (items not matching the include filters - if present - get skipped)
#   - deletes stored items no longer listed in the feed
#   - refreshes the cached items_last_updated value
# Last - always - hands the struct to update_from_struct!
#   and returns its result.
#
# data - feed struct; must respond to items (every item gets read for
#         guid, title and published) plus everything fix_dates and
#         update_from_struct! read
def deep_update_from_struct!( data )

  logger = LogUtils::Logger.root

  ## note: handle case with empty feed, that is, feed with NO items / entries
  ##                                                    (e.g. data.items.size == 0).
  if data.items.size > 0

    #####
    ## apply some fix-up for "broken" feed data
    fix_dates( data )


    ######
    ## check for filters (includes/excludes) if present
    ##  for now just check for includes
    ##
    include_filter = includes.present? ? FeedFilter::IncludeFilters.new( includes ) : nil

    data.items.each do |item|
      if include_filter && include_filter.match_item?( item ) == false
        logger.info "** SKIPPING | #{item.title}"
        logger.info "  no include terms match: #{includes}"
        next   ## skip to next item
      end

      item_rec = Item.find_by_guid( item.guid )
      if item_rec.nil?
        item_rec  = Item.new
        logger.info "** NEW | #{item.title}"
      else
        ## todo: check if any attribs changed
        logger.info "UPDATE | #{item.title}"
      end

      item_rec.feed_id = id        # feed_rec.id - add feed_id fk_ref
      item_rec.fetched = fetched   # feed_rec.fetched

      item_rec.update_from_struct!( item )
    end  # each item


    ###
    #  delete (old) feed items if no longer in feed AND
    #   date range is in (latest/current) feed list
    #
    #  thanks to Harry Wood
    #   see https://github.com/feedreader/pluto/pull/16
    #    for more comments

    #  todo/fix: use a delete feature/command line flag to make it optional - why? why not?

    guids_in_feed = data.items.map {|item| item.guid }

    ## note: only items with a published date count for the date range;
    ##   comparing a mix of nil and date values (as min_by would do)
    ##   raises an ArgumentError
    earliest_still_in_feed = data.items.map {|item| item.published }.compact.min

    ## note: without any published date there is no date range to check
    ##   (a "published > NULL" query never matches anyway) - skip the delete step
    if earliest_still_in_feed
      items_no_longer_present =
        Item
          .where(feed_id: id)
          .where.not(published: nil)
          .where("published > ?", earliest_still_in_feed)
          .where.not(guid: guids_in_feed)

      unless items_no_longer_present.empty?
        logger.info "#{items_no_longer_present.size} items no longer present in the feed (presumed removed at source). Deleting from planet db"
        items_no_longer_present.each do |item|
          logger.info "** DELETE | #{item.title}"
          item.destroy
        end
      end
    end


    #  update  cached value last published for item
    ##  todo/check: force reload of items - why? why not??
    last_item_rec = items.latest.limit(1).first  # note limit(1) will return relation/array - use first to get first element or nil from ary
    if last_item_rec
      if last_item_rec.updated?   ## note: checks for updated & published with attr_reader_w_fallback
        self.items_last_updated = last_item_rec.updated
        ## save!  ## note: will get save w/ update_from_struct!  - why? why not??
      else
        ## skip - no updated / published present
      end
    end
  end  # check for if data.items.size > 0  (that is, feed has feed items/entries)

  update_from_struct!( data )
end

#feed_urlObject



69
# File 'lib/pluto/models/feed.rb', line 69

# Reads feed_url via read_attribute_w_fallbacks with auto_feed_url as fallback.
def feed_url
  read_attribute_w_fallbacks( :feed_url, :auto_feed_url )
end

#feed_url?Boolean

Returns:

  • (Boolean)


73
# File 'lib/pluto/models/feed.rb', line 73

# Returns the result of present? for feed_url.
def feed_url?
  feed_url.present?
end

#fix_dates(data) ⇒ Object

helpers to fix-up some “broken” feed data



230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# File 'lib/pluto/models/feed.rb', line 230

# Applies fix-ups for "broken" feed data; changes the items of data in place.
#
#  1) items with neither an updated nor a published date get a published
#      date from a date slug (see FIX_DATE_SLUG_RE) in the item url - if any
#  2) if ALL items have a published date and the updated dates look
#      "fake" (see below), all updated dates get nullified
#
# data - feed struct; must respond to items
def fix_dates( data )

  ## check for missing / no dates
  ##   examples
  ##    - rubytogether feed @ https://rubytogether.org/news.xml
  data.items.each do |item|
    next unless item.updated.nil? && item.published.nil?

    ## try to get date from slug in url
    ##  e.g. /news/2019-10-17-growing-ruby-together
    m = FIX_DATE_SLUG_RE.match( item.url )
    next unless m

    year  = m[:year].to_i(10)
    month = m[:month].to_i(10)
    day   = m[:day].to_i(10)

    ## note: the regex accepts any digits (e.g. 2019-13-45);
    ##   skip slugs that are no real calendar date
    ##   (otherwise DateTime.new raises and the whole feed update fails)
    next unless Date.valid_date?( year, month, day )

    ## todo/fix: make sure DateTime gets utc (no timezone/offset +000)
    published = DateTime.new( year, month, day )
    item.published_local  = published
    item.published        = published
  end


  ## check if all updated dates are the same (uniq count is 1)
  ##   AND if all published dates are present
  ##  then assume "fake" updated dates and nullify updated dates
  ##   example real-world "messed-up" feeds include:
  ##   -  https://bundler.io/blog/feed.xml
  ##   -  https://dry-rb.org/feed.xml
  ##
  ##  todo/check - limit to atom feed format only - why? why not?

  count           = data.items.size
  count_published = data.items.count {|item| item.published }

  if count == count_published
    ## note: counts the changes of the updated date from one item to the next
    ##   (starting from nil) - that is, 1 if all items share one updated date
    ##   and 0 if there are no updated dates at all
    uniq_count_updated  = 0
    last_updated        = nil

    data.items.each do |item|
      uniq_count_updated += 1   if item.updated != last_updated
      last_updated = item.updated
    end

    if uniq_count_updated == 1
      puts "bingo!! nullify all updated dates"
      ## todo/fix: log report updated date fix!!!!
      data.items.each do |item|
        item.updated       = nil
        item.updated_local = nil
      end
    end
  end
end

#publishedObject



80
# File 'lib/pluto/models/feed.rb', line 80

# Reads published via read_attribute_w_fallbacks with updated as fallback.
def published
  read_attribute_w_fallbacks( :published, :updated )
end

#published?Boolean

Returns:

  • (Boolean)


83
# File 'lib/pluto/models/feed.rb', line 83

# Returns the result of present? for published.
def published?
  published.present?
end

#titleObject



68
# File 'lib/pluto/models/feed.rb', line 68

# Reads title via read_attribute_w_fallbacks with auto_title as fallback.
def title
  read_attribute_w_fallbacks( :title, :auto_title )
end

#title?Boolean

Returns:

  • (Boolean)


72
# File 'lib/pluto/models/feed.rb', line 72

# Returns the result of present? for title.
def title?
  title.present?
end

#update_from_struct!(data) ⇒ Object



285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# File 'lib/pluto/models/feed.rb', line 285

# Collects the feed-level attributes from a (parsed) feed struct
#  and passes them on to update!.
#
# data - feed struct; gets read for format, updated, published, summary,
#         generator, title, url and feed_url
#
# Returns whatever update! returns.
#
# todo:
#  strip all tags from summary (subtitle)
#  limit to 255 chars
# e.g. summary (subtitle) such as this exist
#  This is a low-traffic announce-only list for people interested
#  in hearing news about Polymer (<a href="http://polymer-project.org">http://polymer-project.org</a>).
# The higher-traffic mailing list for all kinds of discussion is
#  <a href="https://groups.google.com/group/polymer-dev">https://groups.google.com/group/polymer-dev</a>
def update_from_struct!( data )
  log = LogUtils::Logger.root

  attrs = {}
  attrs[:format]    = data.format
  attrs[:updated]   = data.updated
  attrs[:published] = data.published
  attrs[:summary]   = data.summary
  ## note: generator gets stringified - that is, no generator (nil) results in an empty string
  attrs[:generator] = data.generator.to_s

  ## note: always auto-update auto_* fields for now
  attrs[:auto_title]    = data.title
  attrs[:auto_url]      = data.url
  attrs[:auto_feed_url] = data.feed_url

  if debug?
    log.debug "*** dump feed_attribs w/ class types:"
    attrs.each_pair do |name, val|
      log.debug "  #{name}: >#{val}< : #{val.class.name}"
    end
  end

  update!( attrs )
end

#updatedObject

note:

only use fallback for updated, that is, updated (or published)
 ~~do NOT use fallback for published / created    -- why? why not?~~
 add items_last_updated  to updated as last fall back - why? why not?


79
# File 'lib/pluto/models/feed.rb', line 79

# Reads updated via read_attribute_w_fallbacks with published as fallback.
def updated
  read_attribute_w_fallbacks( :updated, :published )
end

#updated?Boolean

Returns:

  • (Boolean)


82
# File 'lib/pluto/models/feed.rb', line 82

# Returns the result of present? for updated.
def updated?
  updated.present?
end

#urlObject

attributes with fallbacks or (auto-)backups - use feed.data.<attribute> for “raw” / “original” access



67
# File 'lib/pluto/models/feed.rb', line 67

# Reads url via read_attribute_w_fallbacks with auto_url as fallback.
def url
  read_attribute_w_fallbacks( :url, :auto_url )
end

#url?Boolean

Returns:

  • (Boolean)


71
# File 'lib/pluto/models/feed.rb', line 71

# Returns the result of present? for url.
def url?
  url.present?
end