Class: Entry

Inherits:
ActiveRecord::Base
  • Object
show all
Defined in:
app/models/entry.rb

Overview

Schema Information

Table name: entries

id                      :integer(4)      not null, primary key
feed_id                 :integer(4)      not null
permalink               :string(2083)    default(""), not null
author                  :string(2083)
title                   :text            default(""), not null
description             :text
content                 :text
unique_content          :boolean(1)
published_at            :datetime        not null
entry_updated_at        :datetime
harvested_at            :datetime
oai_identifier          :string(2083)
language_id             :integer(4)
direct_link             :string(2083)
indexed_at              :datetime        default(Fri Jan 01 01:01:01 UTC 1971), not null
relevance_calculated_at :datetime        default(Fri Jan 01 01:01:01 UTC 1971), not null
popular                 :text
relevant                :text
other                   :text
grain_size              :string(255)     default("unknown")

Indexes

index_entries_on_direct_link              (direct_link)
index_entries_on_feed_id                  (feed_id)
index_entries_on_indexed_at               (indexed_at)
index_entries_on_language_id              (language_id)
index_entries_on_oai_identifier           (oai_identifier)
index_entries_on_permalink                (permalink)
index_entries_on_published_at             (published_at)
index_entries_on_relevance_calculated_at  (relevance_calculated_at)
index_entries_on_grain_size               (grain_size)

Constant Summary collapse

@@default_time_on_page =
60.0

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.avg_time(clicks, old_avg, time_on_page) ⇒ Object



121
122
123
# File 'app/models/entry.rb', line 121

# Folds one more page-view duration into a running average.
#
# clicks       - total number of clicks, including the one being added
# old_avg      - average computed over the earlier (clicks - 1) clicks
# time_on_page - duration of the newest visit
#
# Returns the updated average. Operands are not coerced, so all-integer
# arguments produce an integer-division result.
def self.avg_time(clicks, old_avg, time_on_page)
  previous_total = old_avg * (clicks - 1)
  (previous_total + time_on_page) / clicks
end

.recommender_entry(uri) ⇒ Object



68
69
70
71
# File 'app/models/entry.rb', line 68

# Looks up the entry whose permalink or direct_link equals the normalized
# form of +uri+. When nothing matches, returns a new, unsaved Entry whose
# permalink is the normalized uri.
def self.recommender_entry(uri)
  uri = normalized_uri(uri)
  existing = Entry.find(
    :first,
    :conditions => ['permalink = ? OR direct_link = ?', uri, uri],
    :order => 'direct_link IS NULL DESC'
  )
  return existing if existing

  Entry.new(:permalink => uri)
end

.search(search_terms, grain_size = nil, language = "en", limit = 10, offset = 0, operator = :or) ⇒ Object



62
63
64
65
66
# File 'app/models/entry.rb', line 62

# Runs a Solr search over entries in the core named after +language+.
#
# search_terms - query string handed to find_by_solr
# grain_size   - when given and not 'all', appended to the query as a
#                grain_size restriction
# language     - must be one of Recommender::Languages.supported_languages
# limit/offset - paging values forwarded to find_by_solr
# operator     - forwarded to find_by_solr as :operator
#
# Raises MuckRaker::Exceptions::LanguageNotSupported for an unsupported language.
def self.search(search_terms, grain_size = nil, language = "en", limit = 10, offset = 0, operator = :or)
  unless Recommender::Languages.supported_languages.include?(language)
    raise MuckRaker::Exceptions::LanguageNotSupported, I18n.t('muck.raker.language_not_supported')
  end

  # The restriction is spliced in with deliberately unbalanced parentheses;
  # presumably find_by_solr wraps the whole query in a pair -- TODO confirm.
  query =
    if grain_size.nil? || grain_size == 'all'
      search_terms
    else
      search_terms + ") AND (grain_size:#{grain_size}"
    end

  solr_options = {
    :limit => limit,
    :offset => offset,
    :scores => true,
    :select => "entries.id, entries.title, entries.permalink, entries.direct_link, entries.published_at, entries.description, entries.feed_id, feeds.short_title AS collection",
    :joins => "INNER JOIN feeds ON feeds.id = entries.feed_id",
    :core => language,
    :operator => operator
  }
  find_by_solr(query, solr_options)
end

.top_tags(tags = nil) ⇒ Object



58
59
60
# File 'app/models/entry.rb', line 58

# Placeholder: no implementation yet. Ignores +tags+ and returns nil.
def self.top_tags(tags = nil)
  nil
end

.track_click(session, recommendation_id, referrer, redirect_type = "direct_link", requester = "unknown", user_agent = "unknown") ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'app/models/entry.rb', line 125

# Records a click on a recommendation and returns the URI to redirect to.
#
# session           - session hash; reads/writes :rids and the
#                     :last_clicked_recommendation, _time and _uri keys
# recommendation_id - id of the clicked Recommendation
# referrer          - stored on the Click row
# redirect_type     - accepted for interface compatibility; not used here
# requester         - stored on the Click row
# user_agent        - stored on the Click row
#
# Returns "/visits/<dest_entry_id>", or "" when the recommendation does not
# exist. Raises ActiveRecord::RecordNotFound when the destination entry is
# missing.
def self.track_click(session, recommendation_id, referrer, redirect_type = "direct_link", requester = "unknown", user_agent = "unknown")
  # find_by_id returns nil for a missing row (find would raise), which is
  # what makes the guard below reachable
  recommendation = Recommendation.find_by_id(recommendation_id)
  return "" unless recommendation

  # the source entry is only needed to refresh its cached rankings, so a
  # missing one is tolerated (see the `if entry` guard further down)
  entry = Entry.find_by_id(recommendation.entry_id)

  # the destination entry itself is not used, but the lookup still fails
  # loudly if the recommendation points at a deleted entry
  Entry.find(recommendation.dest_entry_id)

  # recommendations already clicked during this session
  clicks = session[:rids] || []

  # redirect to our frame page
  redirect = "/visits/#{recommendation.dest_entry_id}"

  # settle the time spent on the previously clicked page before the
  # session keys are overwritten below
  track_time_on_page(session, redirect)

  # only the first click on a recommendation per session is counted
  unless clicks.include?(recommendation_id)
    clicks << recommendation_id
    session[:rids] = clicks

    # fold the default time on page into the running average; it is swapped
    # for a measured value later by track_time_on_page
    recommendation.avg_time_at_dest = ((recommendation.avg_time_at_dest * recommendation.clicks) + @@default_time_on_page) / (recommendation.clicks + 1)
    recommendation.clicks += 1
    recommendation.save!

    # remember this click so the next request can measure time on page
    now = Time.now
    session[:last_clicked_recommendation] = recommendation_id
    session[:last_clicked_recommendation_time] = now
    session[:last_clicked_recommendation_uri] = redirect

    # update the recommendation cache for the entry
    entry.rank_recommendations if entry

    # track the click in the db
    Click.create(:recommendation_id => recommendation_id, :when => now, :referrer => referrer, :requester => requester, :user_agent => user_agent)
  end

  redirect
end

.track_time_on_page(session, uri) ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'app/models/entry.rb', line 99

# Replaces the default time-on-page recorded for the last clicked
# recommendation with the measured time since that click.
#
# session - session hash holding :last_clicked_recommendation and its
#           _time / _uri companions
# uri     - the page being requested now
#
# Nothing is inferred for visits of 5 seconds or less (the pending click is
# kept) or of 120 seconds or more (the pending click is dropped).
# Always returns nil.
def self.track_time_on_page(session, uri)
  recommendation_id = session[:last_clicked_recommendation]
  return nil if recommendation_id.nil?

  # Time minus a float yields a Time; its to_i is the elapsed whole seconds
  elapsed = (Time.now - session[:last_clicked_recommendation_time].to_f).to_i

  unless elapsed > 5 && elapsed < 120
    session[:last_clicked_recommendation] = nil if elapsed > 5
    return nil
  end

  # a request for the page the click redirected to is not a departure from it
  return nil if normalized_uri(uri) == session[:last_clicked_recommendation_uri]

  recommendation = Recommendation.find(recommendation_id)
  entry = Entry.find(recommendation.entry_id)
  # swap the default duration counted at click time for the measured one
  adjusted_total = recommendation.avg_time_at_dest * recommendation.clicks - @@default_time_on_page + elapsed
  recommendation.avg_time_at_dest = adjusted_total / recommendation.clicks
  recommendation.save!
  entry.rank_recommendations
  session[:last_clicked_recommendation] = nil
end

.truncate_words(text, length = 30, end_string = ' ...') ⇒ Object



175
176
177
178
# File 'app/models/entry.rb', line 175

# Shortens +text+ to at most +length+ whitespace-separated words.
#
# text       - string to shorten; nil is treated as an empty string
# length     - maximum number of words to keep; values below 0 count as 0
# end_string - appended only when words were actually dropped
#
# Returns the kept words joined by single spaces, plus +end_string+ if the
# text was truncated.
def self.truncate_words(text, length = 30, end_string = ' ...')
  words = text.to_s.split
  # Array#first avoids the 0..-1 range that a length of 0 would otherwise
  # produce, which selects every word instead of none
  keep = length < 0 ? 0 : length
  words.first(keep).join(' ') + (words.length > keep ? end_string : '')
end

Instance Method Details

#calc_click_threshold(recs) ⇒ Object



303
304
305
306
307
308
309
310
311
312
313
314
315
316
# File 'app/models/entry.rb', line 303

# Computes the click count above which a recommendation is treated as popular:
# the mean of the "clicks" values plus one population standard deviation,
# but never less than 5.
#
# recs - collection of records/hashes readable via ["clicks"]
#
# Returns 5 for nil or empty input (previously a ZeroDivisionError).
def calc_click_threshold(recs)
  return 5 if recs.nil? || recs.empty?

  clicks = recs.map { |r| r["clicks"].to_f }
  average = clicks.inject(0.0) { |sum, c| sum + c } / clicks.length
  variance = clicks.inject(0.0) { |sum, c| sum + (c - average)**2 } / clicks.length
  threshold = average + Math.sqrt(variance)
  threshold > 5 ? threshold : 5
end

#calc_relevance_threshold(recs) ⇒ Object



289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'app/models/entry.rb', line 289

# Computes the relevance score above which a recommendation is treated as
# relevant: the mean of the "relevance" values plus one population standard
# deviation.
#
# recs - collection of records/hashes readable via ["relevance"]
#
# Returns 0.0 for nil or empty input (previously a ZeroDivisionError).
def calc_relevance_threshold(recs)
  return 0.0 if recs.nil? || recs.empty?

  scores = recs.map { |r| r["relevance"].to_f }
  average = scores.inject(0.0) { |sum, s| sum + s } / scores.length
  variance = scores.inject(0.0) { |sum, s| sum + (s - average)**2 } / scores.length
  average + Math.sqrt(variance)
end

#json_recommendations(limit = 5, order = "mixed", details = false, omit_feeds = nil) ⇒ Object



236
237
238
# File 'app/models/entry.rb', line 236

# JSON-encodes the result of ranked_recommendations called with the same
# arguments.
def json_recommendations(limit = 5, order = "mixed", details = false, omit_feeds = nil)
  recs = ranked_recommendations(limit, order, details, omit_feeds)
  ActiveSupport::JSON.encode(recs)
end

#randomize(recs) ⇒ Object



275
276
277
278
279
280
281
282
283
284
285
286
287
# File 'app/models/entry.rb', line 275

# Shuffles +recs+ in place by walking from the last index down to 0 and
# swapping each element with one at a random index no greater than its own.
# Returns the same array object.
def randomize(recs)
  (recs.length - 1).downto(0) do |i|
    j = (rand * (i + 1)).floor
    recs[i], recs[j] = recs[j], recs[i]
  end
  recs
end

#rank_recommendations ⇒ Object



240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
# File 'app/models/entry.rb', line 240

# Written to split this entry's recommendations into popular / relevant /
# other lists and cache each list as JSON in the entry's popular, relevant
# and other columns.
#
# NOTE: the bare `return` on the first line turns the method into a no-op
# that returns nil; everything below it is unreachable.
def rank_recommendations
  return
  # get recommendations for the entry from the recommendations table
  recs = self.recommendations
  
  # for storing the various lists
  popular_recs = []
  relevant_recs = []
  other_recs = []

  # work out where to draw the lines for popular and relevant
  click_threshold = calc_click_threshold(recs)
  relevance_threshold = calc_relevance_threshold(recs)
  
  # bucket each recommendation; clicks take priority over relevance
  recs.each do |r|
    if (r["clicks"].to_i > click_threshold)
      popular_recs << r 
    elsif (r["relevance"].to_f > relevance_threshold)
      relevant_recs << r
    else 
      other_recs << r
    end
  end
  
  # compares popular items by avg_time_on_target, descending.
  # NOTE(review): `sort` is non-destructive and its result is discarded here,
  # so popular_recs keeps its original order -- `sort!` was probably intended.
  popular_recs.sort{|r1,r2| r2["avg_time_on_target"].to_i <=> r1["avg_time_on_target"].to_i}
  
  # cache the JSON for the lists in the entry record
  self.popular = ActiveSupport::JSON.encode(popular_recs)
  self.relevant = ActiveSupport::JSON.encode(relevant_recs)
  self.other = ActiveSupport::JSON.encode(other_recs)
  self.save!
end

#ranked_recommendations(limit = 5, order = "mixed", details = false, omit_feeds = nil) ⇒ Object



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'app/models/entry.rb', line 180

# Returns up to +limit+ recommendations for this entry.
#
# limit      - maximum number of recommendations
# order      - "clicks" and "relevance" query the recommendations table
#              directly; any other value (default "mixed") reads the cached
#              popular / relevant / other JSON lists
# details    - true forces the relevance-ordered query
# omit_feeds - when not nil, delegates to relevant_recommendations_filtered
#
# In the cached path the popular list is taken in stored order, then the
# relevant and other lists are each passed through randomize and used to
# fill any remaining slots.
def ranked_recommendations(limit = 5, order = "mixed", details = false, omit_feeds = nil)
  # A real-time fallback for unsaved entries (self.id.nil?) via
  # Entry.real_time_recommendations was sketched here and is disabled.
  return relevant_recommendations(limit, "clicks DESC, relevance", details, omit_feeds) if order == "clicks"
  return relevant_recommendations(limit, "relevance", details, omit_feeds) if order == "relevance" || details == true
  return relevant_recommendations_filtered(limit, details, omit_feeds) unless omit_feeds.nil?

  recs = []

  popular_json = self.popular
  unless popular_json.nil? || popular_json.empty?
    recs.concat(ActiveSupport::JSON.decode(popular_json).first(limit))
  end

  relevant_json = self.relevant
  if recs.length < limit && !relevant_json.nil? && !relevant_json.empty?
    relevant_recs = randomize(ActiveSupport::JSON.decode(relevant_json))
    recs.concat(relevant_recs.first(limit - recs.length))
  end

  # guard on `other` itself: the previous check tested `relevant` for nil,
  # which crashed on a nil `other` and skipped `other` when `relevant` was nil
  other_json = self.other
  if recs.length < limit && !other_json.nil? && !other_json.empty?
    other_recs = randomize(ActiveSupport::JSON.decode(other_json))
    recs.concat(other_recs.first(limit - recs.length))
  end

  recs
end

#recommendation_entries(limit = 20, order = "relevance", details = false, omit_feeds = nil) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
# File 'app/models/entry.rb', line 86

# Loads the destination entries recommended for this entry, joined with
# their feeds; each row's id is the destination entry id.
#
# limit      - maximum number of rows; coerced with to_i before it is
#              interpolated into the SQL
# order      - ORDER BY expression, interpolated verbatim and followed by
#              DESC; callers must only pass trusted values
# details    - true adds relevance, click and timing columns to the SELECT
# omit_feeds - comma-separated feed ids to exclude; non-digit characters are
#              stripped, and the filter is skipped when no ids remain
#
# Returns the result of Entry.find_by_sql.
def recommendation_entries(limit = 20, order = "relevance", details = false, omit_feeds = nil)
  # drop empty fragments so input such as "" or "3,,4," cannot produce an
  # invalid `NOT IN ()` / `NOT IN (3,,4,)` clause
  feed_ids = omit_feeds.to_s.gsub(/[^0-9,]/, '').split(',').reject { |feed_id| feed_id.empty? }

  sql = "SELECT recommendations.dest_entry_id AS id, entries.permalink, entries.title, entries.description, entries.direct_link, feeds.short_title AS collection "
  sql += ", relevance_calculated_at, relevance, clicks, avg_time_at_dest AS avg_time_on_target, author, published_at " if details == true
  sql += "FROM recommendations "
  sql += "INNER JOIN entries ON recommendations.dest_entry_id = entries.id "
  sql += "INNER JOIN feeds ON entries.feed_id = feeds.id "
  sql += "WHERE recommendations.entry_id = ? "
  sql += "AND entries.feed_id NOT IN (#{feed_ids.join(',')}) " unless feed_ids.empty?
  sql += "ORDER BY #{order} DESC "
  sql += "LIMIT #{limit.to_i}"
  Entry.find_by_sql([sql, self.id])
end

#recommendations(limit = 20, order = "relevance", details = false, omit_feeds = nil) ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
# File 'app/models/entry.rb', line 73

# Loads the recommendation rows for this entry, joined with the destination
# entries and their feeds; each row's id is the recommendation id.
#
# limit      - maximum number of rows; coerced with to_i before it is
#              interpolated into the SQL
# order      - ORDER BY expression, interpolated verbatim and followed by
#              DESC; callers must only pass trusted values
# details    - true adds relevance, click and timing columns to the SELECT
# omit_feeds - comma-separated feed ids to exclude; non-digit characters are
#              stripped, and the filter is skipped when no ids remain
#
# Returns the result of Entry.find_by_sql.
def recommendations(limit = 20, order = "relevance", details = false, omit_feeds = nil)
  # drop empty fragments so input such as "" or "3,,4," cannot produce an
  # invalid `NOT IN ()` / `NOT IN (3,,4,)` clause
  feed_ids = omit_feeds.to_s.gsub(/[^0-9,]/, '').split(',').reject { |feed_id| feed_id.empty? }

  sql = "SELECT recommendations.id, dest_entry_id, entries.permalink, entries.title, entries.description, entries.direct_link, feeds.short_title AS collection "
  sql += ", relevance_calculated_at, relevance, clicks, avg_time_at_dest AS avg_time_on_target, author, published_at " if details == true
  sql += "FROM recommendations "
  sql += "INNER JOIN entries ON recommendations.dest_entry_id = entries.id "
  sql += "INNER JOIN feeds ON entries.feed_id = feeds.id "
  sql += "WHERE recommendations.entry_id = ? "
  sql += "AND entries.feed_id NOT IN (#{feed_ids.join(',')}) " unless feed_ids.empty?
  sql += "ORDER BY #{order} DESC "
  sql += "LIMIT #{limit.to_i}"
  Entry.find_by_sql([sql, self.id])
end

#relevant_recommendations(limit = 5, order = "relevance", details = false, omit_feeds = nil) ⇒ Object



171
172
173
# File 'app/models/entry.rb', line 171

# Thin wrapper that forwards all four arguments to #recommendations; only
# the default limit differs (5 here).
def relevant_recommendations(limit = 5, order = "relevance", details = false, omit_feeds = nil)
  recommendations(limit, order, details, omit_feeds)
end

#relevant_recommendations_filtered(limit, details, omit_feeds) ⇒ Object



204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'app/models/entry.rb', line 204

# Builds a mixed recommendation list on the fly (without the cached JSON
# lists), honouring +omit_feeds+.
#
# limit      - maximum number of recommendations returned
# details    - forwarded to #recommendations
# omit_feeds - forwarded to #recommendations
#
# Returns at most +limit+ recommendations: those with clicks above the click
# threshold first (highest avg_time_on_target first), then those above the
# relevance threshold, then the rest. Returns [] when there are none.
def relevant_recommendations_filtered(limit, details, omit_feeds)
  # "mixed" is a ranking mode, not a sortable column, so the rows are
  # fetched by relevance.
  # NOTE(review): clicks/relevance are only selected when details is true;
  # otherwise r["clicks"] / r["relevance"] presumably read as nil -- confirm.
  recs = self.recommendations(limit, "relevance", details, omit_feeds)
  # the threshold helpers divide by the record count
  return [] if recs.empty?

  # work out where to draw the lines for popular and relevant
  click_threshold = calc_click_threshold(recs)
  relevance_threshold = calc_relevance_threshold(recs)

  popular_recs = []
  relevant_recs = []
  other_recs = []

  # bucket each recommendation; clicks take priority over relevance
  recs.each do |r|
    if r["clicks"].to_i > click_threshold
      popular_recs << r
    elsif r["relevance"].to_f > relevance_threshold
      relevant_recs << r
    else
      other_recs << r
    end
  end

  # sort in place: the non-destructive `sort` used before discarded its result
  popular_recs.sort! { |r1, r2| r2["avg_time_on_target"].to_i <=> r1["avg_time_on_target"].to_i }

  # first(limit) returns exactly `limit` items, where [0..limit] returned
  # one more
  (popular_recs + relevant_recs + other_recs).first(limit)
end

#resource_uri ⇒ Object



54
55
56
# File 'app/models/entry.rb', line 54

# Returns direct_link unless it is nil, in which case permalink is returned.
# An empty-string direct_link is returned as-is.
def resource_uri
  link = self.direct_link
  return self.permalink if link.nil?

  link
end