Class: Entry

Inherits:
ActiveRecord::Base
  • Object
show all
Defined in:
app/models/entry.rb

Overview

Schema Information

Table name: entries

id                      :integer(4)      not null, primary key
feed_id                 :integer(4)      not null
permalink               :string(2083)    default(""), not null
author                  :string(2083)
title                   :text            default(""), not null
description             :text
content                 :text
unique_content          :boolean(1)
published_at            :datetime        not null
entry_updated_at        :datetime
harvested_at            :datetime
oai_identifier          :string(2083)
language_id             :integer(4)
direct_link             :string(2083)
indexed_at              :datetime        default(Fri Jan 01 01:01:01 UTC 1971), not null
relevance_calculated_at :datetime        default(Fri Jan 01 01:01:01 UTC 1971), not null
popular                 :text
relevant                :text
other                   :text
grain_size              :string(255)     default("unknown")

Indexes

index_entries_on_direct_link              (direct_link)
index_entries_on_feed_id                  (feed_id)
index_entries_on_indexed_at               (indexed_at)
index_entries_on_language_id              (language_id)
index_entries_on_oai_identifier           (oai_identifier)
index_entries_on_permalink                (permalink)
index_entries_on_published_at             (published_at)
index_entries_on_relevance_calculated_at  (relevance_calculated_at)
index_entries_on_grain_size               (grain_size)

Constant Summary collapse

@@default_time_on_page =
60.0

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.avg_time(clicks, old_avg, time_on_page) ⇒ Object



120
121
122
# File 'app/models/entry.rb', line 120

# Folds one more observation into a running average.
#
# clicks       - number of observations, counting the new one
# old_avg      - average over the previous (clicks - 1) observations
# time_on_page - the new observation
#
# Returns the updated average. The arithmetic follows the argument types,
# so all-Integer arguments use Integer (truncating) division.
def self.avg_time(clicks, old_avg, time_on_page)
  previous_total = old_avg * (clicks - 1)
  (previous_total + time_on_page) / clicks
end

.recommender_entry(uri) ⇒ Object



67
68
69
70
# File 'app/models/entry.rb', line 67

# Finds the entry whose permalink or direct_link equals the normalized URI,
# or builds a new, unsaved Entry with that URI as its permalink.
#
# uri - the URI to look up; it is passed through normalized_uri first.
#
# Returns the first matching Entry (ordered by 'direct_link IS NULL DESC'),
# otherwise Entry.new(:permalink => normalized uri).
def self.recommender_entry(uri)
  lookup_uri = normalized_uri(uri)
  match = Entry.find(
    :first,
    :conditions => ['permalink = ? OR direct_link = ?', lookup_uri, lookup_uri],
    :order => 'direct_link IS NULL DESC'
  )
  return match if match
  Entry.new(:permalink => lookup_uri)
end

.search(search_terms, grain_size = nil, language = "en", limit = 10, offset = 0, operator = :or) ⇒ Object



62
63
64
65
# File 'app/models/entry.rb', line 62

# Runs a Solr search over entries via find_by_solr.
#
# search_terms - the query string
# grain_size   - when neither nil nor 'all', a grain_size clause is appended
# language     - passed as the :core option
# limit/offset - paging options, passed straight through
# operator     - passed as the :operator option
#
# NOTE(review): the appended clause closes and reopens a parenthesis, so it
# presumably relies on find_by_solr wrapping the query in "(...)" - confirm.
def self.search(search_terms, grain_size = nil, language = "en", limit = 10, offset = 0, operator = :or)
  if grain_size.nil? || grain_size == 'all'
    query = search_terms
  else
    query = search_terms + ") AND (grain_size:#{grain_size}"
  end

  solr_options = {
    :limit => limit,
    :offset => offset,
    :scores => true,
    :select => "entries.id, entries.title, entries.permalink, entries.direct_link, entries.published_at, entries.description, entries.feed_id, feeds.short_title AS collection",
    :joins => "INNER JOIN feeds ON feeds.id = entries.feed_id",
    :core => language,
    :operator => operator
  }
  find_by_solr(query, solr_options)
end

.top_tags(tags = nil) ⇒ Object



58
59
60
# File 'app/models/entry.rb', line 58

# Placeholder with no implementation: ignores +tags+ and always returns nil.
def self.top_tags(tags = nil)
  nil
end

.track_click(session, recommendation_id, referrer, redirect_type = "direct_link", requester = "unknown", user_agent = "unknown") ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'app/models/entry.rb', line 124

# Records a click on a recommendation and returns the URI to redirect to.
#
# session           - hash-like session store; this method reads and writes
#                     :rids and the :last_clicked_recommendation* keys
# recommendation_id - id of the Recommendation that was clicked
# referrer          - stored on the Click record
# redirect_type     - accepted but not used anywhere in this method
# requester         - stored on the Click record
# user_agent        - stored on the Click record
#
# Returns "/visits/<dest_entry_id>" (or "" if the recommendation lookup
# yields a falsy value).
def self.track_click(session, recommendation_id, referrer, redirect_type = "direct_link", requester = "unknown", user_agent = "unknown")
  # look up the recommendation
  # NOTE(review): if Recommendation.find raises for a missing id (as
  # ActiveRecord's find-by-id does), the guard below can never fire - confirm.
  recommendation = Recommendation.find(recommendation_id)
  return "" if !recommendation
  
  # get the entries being linked from and to
  # (target is never read afterwards; only the lookup itself has an effect)
  entry = Entry.find(recommendation.entry_id)
  target = Entry.find(recommendation.dest_entry_id)
  
  # get the list of recommendations that have been clicked during this session
  clicks = session[:rids] || Array.new
  
  # redirect to our frame page
  redirect = "/visits/#{recommendation.dest_entry_id}"
  
  # track the time on the last page; this runs before the session keys are
  # overwritten below, so it still sees the previously clicked recommendation
  track_time_on_page(session, redirect)
  
  # if this is first time the user clicked on this recommendation during this session 
  if !clicks.include?(recommendation_id)
    
    # add this recommendation to the end of the list 
    clicks << recommendation_id
    session[:rids] = clicks
    
    # update the click time: fold @@default_time_on_page into the running
    # average as a provisional value for this click, then count the click
    # (track_time_on_page later swaps the provisional value for a measured one)
    recommendation.avg_time_at_dest = ((recommendation.avg_time_at_dest*recommendation.clicks) + @@default_time_on_page)/(recommendation.clicks + 1) 
    recommendation.clicks += 1 
    recommendation.save!
    
    # store info about this click in the session
    now = Time.now
    session[:last_clicked_recommendation] = recommendation_id
    session[:last_clicked_recommendation_time] = now
    session[:last_clicked_recommendation_uri] = redirect

    # update the recommendation cache for the entry
    entry.rank_recommendations if entry
    
    # track the click in the db
    Click.create(:recommendation_id => recommendation_id, :when => now, :referrer => referrer, :requester => requester, :user_agent => user_agent)
  end
  
  # repeat clicks within a session skip all of the bookkeeping above but
  # still get the same redirect target
  return redirect   
end

.track_time_on_page(session, uri) ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'app/models/entry.rb', line 98

# Replaces the provisional time-on-page of the last clicked recommendation
# with a measured value, when one can be inferred from the session.
#
# session - hash-like session store; reads :last_clicked_recommendation,
#           :last_clicked_recommendation_time and :last_clicked_recommendation_uri
# uri     - the page now being requested; compared (after normalized_uri)
#           with the stored URI of the last click
#
# Always returns nil.
def self.track_time_on_page(session, uri)
  last_rec_id = session[:last_clicked_recommendation]
  return nil if last_rec_id.nil?

  # whole seconds since the stored click time
  elapsed = (Time.now - session[:last_clicked_recommendation_time].to_f).to_i

  # five seconds or less: leave the session untouched
  return nil unless elapsed > 5

  # two minutes or more: we don't infer anything, just forget the click
  if elapsed >= 120
    session[:last_clicked_recommendation] = nil
    return nil
  end

  # still on the page the click led to: nothing to record yet
  return nil if normalized_uri(uri) == session[:last_clicked_recommendation_uri]

  rec = Recommendation.find(last_rec_id)
  source_entry = Entry.find(rec.entry_id)

  # take the provisional default back out of the total and add the real time
  rec.avg_time_at_dest = (rec.avg_time_at_dest*rec.clicks - @@default_time_on_page + elapsed)/rec.clicks
  rec.save!
  source_entry.rank_recommendations
  session[:last_clicked_recommendation] = nil
end

.truncate_words(text, length = 30, end_string = ' ...') ⇒ Object



174
175
176
177
# File 'app/models/entry.rb', line 174

# Shortens text to at most +length+ whitespace-separated words.
#
# text       - the text to shorten; nil is treated as an empty string
# length     - maximum number of words to keep (values below 0 count as 0)
# end_string - appended only when words were actually dropped
#
# Returns the kept words joined by single spaces, followed by end_string if
# the text had more than +length+ words.
def self.truncate_words(text, length = 30, end_string = ' ...')
  words = text.to_s.split
  return words.join(' ') if words.length <= length

  # A 0..(length-1) range would become 0..-1 for length 0 and keep every
  # word, so take an explicit count instead.
  keep = length < 0 ? 0 : length
  words.first(keep).join(' ') + end_string
end

Instance Method Details

#calc_click_threshold(recs) ⇒ Object



302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'app/models/entry.rb', line 302

# Computes the click count above which a recommendation counts as popular:
# mean plus one (population) standard deviation of the "clicks" values,
# but never less than 5.
#
# recs - collection of records readable as r["clicks"]
#
# Returns 5 for an empty collection (the original divided 0 by 0 there and
# raised ZeroDivisionError), otherwise the larger of 5 and mean + std dev.
def calc_click_threshold(recs)
  minimum = 5
  return minimum if recs.empty?

  counts = recs.map { |r| r["clicks"].to_f }
  average = counts.inject(0.0) { |total, c| total + c } / counts.length
  variance = counts.inject(0.0) { |total, c| total + (c - average)**2 } / counts.length
  threshold = average + Math.sqrt(variance)
  threshold > minimum ? threshold : minimum
end

#calc_relevance_threshold(recs) ⇒ Object



288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'app/models/entry.rb', line 288

# Computes the relevance score above which a recommendation counts as
# relevant: mean plus one (population) standard deviation of the
# "relevance" values.
#
# recs - collection of records readable as r["relevance"]
#
# Returns 0.0 for an empty collection (the original divided 0 by 0 there and
# raised ZeroDivisionError), otherwise mean + std dev as a Float.
def calc_relevance_threshold(recs)
  return 0.0 if recs.empty?

  scores = recs.map { |r| r["relevance"].to_f }
  average = scores.inject(0.0) { |total, s| total + s } / scores.length
  variance = scores.inject(0.0) { |total, s| total + (s - average)**2 } / scores.length
  average + Math.sqrt(variance)
end

#json_recommendations(limit = 5, order = "mixed", details = false, omit_feeds = nil) ⇒ Object



235
236
237
# File 'app/models/entry.rb', line 235

# JSON-encodes the result of ranked_recommendations.
#
# All four arguments are forwarded unchanged to ranked_recommendations.
#
# Returns whatever ActiveSupport::JSON.encode produces for that result.
def json_recommendations(limit = 5, order = "mixed", details = false, omit_feeds = nil)
  recs = ranked_recommendations(limit, order, details, omit_feeds)
  ActiveSupport::JSON.encode(recs)
end

#randomize(recs) ⇒ Object



274
275
276
277
278
279
280
281
282
283
284
285
286
# File 'app/models/entry.rb', line 274

# Shuffles +recs+ in place by walking from the last index down to 0 and
# swapping each position with a randomly chosen index at or below it.
#
# recs - an Array; it is mutated
#
# Returns the same Array object.
def randomize(recs)
  (recs.length - 1).downto(0) do |i|
    j = (rand * (i + 1)).floor
    recs[i], recs[j] = recs[j], recs[i]
  end
  recs
end

#rank_recommendations ⇒ Object



239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# File 'app/models/entry.rb', line 239

# Intended to split this entry's recommendations into popular / relevant /
# other lists and cache each list as JSON on the entry.
#
# Currently a no-op: the bare `return` on the first line exits immediately
# with nil, so nothing below it runs.
def rank_recommendations
  # everything after this line is unreachable
  return
  # get recommendations for the entry from the recommendations table
  # NOTE(review): called with its defaults, so details is false and the query
  # does not select clicks or relevance - the thresholds below would then be
  # computed from nil values. Confirm before re-enabling.
  recs = self.recommendations
  
  # for storing the various lists
  popular_recs = []
  relevant_recs = []
  other_recs = []

  # decide where to draw the lines for popular and relevant
  click_threshold = calc_click_threshold(recs)
  relevance_threshold = calc_relevance_threshold(recs)
  
  # store the recommendations: popularity is checked first, then relevance
  recs.each do |r|
    if (r["clicks"].to_i > click_threshold)
      popular_recs << r 
    elsif (r["relevance"].to_f > relevance_threshold)
      relevant_recs << r
    else 
      other_recs << r
    end
  end
  
  # order popular items by avg_time_on_target, highest first
  # NOTE(review): Array#sort returns a new array and the result is discarded
  # here, so popular_recs keeps its original order.
  popular_recs.sort{|r1,r2| r2["avg_time_on_target"].to_i <=> r1["avg_time_on_target"].to_i}
  
  # cache the JSON for the lists in the entry record
  self.popular = ActiveSupport::JSON.encode(popular_recs)
  self.relevant = ActiveSupport::JSON.encode(relevant_recs)
  self.other = ActiveSupport::JSON.encode(other_recs)
  self.save!
end

#ranked_recommendations(limit = 5, order = "mixed", details = false, omit_feeds = nil) ⇒ Object



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'app/models/entry.rb', line 179

# Returns up to +limit+ recommendations for this entry.
#
# limit      - maximum number of recommendations
# order      - "clicks" or "relevance" query the recommendations table with
#              that ordering; anything else uses the cached lists
# details    - exactly true forces the relevance-ordered query
# omit_feeds - when not nil (and no earlier rule applied), delegates to
#              relevant_recommendations_filtered
#
# Unsaved entries (id is nil) are delegated to
# Entry.real_time_recommendations. Otherwise the cached JSON lists are read
# in the order popular, relevant, other; relevant and other are shuffled
# with randomize before being used to fill the remaining slots.
def ranked_recommendations(limit = 5, order = "mixed", details = false, omit_feeds = nil)
  if self.id.nil?
    return Entry.real_time_recommendations(self.permalink, details, :core => 'en', :limit => limit)
  end

  return relevant_recommendations(limit, "clicks DESC, relevance", details, omit_feeds) if order == "clicks"
  return relevant_recommendations(limit, "relevance", details, omit_feeds) if order == "relevance" || details == true
  return relevant_recommendations_filtered(limit, details, omit_feeds) unless omit_feeds.nil?

  recs = []

  # popular items keep their cached order
  popular_json = self.popular
  unless popular_json.nil? || popular_json.empty?
    recs.concat(ActiveSupport::JSON.decode(popular_json).first(limit))
  end

  # Each list is guarded by its own nil/empty check. The original tested
  # self.relevant before reading self.other, which raised when other was nil
  # and skipped other whenever relevant was nil.
  [self.relevant, self.other].each do |cached_json|
    break if recs.length >= limit
    next if cached_json.nil? || cached_json.empty?
    shuffled = randomize(ActiveSupport::JSON.decode(cached_json))
    recs.concat(shuffled.first(limit - recs.length))
  end

  recs
end

#recommendation_entries(limit = 20, order = "relevance", details = false, omit_feeds = nil) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
# File 'app/models/entry.rb', line 85

# Fetches the destination entries recommended for this entry; each row's id
# is the destination entry id.
#
# limit      - maximum number of rows; coerced with to_i before it is placed
#              in the SQL text
# order      - ORDER BY expression; " DESC" is appended to it
# details    - when exactly true, also selects the relevance, click, timing,
#              author and published_at columns
# omit_feeds - optional String of comma-separated feed ids to exclude; every
#              character other than digits and commas is stripped
#
# Returns the result of Entry.find_by_sql.
def recommendation_entries(limit = 20, order = "relevance", details = false, omit_feeds = nil)
  fragments = []
  fragments << "SELECT recommendations.dest_entry_id AS id, entries.permalink, entries.title, entries.description, entries.direct_link, feeds.short_title AS collection "
  fragments << ", relevance_calculated_at, relevance, clicks, avg_time_at_dest AS avg_time_on_target, author, published_at " if details == true
  fragments << "FROM recommendations "
  fragments << "INNER JOIN entries ON recommendations.dest_entry_id = entries.id "
  fragments << "INNER JOIN feeds ON entries.feed_id = feeds.id "
  fragments << "WHERE recommendations.entry_id = ? "

  unless omit_feeds.nil?
    # Drop empty items so input such as "1,,2" or "abc" cannot produce
    # "NOT IN (1,,2)" or "NOT IN ()"; with no ids left the clause is skipped.
    feed_ids = omit_feeds.gsub(/[^0-9,]/, '').split(',').reject { |feed_id| feed_id.empty? }
    fragments << "AND entries.feed_id NOT IN (" + feed_ids.join(',') + ") " unless feed_ids.empty?
  end

  # NOTE(review): order is spliced into the SQL verbatim; callers must only
  # pass trusted column expressions.
  fragments << "ORDER BY " + order + " DESC "

  # to_i keeps anything but an integer out of the LIMIT clause
  fragments << "LIMIT " + limit.to_i.to_s

  Entry.find_by_sql([fragments.join, self.id])
end

#recommendations(limit = 20, order = "relevance", details = false, omit_feeds = nil) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
# File 'app/models/entry.rb', line 72

# Fetches the recommendation rows stored for this entry, joined with the
# destination entry and its feed; each row's id is the recommendation id.
#
# limit      - maximum number of rows; coerced with to_i before it is placed
#              in the SQL text
# order      - ORDER BY expression; " DESC" is appended to it
# details    - when exactly true, also selects the relevance, click, timing,
#              author and published_at columns
# omit_feeds - optional String of comma-separated feed ids to exclude; every
#              character other than digits and commas is stripped
#
# Returns the result of Entry.find_by_sql.
def recommendations(limit = 20, order = "relevance", details = false, omit_feeds = nil)
  fragments = []
  fragments << "SELECT recommendations.id, dest_entry_id, entries.permalink, entries.title, entries.description, entries.direct_link, feeds.short_title AS collection "
  fragments << ", relevance_calculated_at, relevance, clicks, avg_time_at_dest AS avg_time_on_target, author, published_at " if details == true
  fragments << "FROM recommendations "
  fragments << "INNER JOIN entries ON recommendations.dest_entry_id = entries.id "
  fragments << "INNER JOIN feeds ON entries.feed_id = feeds.id "
  fragments << "WHERE recommendations.entry_id = ? "

  unless omit_feeds.nil?
    # Drop empty items so input such as "1,,2" or "abc" cannot produce
    # "NOT IN (1,,2)" or "NOT IN ()"; with no ids left the clause is skipped.
    feed_ids = omit_feeds.gsub(/[^0-9,]/, '').split(',').reject { |feed_id| feed_id.empty? }
    fragments << "AND entries.feed_id NOT IN (" + feed_ids.join(',') + ") " unless feed_ids.empty?
  end

  # NOTE(review): order is spliced into the SQL verbatim; callers must only
  # pass trusted column expressions.
  fragments << "ORDER BY " + order + " DESC "

  # to_i keeps anything but an integer out of the LIMIT clause
  fragments << "LIMIT " + limit.to_i.to_s

  Entry.find_by_sql([fragments.join, self.id])
end

#relevant_recommendations(limit = 5, order = "relevance", details = false, omit_feeds = nil) ⇒ Object



170
171
172
# File 'app/models/entry.rb', line 170

# Thin wrapper around #recommendations with a smaller default limit (5).
#
# All four arguments are forwarded unchanged.
#
# Returns whatever #recommendations returns.
def relevant_recommendations(limit = 5, order = "relevance", details = false, omit_feeds = nil)
  recommendations(limit, order, details, omit_feeds)
end

#relevant_recommendations_filtered(limit, details, omit_feeds) ⇒ Object



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'app/models/entry.rb', line 203

# Ranks this entry's recommendations on the fly (no cache) and returns at
# most +limit+ of them: popular items first, then relevant, then the rest.
#
# limit      - maximum number of recommendations to return
# details    - forwarded to #recommendations
# omit_feeds - forwarded to #recommendations
#
# Returns an Array; empty when there are no recommendations.
def relevant_recommendations_filtered(limit, details, omit_feeds)
  # get recommendations for the entry from the recommendations table
  # NOTE(review): "mixed" is passed as the order and ends up in the ORDER BY
  # clause; no column of that name is visible here - confirm. Also, unless
  # details is true the query does not select clicks, relevance or
  # avg_time_on_target, so the classification below sees nil values.
  recs = self.recommendations(limit, "mixed", details, omit_feeds)

  # nothing to rank; also keeps the threshold helpers away from empty input
  return [] if recs.empty?

  # decide where to draw the lines for popular and relevant
  click_threshold = calc_click_threshold(recs)
  relevance_threshold = calc_relevance_threshold(recs)

  # sort each recommendation into exactly one list; popularity wins
  popular_recs = []
  relevant_recs = []
  other_recs = []
  recs.each do |r|
    if r["clicks"].to_i > click_threshold
      popular_recs << r
    elsif r["relevance"].to_f > relevance_threshold
      relevant_recs << r
    else
      other_recs << r
    end
  end

  # order popular items by avg_time_on_target, highest first; the sorted
  # array must be kept because sort does not modify its receiver
  popular_recs = popular_recs.sort { |r1, r2| r2["avg_time_on_target"].to_i <=> r1["avg_time_on_target"].to_i }

  # first(limit) returns at most limit items (a 0..limit range is one more)
  (popular_recs + relevant_recs + other_recs).first(limit)
end

#resource_uri ⇒ Object



54
55
56
# File 'app/models/entry.rb', line 54

# The URI that identifies this entry's resource: direct_link when it is
# set (not nil), otherwise permalink.
def resource_uri
  link = self.direct_link
  return self.permalink if link.nil?
  link
end