Class: Entry

Inherits:
ActiveRecord::Base
  • Object
show all
Defined in:
app/models/entry.rb

Overview

Schema Information

Table name: entries

id                      :integer(4)      not null, primary key
feed_id                 :integer(4)      not null
permalink               :string(2083)    default(""), not null
author                  :string(2083)
title                   :text            default(""), not null
description             :text
content                 :text
unique_content          :boolean(1)
published_at            :datetime        not null
entry_updated_at        :datetime
harvested_at            :datetime
oai_identifier          :string(2083)
language_id             :integer(4)
direct_link             :string(2083)
indexed_at              :datetime        default(Fri Jan 01 01:01:01 UTC 1971), not null
relevance_calculated_at :datetime        default(Fri Jan 01 01:01:01 UTC 1971), not null
popular                 :text
relevant                :text
other                   :text
grain_size              :string(255)     default("unknown")

Constant Summary collapse

@@default_time_on_page =
60.0

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.avg_time(clicks, old_avg, time_on_page) ⇒ Object



105
106
107
# File 'app/models/entry.rb', line 105

# Folds one new observation into a running average of time spent on a page.
#
# The division is carried out in floating point so that all-Integer
# arguments are not silently truncated (e.g. 25 / 3 => 8).
#
# @param clicks [Numeric] number of observations *including* the new one;
#   a value of zero yields a non-finite Float
# @param old_avg [Numeric] average over the previous (clicks - 1) observations
# @param time_on_page [Numeric] the newly observed duration
# @return [Float] the updated average
def self.avg_time(clicks, old_avg, time_on_page)
  previous_total = old_avg * (clicks - 1)
  (previous_total + time_on_page) / clicks.to_f
end

.normalized_uri(uri) ⇒ Object



47
48
49
# File 'app/models/entry.rb', line 47

# Removes a trailing "index" document name from a URI string.
#
# The pattern matches "index", at most one further character, a dot and one
# of the listed extensions, anchored at the end of a line. Only the first
# match is removed; everything before it is left untouched.
#
# @param uri [String] the URI to normalize (must respond to +sub+)
# @return [String] a copy of +uri+ with the trailing index document stripped
def self.normalized_uri(uri)
  index_document = /index.?\.(html|aspx|shtm|htm|asp|php|cfm|jsp|shtml|jhtml)$/
  uri.sub(index_document, '')
end

.recommender_entry(uri) ⇒ Object



51
52
53
54
55
# File 'app/models/entry.rb', line 51

# Finds the entry whose permalink or direct_link equals the normalized form
# of +uri+.
#
# @param uri [String] the URI to look up; passed through normalized_uri first
# @return [Entry, nil] the first row returned by the query (nil when the
#   result is empty, assuming find_by_sql returns an Array)
def self.recommender_entry(uri)
  lookup_uri = normalized_uri(uri)
  matches = Entry.find_by_sql(
    ["SELECT * FROM entries WHERE permalink = ? OR direct_link = ?", lookup_uri, lookup_uri]
  )
  matches.first
end

.redirect_uri(target, referrer, redirect_type) ⇒ Object



109
110
111
112
113
114
115
116
117
118
# File 'app/models/entry.rb', line 109

# Chooses which of the target's two URIs a click should be redirected to.
#
# The permalink is returned when the target has no direct_link or when
# +redirect_type+ is "metadata". The direct_link is returned when
# +redirect_type+ is "direct_link". For any other type the referrer decides:
# the direct_link is used unless the permalink starts with
# "http://<referrer host>", in which case the permalink is used.
#
# A referrer that is missing, cannot be parsed, or has no host component is
# treated as "no referrer" and yields the permalink instead of raising.
#
# @param target [#direct_link, #permalink] the entry being linked to
# @param referrer [String, nil] the referring URI, typically client supplied
# @param redirect_type [String] "direct_link", "metadata" or anything else
# @return [String] the URI to redirect to
def self.redirect_uri(target, referrer, redirect_type)
  direct = target.direct_link
  return target.permalink if direct.nil? || redirect_type == "metadata"
  return direct if redirect_type == "direct_link"
  return target.permalink if referrer.nil?

  # The referrer is external input: it may be malformed or relative.
  begin
    referrer_host = URI.parse(referrer).host
  rescue URI::InvalidURIError
    referrer_host = nil
  end
  return target.permalink if referrer_host.nil?

  domain = "http://" + referrer_host
  target.permalink.start_with?(domain) ? target.permalink : direct
end

.search(search_terms, grain_size = nil, language = "en", limit = 10, offset = 0, operator = :or) ⇒ Object



42
43
44
45
# File 'app/models/entry.rb', line 42

# Runs a Solr search over entries via find_by_solr.
#
# When +grain_size+ is given and is not 'all', a grain_size restriction is
# appended to the search terms.
# NOTE(review): the appended fragment closes and reopens a parenthesis, so
# find_by_solr presumably wraps the query in parentheses — confirm.
#
# @param search_terms [String] the query text
# @param grain_size [String, nil] grain size to restrict to; nil or 'all' for no restriction
# @param language [String] passed to find_by_solr as :core
# @param limit [Integer] passed as :limit
# @param offset [Integer] passed as :offset
# @param operator [Symbol] passed as :operator
# @return [Object] whatever find_by_solr returns
def self.search(search_terms, grain_size = nil, language = "en", limit = 10, offset = 0, operator = :or)
  if grain_size.nil? || grain_size == 'all'
    query = search_terms
  else
    query = search_terms + ") AND (grain_size:#{grain_size}"
  end

  find_by_solr(
    query,
    :limit => limit,
    :offset => offset,
    :scores => true,
    :select => "entries.id, entries.title, entries.permalink, entries.direct_link, entries.published_at, entries.description, entries.feed_id, feeds.short_title AS collection",
    :joins => "INNER JOIN feeds ON feeds.id = entries.feed_id",
    :core => language,
    :operator => operator
  )
end

.top_tags(tags = nil) ⇒ Object



38
39
40
# File 'app/models/entry.rb', line 38

# Unimplemented placeholder: ignores +tags+ and always returns nil.
#
# @param tags [Object, nil] currently unused
# @return [nil]
def self.top_tags(tags = nil)
  nil
end

.track_click(session, recommendation_id, referrer, redirect_type = "direct_link", requester = "unknown", user_agent = "unknown") ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'app/models/entry.rb', line 120

# Records a click on a recommendation and returns the URI to redirect to.
#
# On every call the redirect URI is computed and track_time_on_page is run
# for the previously clicked recommendation. The click itself is only counted
# the first time a given recommendation id is seen in this session (ids are
# remembered in session[:rids]).
#
# @param session [#[], #[]=] per-user session store
# @param recommendation_id [Object] id of the clicked recommendation
# @param referrer [String, nil] referring URI, forwarded to redirect_uri and stored on the Click
# @param redirect_type [String] forwarded to redirect_uri
# @param requester [String] stored on the Click record
# @param user_agent [String] stored on the Click record
# @return [String] the redirect URI ("" when the recommendation lookup yields a falsy value)
def self.track_click(session, recommendation_id, referrer, redirect_type = "direct_link", requester = "unknown", user_agent = "unknown")
  recommendation = Recommendation.find(recommendation_id)
  # NOTE(review): if Recommendation.find raises for a missing id (as
  # ActiveRecord's find does), this guard can never fire — confirm.
  return "" unless recommendation

  # the entries on both ends of the recommendation
  source_entry = Entry.find(recommendation.entry_id)
  destination = Entry.find(recommendation.dest_entry_id)

  # recommendation ids already clicked during this session
  clicked_ids = session[:rids] || []

  destination_uri = redirect_uri(destination, referrer, redirect_type)

  # settle the time spent on the previously clicked page before recording this click
  track_time_on_page(session, destination_uri)

  # repeat clicks within a session are not counted again
  return destination_uri if clicked_ids.include?(recommendation_id)

  clicked_ids << recommendation_id
  session[:rids] = clicked_ids

  # fold a default-length visit into the running average, then count the click
  prior_clicks = recommendation.clicks
  weighted_total = (recommendation.avg_time_at_dest * prior_clicks) + @@default_time_on_page
  recommendation.avg_time_at_dest = weighted_total / (prior_clicks + 1)
  recommendation.clicks += 1
  recommendation.save!

  # remember this click so the next request can measure the time spent
  clicked_at = Time.now
  session[:last_clicked_recommendation] = recommendation_id
  session[:last_clicked_recommendation_time] = clicked_at
  session[:last_clicked_recommendation_uri] = destination_uri

  # refresh the source entry's cached recommendation lists
  source_entry.rank_recommendations if source_entry

  # persist the click itself
  Click.create(
    :recommendation_id => recommendation_id,
    :when => clicked_at,
    :referrer => referrer,
    :requester => requester,
    :user_agent => user_agent
  )

  destination_uri
end

.track_time_on_page(session, uri) ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'app/models/entry.rb', line 83

# Updates the average time-at-destination of the last clicked recommendation
# using the time elapsed since that click.
#
# Nothing happens when the session holds no last-clicked recommendation or
# when 5 seconds or less have elapsed. At 120 seconds or more nothing is
# inferred and the last-clicked marker is simply cleared. In between, and
# only when the normalized +uri+ differs from the stored last-clicked URI,
# one @@default_time_on_page contribution in the running total is swapped for
# the measured time, the source entry is re-ranked and the marker is cleared.
#
# @param session [#[], #[]=] per-user session store
# @param uri [String] the URI the user is moving on to
# @return [nil]
def self.track_time_on_page(session, uri)
  last_rec_id = session[:last_clicked_recommendation]
  return nil if last_rec_id.nil?

  elapsed = (Time.now - session[:last_clicked_recommendation_time].to_f).to_i

  # too short to mean anything yet; keep the marker for a later request
  return nil unless elapsed > 5

  # if they spend two minutes or longer on a page, we don't infer anything
  unless elapsed < 120
    session[:last_clicked_recommendation] = nil
    return nil
  end

  return nil unless normalized_uri(uri) != session[:last_clicked_recommendation_uri]

  recommendation = Recommendation.find(last_rec_id)
  source_entry = Entry.find(recommendation.entry_id)

  # replace the default visit length recorded at click time with the measured one
  corrected_total = recommendation.avg_time_at_dest * recommendation.clicks - @@default_time_on_page + elapsed
  recommendation.avg_time_at_dest = corrected_total / recommendation.clicks
  recommendation.save!

  source_entry.rank_recommendations
  session[:last_clicked_recommendation] = nil
end

.truncate_words(text, length = 30, end_string = ' ...') ⇒ Object



170
171
172
173
# File 'app/models/entry.rb', line 170

# Truncates +text+ to at most +length+ whitespace-separated words.
#
# Words are re-joined with single spaces, so runs of whitespace in the input
# are collapsed. +end_string+ is appended only when words were dropped.
# A nil +text+ is treated as empty and a negative +length+ as zero.
#
# @param text [String, nil] the text to shorten
# @param length [Integer] maximum number of words to keep
# @param end_string [String] suffix appended when the text was truncated
# @return [String] the shortened text
def self.truncate_words(text, length = 30, end_string = ' ...')
  words = text.to_s.split
  # A range ending at (length - 1) would wrap to "the whole array" for
  # length == 0, so take an explicit count instead.
  keep = length < 0 ? 0 : length
  shortened = words.first(keep).join(' ')
  words.length > keep ? shortened + end_string : shortened
end

Instance Method Details

#calc_click_threshold(recs) ⇒ Object



294
295
296
297
298
299
300
301
302
303
304
305
306
307
# File 'app/models/entry.rb', line 294

# Computes the click count above which a recommendation counts as popular:
# the mean plus one (population) standard deviation of the "clicks" values,
# but never less than 5.
#
# @param recs [Array<#[]>] records exposing a "clicks" value convertible with to_f
# @return [Numeric] the threshold; 5 for an empty list
def calc_click_threshold(recs)
  # An empty list would otherwise divide by zero.
  return 5 if recs.empty?

  counts = recs.map { |r| r["clicks"].to_f }
  mean = counts.inject(0.0) { |total, c| total + c } / counts.length
  variance = counts.inject(0.0) { |total, c| total + (c - mean)**2 } / counts.length
  threshold = mean + Math.sqrt(variance)
  threshold > 5 ? threshold : 5
end

#calc_relevance_threshold(recs) ⇒ Object



280
281
282
283
284
285
286
287
288
289
290
291
292
# File 'app/models/entry.rb', line 280

# Computes the relevance score above which a recommendation counts as
# relevant: the mean plus one (population) standard deviation of the
# "relevance" values.
#
# @param recs [Array<#[]>] records exposing a "relevance" value convertible with to_f
# @return [Float] the threshold; 0.0 for an empty list
def calc_relevance_threshold(recs)
  # An empty list would otherwise divide by zero.
  return 0.0 if recs.empty?

  scores = recs.map { |r| r["relevance"].to_f }
  mean = scores.inject(0.0) { |total, s| total + s } / scores.length
  variance = scores.inject(0.0) { |total, s| total + (s - mean)**2 } / scores.length
  mean + Math.sqrt(variance)
end

#json_recommendations(limit = 5, order = "mixed", details = false, omit_feeds = nil) ⇒ Object



227
228
229
# File 'app/models/entry.rb', line 227

# Returns this entry's ranked recommendations encoded as JSON.
#
# All arguments are forwarded unchanged to ranked_recommendations.
#
# @return [String] the output of ActiveSupport::JSON.encode
def json_recommendations(limit = 5, order = "mixed", details = false, omit_feeds = nil)
  ranked = ranked_recommendations(limit, order, details, omit_feeds)
  ActiveSupport::JSON.encode(ranked)
end

#randomize(recs) ⇒ Object



266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'app/models/entry.rb', line 266

# Shuffles +recs+ in place by walking from the last index down to the first
# and swapping each element with one at a randomly chosen index at or below
# it.
#
# @param recs [Array] the list to shuffle; it is modified
# @return [Array] the same +recs+ object
def randomize(recs)
  (recs.length - 1).downto(0) do |upper|
    pick = (rand * (upper + 1)).floor
    recs[upper], recs[pick] = recs[pick], recs[upper]
  end
  recs
end

#rank_recommendations ⇒ Object



231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# File 'app/models/entry.rb', line 231

# Rebuilds the cached popular / relevant / other recommendation lists stored
# as JSON on this entry.
#
# NOTE: the bare +return+ on the first line disables this method. Nothing
# below it runs and the method returns nil. The body is kept, with its
# latent defects repaired, so that it behaves as intended if re-enabled.
#
# NOTE(review): self.recommendations is called with its defaults
# (details = false), and the SELECT it builds only includes the clicks,
# relevance and avg_time_on_target columns when details is true — confirm
# the intended arguments before re-enabling.
#
# @return [nil] while disabled
def rank_recommendations
  return
  # get recommendations for the entry from the recommendations table
  recs = self.recommendations

  # for storing the various lists
  popular_recs = []
  relevant_recs = []
  other_recs = []

  # see where to draw the lines for popular and relevant
  click_threshold = calc_click_threshold(recs)
  relevance_threshold = calc_relevance_threshold(recs)

  # store the recommendations
  recs.each do |r|
    if r["clicks"].to_i > click_threshold
      popular_recs << r
    elsif r["relevance"].to_f > relevance_threshold
      relevant_recs << r
    else
      other_recs << r
    end
  end

  # order popular items by average time on target, longest first
  # (sort! so the ordering is actually kept; plain sort discards it)
  popular_recs.sort! { |r1, r2| r2["avg_time_on_target"].to_i <=> r1["avg_time_on_target"].to_i }

  # cache the JSON for the lists in the entry record
  self.popular = ActiveSupport::JSON.encode(popular_recs)
  self.relevant = ActiveSupport::JSON.encode(relevant_recs)
  self.other = ActiveSupport::JSON.encode(other_recs)
  self.save!
end

#ranked_recommendations(limit = 5, order = "mixed", details = false, omit_feeds = nil) ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'app/models/entry.rb', line 175

# Returns up to +limit+ recommendations for this entry.
#
# * order "clicks"                    -> live query ordered by clicks, then relevance
# * order "relevance" or details true -> live query ordered by relevance
# * omit_feeds given                  -> live, filtered query
# * otherwise                         -> built from the JSON lists cached on
#   this entry: popular items first (in stored order), then relevant items in
#   random order, then other items in random order, until +limit+ is reached.
#
# Each cached list is skipped when it is nil or empty.
#
# @param limit [Integer] maximum number of recommendations
# @param order [String] "clicks", "relevance", or anything else for the cached mix
# @param details [Boolean] forwarded to the live queries
# @param omit_feeds [String, nil] forwarded to the live queries
# @return [Array] the recommendations
def ranked_recommendations(limit = 5, order = "mixed", details = false, omit_feeds = nil)
  return relevant_recommendations(limit, "clicks DESC, relevance", details, omit_feeds) if order == "clicks"
  return relevant_recommendations(limit, "relevance", details, omit_feeds) if order == "relevance" || details == true
  return relevant_recommendations_filtered(limit, details, omit_feeds) unless omit_feeds.nil?

  recs = []

  cached_popular = self.popular
  unless cached_popular.nil? || cached_popular.empty?
    recs.concat(ActiveSupport::JSON.decode(cached_popular).first(limit))
  end

  cached_relevant = self.relevant
  if recs.length < limit && !cached_relevant.nil? && !cached_relevant.empty?
    relevant_recs = randomize(ActiveSupport::JSON.decode(cached_relevant))
    recs.concat(relevant_recs.first(limit - recs.length))
  end

  # Guard on the "other" list itself; checking the "relevant" list here
  # crashed on a nil "other" and skipped "other" when "relevant" was nil.
  cached_other = self.other
  if recs.length < limit && !cached_other.nil? && !cached_other.empty?
    other_recs = randomize(ActiveSupport::JSON.decode(cached_other))
    recs.concat(other_recs.first(limit - recs.length))
  end

  recs
end

#recommendation_entries(limit = 20, order = "relevance", details = false, omit_feeds = nil) ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
81
# File 'app/models/entry.rb', line 70

# Loads the entries recommended for this entry, keyed by the destination
# entry's id (dest_entry_id is selected AS id).
#
# +order+ and +limit+ end up in the SQL text, so both are validated here:
# +order+ must be a comma-separated list of column names, each optionally
# followed by ASC or DESC, and +limit+ is coerced with to_i. The ORDER BY
# clause always has " DESC" appended after +order+.
#
# @param limit [Integer, #to_i] maximum number of rows
# @param order [String] ORDER BY column list
# @param details [Boolean] when true, also selects relevance, clicks, timing,
#   author and publication columns
# @param omit_feeds [String, nil] comma-separated feed ids to exclude; every
#   character other than digits and commas is dropped, and the filter is
#   skipped when no ids remain
# @return [Object] the result of Entry.find_by_sql
# @raise [ArgumentError] when +order+ is not a plain column list
def recommendation_entries(limit = 20, order = "relevance", details = false, omit_feeds = nil)
  order_clause = order.to_s
  unless order_clause =~ /\A\s*[A-Za-z_][\w.]*(\s+(asc|desc))?(\s*,\s*[A-Za-z_][\w.]*(\s+(asc|desc))?)*\s*\z/i
    raise ArgumentError, "invalid order clause: #{order.inspect}"
  end

  # Dropping empty items avoids emitting "NOT IN ()" or stray commas.
  omitted_ids = omit_feeds.nil? ? [] : omit_feeds.gsub(/[^0-9,]/, '').split(',').reject { |feed_id| feed_id.empty? }

  parts = []
  parts << "SELECT recommendations.dest_entry_id AS id, entries.permalink, entries.title, entries.description, entries.direct_link, feeds.short_title AS collection "
  parts << ", relevance_calculated_at, relevance, clicks, avg_time_at_dest AS avg_time_on_target, author, published_at " if details == true
  parts << "FROM recommendations "
  parts << "INNER JOIN entries ON recommendations.dest_entry_id = entries.id "
  parts << "INNER JOIN feeds ON entries.feed_id = feeds.id "
  parts << "WHERE recommendations.entry_id = ? "
  parts << "AND entries.feed_id NOT IN (" + omitted_ids.join(',') + ") " unless omitted_ids.empty?
  parts << "ORDER BY " + order_clause + " DESC "
  parts << "LIMIT " + limit.to_i.to_s

  Entry.find_by_sql([parts.join, self.id])
end

#recommendations(limit = 20, order = "relevance", details = false, omit_feeds = nil) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
# File 'app/models/entry.rb', line 57

# Loads the recommendation rows for this entry, joined with the destination
# entry and its feed.
#
# +order+ and +limit+ end up in the SQL text, so both are validated here:
# +order+ must be a comma-separated list of column names, each optionally
# followed by ASC or DESC, and +limit+ is coerced with to_i. The ORDER BY
# clause always has " DESC" appended after +order+.
#
# @param limit [Integer, #to_i] maximum number of rows
# @param order [String] ORDER BY column list
# @param details [Boolean] when true, also selects relevance, clicks, timing,
#   author and publication columns
# @param omit_feeds [String, nil] comma-separated feed ids to exclude; every
#   character other than digits and commas is dropped, and the filter is
#   skipped when no ids remain
# @return [Object] the result of Entry.find_by_sql
# @raise [ArgumentError] when +order+ is not a plain column list
def recommendations(limit = 20, order = "relevance", details = false, omit_feeds = nil)
  order_clause = order.to_s
  unless order_clause =~ /\A\s*[A-Za-z_][\w.]*(\s+(asc|desc))?(\s*,\s*[A-Za-z_][\w.]*(\s+(asc|desc))?)*\s*\z/i
    raise ArgumentError, "invalid order clause: #{order.inspect}"
  end

  # Dropping empty items avoids emitting "NOT IN ()" or stray commas.
  omitted_ids = omit_feeds.nil? ? [] : omit_feeds.gsub(/[^0-9,]/, '').split(',').reject { |feed_id| feed_id.empty? }

  parts = []
  parts << "SELECT recommendations.id, dest_entry_id, entries.permalink, entries.title, entries.description, entries.direct_link, feeds.short_title AS collection "
  parts << ", relevance_calculated_at, relevance, clicks, avg_time_at_dest AS avg_time_on_target, author, published_at " if details == true
  parts << "FROM recommendations "
  parts << "INNER JOIN entries ON recommendations.dest_entry_id = entries.id "
  parts << "INNER JOIN feeds ON entries.feed_id = feeds.id "
  parts << "WHERE recommendations.entry_id = ? "
  parts << "AND entries.feed_id NOT IN (" + omitted_ids.join(',') + ") " unless omitted_ids.empty?
  parts << "ORDER BY " + order_clause + " DESC "
  parts << "LIMIT " + limit.to_i.to_s

  Entry.find_by_sql([parts.join, self.id])
end

#relevant_recommendations(limit = 5, order = "relevance", details = false, omit_feeds = nil) ⇒ Object



166
167
168
# File 'app/models/entry.rb', line 166

# Thin wrapper around recommendations with a smaller default limit (5).
#
# Every argument is forwarded unchanged.
#
# @return [Object] whatever recommendations returns
def relevant_recommendations(limit = 5, order = "relevance", details = false, omit_feeds = nil)
  forwarded = [limit, order, details, omit_feeds]
  recommendations(*forwarded)
end

#relevant_recommendations_filtered(limit, details, omit_feeds) ⇒ Object



195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'app/models/entry.rb', line 195

# Returns up to +limit+ recommendations with the given feeds omitted,
# arranged as: popular items (clicks above the click threshold, longest
# average time on target first), then relevant items (relevance above the
# relevance threshold), then everything else.
#
# The rows are fetched ordered by relevance. Passing "mixed" through as the
# ORDER BY expression, as before, put a non-column name into the SQL.
#
# NOTE(review): +details+ is forwarded as given, and the query only selects
# the clicks / relevance / avg_time_on_target columns when details is true;
# with details false the bucketing below sees no such values — confirm the
# intended behaviour with the caller.
#
# @param limit [Integer] maximum number of recommendations
# @param details [Boolean] forwarded to recommendations
# @param omit_feeds [String, nil] forwarded to recommendations
# @return [Array] at most +limit+ recommendations
def relevant_recommendations_filtered(limit, details, omit_feeds)
  # get recommendations for the entry from the recommendations table
  recs = recommendations(limit, "relevance", details, omit_feeds)

  # nothing to rank, and the threshold maths needs at least one row
  return [] if recs.empty?

  # see where to draw the lines for popular and relevant
  click_threshold = calc_click_threshold(recs)
  relevance_threshold = calc_relevance_threshold(recs)

  # sort the recommendations into their buckets
  popular_recs = []
  relevant_recs = []
  other_recs = []
  recs.each do |r|
    if r["clicks"].to_i > click_threshold
      popular_recs << r
    elsif r["relevance"].to_f > relevance_threshold
      relevant_recs << r
    else
      other_recs << r
    end
  end

  # order popular items by average time on target, longest first
  # (sort! so the ordering is actually kept; plain sort discards it)
  popular_recs.sort! { |r1, r2| r2["avg_time_on_target"].to_i <=> r1["avg_time_on_target"].to_i }

  # first(limit) returns exactly limit items; [0..limit] returned limit + 1
  (popular_recs + relevant_recs + other_recs).first(limit)
end