Class: Disco::Recommender

Inherits:
Object
  • Object
show all
Defined in:
lib/disco/recommender.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(factors: 8, epochs: 20, verbose: nil) ⇒ Recommender

Returns a new instance of Recommender.



5
6
7
8
9
# File 'lib/disco/recommender.rb', line 5

# Stores the training options on the instance; nothing is trained until #fit
# is called.
#
# @param factors passed to Libmf::Model.new as `factors:` by #fit
# @param epochs passed to Libmf::Model.new as `iterations:` by #fit
# @param verbose when nil, #fit turns output on only if a validation set is
#   given; its negation is passed to Libmf::Model.new as `quiet:`
def initialize(factors: 8, epochs: 20, verbose: nil)
  @factors = factors
  @epochs = epochs
  @verbose = verbose
end

Instance Attribute Details

#global_mean ⇒ Object (readonly)

Returns the value of attribute global_mean.



3
4
5
# File 'lib/disco/recommender.rb', line 3

# Reader for @global_mean, which #fit assigns from the trained model's `bias`.
# #predict uses it as the prediction for rows whose user or item is unknown.
def global_mean
  @global_mean
end

#item_factors ⇒ Object (readonly)

Returns the value of attribute item_factors.



3
4
5
# File 'lib/disco/recommender.rb', line 3

# Reader for @item_factors, which #fit assigns from
# `model.q_factors(format: :numo)`.
def item_factors
  @item_factors
end

#user_factors ⇒ Object (readonly)

Returns the value of attribute user_factors.



3
4
5
# File 'lib/disco/recommender.rb', line 3

# Reader for @user_factors, which #fit assigns from
# `model.p_factors(format: :numo)`.
def user_factors
  @user_factors
end

Instance Method Details

#fit(train_set, validation_set: nil) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/disco/recommender.rb', line 11

# Trains the factorization model and stores its results on the instance.
#
# Rows are read with `[:user_id]`, `[:item_id]` and `[:rating]` / `[:value]`.
# Both data sets are first passed through `to_dataset` (defined elsewhere;
# presumably normalizes the accepted input formats - confirm there).
#
# Feedback is treated as implicit when no training row has a truthy
# `:rating`. For explicit feedback the observed rating range is remembered in
# @min_rating / @max_rating so #predict and #user_recs can clamp their output.
#
# @param train_set rows used for training
# @param validation_set optional rows handed to the model as `eval_set:`
# @return [nil]
def fit(train_set, validation_set: nil)
  train_set = to_dataset(train_set)
  validation_set = to_dataset(validation_set) if validation_set

  @implicit = !train_set.any? { |v| v[:rating] }

  if @implicit
    # a previous explicit fit may have left bounds behind; clear them so
    # implicit scores are not clamped to a stale rating range
    @min_rating = nil
    @max_rating = nil
  else
    ratings = train_set.map { |o| o[:rating] }
    check_ratings(ratings)
    @min_rating, @max_rating = ratings.minmax

    check_ratings(validation_set.map { |o| o[:rating] }) if validation_set
  end

  check_training_set(train_set)
  # presumably populates @user_map / @item_map (id => index); both are read below
  create_maps(train_set)

  # @rated[user_index][item_index] = true for every training interaction
  @rated = Hash.new { |hash, key| hash[key] = {} }
  value_key = @implicit ? :value : :rating
  input =
    train_set.map do |v|
      u = @user_map[v[:user_id]]
      i = @item_map[v[:item_id]]
      @rated[u][i] = true

      # explicit will always have a value due to check_ratings
      [u, i, v[value_key] || 1]
    end
  # drop the auto-creating default so later lookups do not add entries
  @rated.default = nil

  eval_set =
    if validation_set
      validation_set.map do |v|
        # users/items not seen in training are set to a non-existent index
        u = @user_map[v[:user_id]] || -1
        i = @item_map[v[:item_id]] || -1

        [u, i, v[value_key] || 1]
      end
    end

  # loss codes are passed straight to Libmf: 12 for implicit, 0 for explicit
  loss = @implicit ? 12 : 0
  verbose = @verbose
  # default to showing progress when there is a validation set to report on
  verbose = true if verbose.nil? && eval_set
  model = Libmf::Model.new(loss: loss, factors: @factors, iterations: @epochs, quiet: !verbose)
  model.fit(input, eval_set: eval_set)

  @global_mean = model.bias

  @user_factors = model.p_factors(format: :numo)
  @item_factors = model.q_factors(format: :numo)

  # indexes built by optimize_similar_* belong to the previous factors
  @user_index = nil
  @item_index = nil
end

#optimize_similar_items ⇒ Object Also known as: optimize_item_recs



133
134
135
136
# File 'lib/disco/recommender.rb', line 133

# Builds an index over @item_factors with `create_index` (defined elsewhere)
# and keeps it in @item_index, which #similar_items passes on to `similar`.
# #fit resets @item_index to nil, so this has to be called again after a refit.
# Calls `check_fit` first, like the other query methods.
def optimize_similar_items
  check_fit
  @item_index = create_index(@item_factors)
end

#optimize_similar_users ⇒ Object



139
140
141
142
# File 'lib/disco/recommender.rb', line 139

# Builds an index over @user_factors with `create_index` (defined elsewhere)
# and keeps it in @user_index, which #similar_users passes on to `similar`.
# #fit resets @user_index to nil, so this has to be called again after a refit.
# Calls `check_fit` first, like the other query methods.
def optimize_similar_users
  check_fit
  @user_index = create_index(@user_factors)
end

#predict(data) ⇒ Object

Generates a prediction even if the user has already rated the item.



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/disco/recommender.rb', line 75

# Predicts a score for every row of +data+, including pairs the user has
# already rated.
#
# Rows are read with `[:user_id]` and `[:item_id]` after `to_dataset`
# (defined elsewhere) has been applied. A row whose user or item is not in
# @user_map / @item_map gets @global_mean. All other predictions are clipped
# to @min_rating..@max_rating when @min_rating is set.
#
# @param data rows to score
# @return [Array] one prediction per row, in input order
def predict(data)
  # same guard as the other query methods (user_recs, similar_*), so calling
  # this before #fit fails the same way they do instead of on a nil lookup
  check_fit

  data = to_dataset(data)

  u = data.map { |v| @user_map[v[:user_id]] }
  i = data.map { |v| @item_map[v[:item_id]] }

  # rows with an unknown user or item: point them at index 0 so the batched
  # product below is well-formed; their result is overwritten afterwards
  new_index = data.each_index.select { |index| u[index].nil? || i[index].nil? }
  new_index.each do |j|
    u[j] = 0
    i[j] = 0
  end

  predictions = @user_factors[u, true].inner(@item_factors[i, true])
  predictions.inplace.clip(@min_rating, @max_rating) if @min_rating
  predictions[new_index] = @global_mean
  predictions.to_a
end

#similar_items(item_id, count: 5) ⇒ Object Also known as: item_recs



144
145
146
147
# File 'lib/disco/recommender.rb', line 144

# Looks up items similar to +item_id+ by delegating to `similar` (defined
# elsewhere) with the item map, the item factors, `item_norms` and the
# optional @item_index built by #optimize_similar_items.
#
# @param item_id id of the reference item
# @param count forwarded to `similar`; presumably the number of results
def similar_items(item_id, count: 5)
  check_fit
  similar(item_id, @item_map, @item_factors, item_norms, count, @item_index)
end

#similar_users(user_id, count: 5) ⇒ Object



150
151
152
153
# File 'lib/disco/recommender.rb', line 150

# Looks up users similar to +user_id+ by delegating to `similar` (defined
# elsewhere) with the user map, the user factors, `user_norms` and the
# optional @user_index built by #optimize_similar_users.
#
# @param user_id id of the reference user
# @param count forwarded to `similar`; presumably the number of results
def similar_users(user_id, count: 5)
  check_fit
  similar(user_id, @user_map, @user_factors, user_norms, count, @user_index)
end

#user_recs(user_id, count: 5, item_ids: nil) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/disco/recommender.rb', line 93

# Recommends items for +user_id+, best score first.
#
# Without +item_ids+, items recorded in @rated for the user are left out and
# at most +count+ results are returned. With +item_ids+, only those ids that
# exist in @item_map are scored and +count+ is ignored.
#
# @param user_id id of the user to recommend for
# @param count maximum number of results (nil for no limit)
# @param item_ids optional list of candidate item ids
# @return [Array<Hash>] hashes with :item_id and :score; empty for an unknown user
def user_recs(user_id, count: 5, item_ids: nil)
  check_fit

  user_index = @user_map[user_id]
  # no items if user is unknown
  # TODO maybe most popular items
  return [] unless user_index

  scores = @item_factors.inner(@user_factors[user_index, true])
  recs = @item_map.keys.zip(scores).map { |item_id, score| {item_id: item_id, score: score} }

  if item_ids
    wanted = item_ids.map { |id| @item_map[id] }.compact
    recs = recs.values_at(*wanted)
  else
    # delete from the highest index down so earlier removals do not shift
    # the positions still to be removed
    @rated[user_index].keys.sort.reverse_each { |idx| recs.delete_at(idx) }
  end

  recs.sort_by! { |rec| -rec[:score] }
  recs = recs.first(count) if count && !item_ids

  # clamp *after* sorting so the ranking uses raw scores;
  # only the returned entries need it
  if @min_rating
    recs.each { |rec| rec[:score] = rec[:score].clamp(@min_rating, @max_rating) }
  end

  recs
end