3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
# File 'lib/disco/data.rb', line 3
# Loads the MovieLens 100K ratings data set.
#
# Obtains local paths for "u.item" (movie metadata) and "u.data" (ratings)
# through +download_file+ (defined elsewhere; it is given a +file_hash:+ for
# each file), then joins the two so that every rating carries the movie title
# instead of the numeric movie id.
#
# @return [Array<Hash>] one hash per row of u.data with the keys +:user_id+
#   (Integer), +:item_id+ (movie title String, or nil when the movie id does
#   not appear in u.item) and +:rating+ (Integer)
def load_movielens
  item_path = download_file("ml-100k/u.item", "http://files.grouplens.org/datasets/movielens/ml-100k/u.item",
    file_hash: "553841ebc7de3a0fd0d6b62a204ea30c1e651aacfb2814c7a6584ac52f2c5701")
  data_path = download_file("ml-100k/u.data", "http://files.grouplens.org/datasets/movielens/ml-100k/u.data",
    file_hash: "06416e597f82b7342361e41163890c81036900f418ad91315590814211dca490")

  # u.item is Latin-1 (ISO-8859-1) encoded. Transcode it to UTF-8 rather than
  # dropping the non-ASCII bytes, otherwise accented titles lose characters
  # (e.g. "Les Misérables" would become "Les Misrables").
  movies_str = File.binread(item_path).encode("UTF-8", "ISO-8859-1")

  # Map movie id (kept as the String read from the file) => movie title.
  titles = CSV.parse(movies_str, col_sep: "|").each_with_object({}) do |row, lookup|
    lookup[row[0]] = row[1]
  end

  # u.data is tab separated: user id, movie id, rating, (timestamp, unused).
  CSV.foreach(data_path, col_sep: "\t").map do |row|
    {
      user_id: row[0].to_i,
      item_id: titles[row[1]],
      rating: row[2].to_i
    }
  end
end
|