Class: PheldItunesDataMiner

Inherits:
Object
  • Object
show all
Defined in:
lib/pheld_itunes_data_miner.rb

Overview

Student Name: Peter Held Homework Week: 8

Constant Summary collapse

VERSION =
'1.0.0'

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.run(file_name) ⇒ Object



19
20
21
22
23
24
25
26
# File 'lib/pheld_itunes_data_miner.rb', line 19

# Parses the iTunes library at +file_name+ and prints its statistics.
#
# @param file_name [String] path to the iTunes library XML file
# @return [Object, nil] whatever print_stats returns, or nil when the library
#   could not be read
def self.run(file_name)
  idm = PheldItunesDataMiner.new
  tracks = idm.parse_file(file_name)

  # parse_file returns nil for a missing file; report that instead of letting
  # print_stats fail with a NoMethodError on nil.
  if tracks.nil?
    warn "Could not read iTunes library: #{file_name}"
    return nil
  end

  idm.print_stats(tracks)
end

Instance Method Details

#correlation(x, y) ⇒ Object



372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
# File 'lib/pheld_itunes_data_miner.rb', line 372

# Computes the Pearson correlation coefficient of two numeric series that are
# paired by index.
#
# @param x [Array<Numeric>] first series
# @param y [Array<Numeric>] second series, same size as +x+
# @return [Float] the coefficient; Float::NAN when the series are empty or at
#   least one of them has no variance (the coefficient is undefined there)
# @raise [ArgumentError] if +x+ and +y+ differ in size
def correlation(x, y)
  raise ArgumentError, "x and y must have the same size" unless x.size == y.size

  n = x.size
  # sum() yields nil for an empty list, so bail out before doing arithmetic.
  return Float::NAN if n.zero?

  sum_x = sum(x)
  sum_y = sum(y)
  sum_x_squared = sum(x.map { |item| item * item })
  sum_y_squared = sum(y.map { |item| item * item })
  sum_xy = sum(x.zip(y).map { |x_item, y_item| x_item * y_item })

  numerator = n * sum_xy - sum_x * sum_y
  radicand = (n * sum_x_squared - sum_x**2) * (n * sum_y_squared - sum_y**2)

  # A non-positive radicand means a series is constant (or float rounding went
  # slightly negative); taking its root would give 0.0 or a Complex value.
  return Float::NAN unless radicand > 0

  numerator / radicand**0.5
end

#get_average_year(tracks) ⇒ Object



290
291
292
293
294
295
296
297
298
299
300
# File 'lib/pheld_itunes_data_miner.rb', line 290

# Averages the year of all tracks that have one.
#
# The caller's array is left untouched; tracks without a year are skipped.
#
# @param tracks [Array] objects responding to #year
# @return [Numeric] total of the years divided by the number of tracks that
#   have a year (integer division when the years are Integers)
# @raise [ZeroDivisionError] when no track has an Integer year to average
def get_average_year(tracks)
  years = tracks.map { |track| track.year }.compact

  # Divide by the number of years actually summed, not by the input size.
  years.inject(0) { |total, year| total + year } / years.length
end

#get_bit_rate(tracks) ⇒ Object



354
355
356
357
358
359
360
361
362
# File 'lib/pheld_itunes_data_miner.rb', line 354

# Collects the bit rate of every track, preserving order.
#
# @param tracks [Array] objects responding to #bit_rate
# @return [Array] one entry per track (nil entries are kept)
def get_bit_rate(tracks)
  tracks.map(&:bit_rate)
end

#get_bitrate_playcount_correlation(tracks) ⇒ Object



263
264
265
266
267
268
269
270
271
# File 'lib/pheld_itunes_data_miner.rb', line 263

# Correlates bit rate with age-normalized play count.
#
# Tracks missing a bit rate, play count or date added are ignored. The
# caller's array is not modified.
#
# @param tracks [Array] objects responding to #bit_rate, #play_count and
#   #date_added
# @return [Object] whatever correlation returns for the two series
def get_bitrate_playcount_correlation(tracks)
  complete_tracks = tracks.reject do |track|
    track.bit_rate.nil? || track.play_count.nil? || track.date_added.nil?
  end

  bit_rates = get_bit_rate(complete_tracks)
  play_counts = get_play_counts_normalized_for_date_added(complete_tracks)

  correlation(bit_rates, play_counts)
end


173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/pheld_itunes_data_miner.rb', line 173

# Ranks artists by how many tracks they have.
#
# Tracks without an artist are ignored and the caller's array is not
# modified. Artists with equal counts keep their first-appearance order.
#
# @param tracks [Array] objects responding to #artist
# @param list_depth [Integer] maximum number of entries to return
# @return [Array<Array>] [artist, track_count] pairs, most frequent first;
#   empty when list_depth is not positive
def get_most_popular_artists(tracks, list_depth)
  return [] unless list_depth > 0

  # count the occurrences of each artist
  counts = Hash.new(0)
  tracks.each do |track|
    counts[track.artist] += 1 unless track.artist.nil?
  end

  # highest count first; the position breaks ties deterministically
  ranked = counts.sort_by.with_index { |(_artist, count), position| [-count, position] }
  ranked.first(list_depth)
end


233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/pheld_itunes_data_miner.rb', line 233

# Ranks genres by how many tracks they have.
#
# Tracks without a genre are ignored and the caller's array is not modified.
# Genres with equal counts keep their first-appearance order.
#
# @param tracks [Array] objects responding to #genre
# @param list_depth [Integer] maximum number of entries to return
# @return [Array<Array>] [genre, track_count] pairs, most frequent first;
#   empty when list_depth is not positive
def get_most_popular_genres(tracks, list_depth)
  return [] unless list_depth > 0

  # count the occurrences of each genre
  counts = Hash.new(0)
  tracks.each do |track|
    counts[track.genre] += 1 unless track.genre.nil?
  end

  # highest count first; the position breaks ties deterministically
  ranked = counts.sort_by.with_index { |(_genre, count), position| [-count, position] }
  ranked.first(list_depth)
end


203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# File 'lib/pheld_itunes_data_miner.rb', line 203

# Ranks years by how many tracks they have.
#
# Tracks without a year are ignored and the caller's array is not modified.
# Years with equal counts keep their first-appearance order.
#
# @param tracks [Array] objects responding to #year
# @param list_depth [Integer] maximum number of entries to return
# @return [Array<Array>] [year, track_count] pairs, most frequent first;
#   empty when list_depth is not positive
def get_most_popular_years(tracks, list_depth)
  return [] unless list_depth > 0

  # count the occurrences of each year
  counts = Hash.new(0)
  tracks.each do |track|
    counts[track.year] += 1 unless track.year.nil?
  end

  # highest count first; the position breaks ties deterministically
  ranked = counts.sort_by.with_index { |(_year, count), position| [-count, position] }

  # first() copes with the ranking being shorter than list_depth
  ranked.first(list_depth)
end

#get_number_of_albums(tracks) ⇒ Object



121
122
123
124
125
126
# File 'lib/pheld_itunes_data_miner.rb', line 121

# Counts the distinct album names among the tracks.
#
# Tracks without an album are ignored; the caller's array is not modified.
#
# @param tracks [Array] objects responding to #album
# @return [Integer] number of distinct non-nil albums
def get_number_of_albums(tracks)
  tracks.map(&:album).compact.uniq.length
end

#get_number_of_artists(tracks) ⇒ Object



114
115
116
117
118
119
# File 'lib/pheld_itunes_data_miner.rb', line 114

# Counts the distinct artist names among the tracks.
#
# Tracks without an artist are ignored; the caller's array is not modified.
#
# @param tracks [Array] objects responding to #artist
# @return [Integer] number of distinct non-nil artists
def get_number_of_artists(tracks)
  tracks.map(&:artist).compact.uniq.length
end

#get_number_of_genres(tracks) ⇒ Object



128
129
130
131
132
133
# File 'lib/pheld_itunes_data_miner.rb', line 128

# Counts the distinct genre names among the tracks.
#
# Tracks without a genre are ignored; the caller's array is not modified.
#
# @param tracks [Array] objects responding to #genre
# @return [Integer] number of distinct non-nil genres
def get_number_of_genres(tracks)
  tracks.map(&:genre).compact.uniq.length
end

#get_number_of_tracks(tracks) ⇒ Object



135
136
137
# File 'lib/pheld_itunes_data_miner.rb', line 135

# Reports how many tracks the list holds.
#
# @param tracks [Array] list of tracks
# @return [Integer] number of entries in +tracks+
def get_number_of_tracks(tracks)
  tracks.size
end

#get_play_count(tracks) ⇒ Object



324
325
326
327
328
329
330
331
332
# File 'lib/pheld_itunes_data_miner.rb', line 324

# Collects the play count of every track, preserving order.
#
# @param tracks [Array] objects responding to #play_count
# @return [Array] one entry per track (nil entries are kept)
def get_play_count(tracks)
  tracks.map(&:play_count)
end

#get_play_counts_normalized_for_date_added(tracks) ⇒ Object



334
335
336
337
338
339
340
341
342
# File 'lib/pheld_itunes_data_miner.rb', line 334

# Maps every track to its age-normalized play count, preserving order.
#
# @param tracks [Array] tracks accepted by
#   get_playcount_normalized_for_date_added
# @return [Array] one normalized value per track
def get_play_counts_normalized_for_date_added(tracks)
  tracks.map { |track| get_playcount_normalized_for_date_added(track) }
end

#get_playcount_normalized_for_date_added(track) ⇒ Object



364
365
366
367
368
369
# File 'lib/pheld_itunes_data_miner.rb', line 364

# Normalizes a track's play count by its age in days.
#
# The ratio is multiplied by 10000 and rounded so that the values aren't
# fractions and the correlation() method can use them.
#
# @param track [Object] responds to #play_count (Numeric) and #date_added
#   (a Date)
# @return [Integer] rounded value of 10000 * plays per day
def get_playcount_normalized_for_date_added(track)
  # A track added today has an age of zero days (division by zero) and a
  # future date gives a negative age; count both as one day old.
  age_in_days = [Date.today - track.date_added, 1].max

  (10000 * track.play_count / age_in_days).round
end

#get_rating(tracks) ⇒ Object



344
345
346
347
348
349
350
351
352
# File 'lib/pheld_itunes_data_miner.rb', line 344

# Collects the rating of every track, preserving order.
#
# @param tracks [Array] objects responding to #rating
# @return [Array] one entry per track (nil entries are kept)
def get_rating(tracks)
  tracks.map(&:rating)
end

#get_rating_playcount_correlation(tracks) ⇒ Object



273
274
275
276
277
278
279
280
281
# File 'lib/pheld_itunes_data_miner.rb', line 273

# Correlates age-normalized play count with rating.
#
# Tracks missing a play count, rating or date added are ignored. The caller's
# array is not modified.
#
# @param tracks [Array] objects responding to #play_count, #rating and
#   #date_added
# @return [Object] whatever correlation returns for the two series
def get_rating_playcount_correlation(tracks)
  complete_tracks = tracks.reject do |track|
    track.play_count.nil? || track.rating.nil? || track.date_added.nil?
  end

  play_counts = get_play_counts_normalized_for_date_added(complete_tracks)
  ratings = get_rating(complete_tracks)

  correlation(play_counts, ratings)
end

#get_total_playtime(tracks) ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/pheld_itunes_data_miner.rb', line 139

# Adds up the running time of all tracks that have one.
#
# Tracks without a total time are ignored; the caller's array is not
# modified.
#
# @param tracks [Array] objects responding to #total_time
# @return [Object] the result of seconds_fraction_to_time for the total
def get_total_playtime(tracks)
  durations = tracks.map(&:total_time).compact
  total_playtime = durations.inject(0) { |total, duration| total + duration }

  # takes seconds, but iTunes stores in milliseconds
  seconds_fraction_to_time(total_playtime / 1000)
end

#get_tracks_added_by_year(tracks) ⇒ Object



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/pheld_itunes_data_miner.rb', line 153

# Counts how many tracks were added in each calendar year.
#
# Tracks without a date added are ignored; the caller's array is not
# modified.
#
# @param tracks [Array] objects whose #date_added responds to #year
# @return [Array<Array>] [year, track_count] pairs sorted by year
def get_tracks_added_by_year(tracks)
  # count the occurrences of each year added
  tracks_added_by_year = Hash.new(0)
  tracks.each do |track|
    next if track.date_added.nil?

    tracks_added_by_year[track.date_added.year] += 1
  end

  tracks_added_by_year.sort   # sort by year
end

#guess_age(tracks) ⇒ Object



284
285
286
287
288
# File 'lib/pheld_itunes_data_miner.rb', line 284

# Estimates the library owner's age from the average track year.
#
# @param tracks [Array] tracks accepted by get_average_year
# @return [Numeric] current year minus the average track year, plus 14
def guess_age(tracks)
  mean_track_year = get_average_year(tracks)
  years_since_mean = Time.now.year - mean_track_year

  # NOTE(review): the 14-year offset is presumably an assumed age at which
  # listeners settle on their music taste -- confirm with the author.
  years_since_mean + 14
end

#parse_file(file_name) ⇒ Object



68
69
70
71
72
73
74
75
76
77
# File 'lib/pheld_itunes_data_miner.rb', line 68

# Reads an iTunes library file and parses its contents.
#
# @param file_name [String] path to the library XML file
# @return [Object, nil] the result of parse_library, or nil when the file
#   does not exist
def parse_file(file_name)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return nil unless File.exist?(file_name)

  # File.read closes the handle itself, unlike File.new(...).read.
  parse_library(File.read(file_name))
end

#parse_library(library_xml) ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/pheld_itunes_data_miner.rb', line 79

# Builds track objects from the XML text of an iTunes library.
#
# Every node matched by /plist/dict/dict/dict is treated as one track whose
# children alternate between a <key> element and the element holding that
# key's value.
#
# @param library_xml [String] XML text of the library
# @return [Array<PheldItunesTrack>] one track per matched node
def parse_library(library_xml)
  doc = Nokogiri::XML.parse(library_xml)

  doc.xpath('/plist/dict/dict/dict').map do |track_xml|
    track = PheldItunesTrack.new
    last_key = nil   # reset per track so a key never leaks into the next one

    track_xml.children.each do |element|
      # Skip whitespace text nodes between elements; otherwise they would be
      # taken as the value of the previous key and overwrite it.
      next unless element.element?

      if element.name == "key"
        last_key = element.text
        next
      end

      case last_key
      when "Track ID" then track.track_id = element.text.to_i
      when "Name" then track.name = element.text.strip
      when "Artist" then track.artist = element.text.strip
      when "Album" then track.album = element.text.strip
      when "Total Time" then track.total_time = element.text.to_i
      when "Year" then track.year = element.text.to_i
      when "Bit Rate" then track.bit_rate = element.text.to_i
      when "Play Count" then track.play_count = element.text.to_i
      when "Rating" then track.rating = element.text.to_i
      when "Date Added" then track.date_added = Date.parse(element.text)
      when "Genre" then track.genre = element.text.strip
      end

      # each key owns exactly one value element
      last_key = nil
    end

    track
  end
end


28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/pheld_itunes_data_miner.rb', line 28

# Prints the library report to standard output: totals, top-ten lists,
# tracks added per year, two correlations and an age guess.
#
# @param tracks [Array] tracks as accepted by the get_* helper methods
# @return [nil] the result of the final puts
def print_stats(tracks)
  # library totals
  puts "Track count: #{tracks.length}"
  puts "Number of artists: #{get_number_of_artists(tracks)}"
  puts "Number of albums: #{get_number_of_albums(tracks)}"
  days, hours, minutes, seconds = get_total_playtime(tracks)
  puts "Total playtime: #{days} days, #{hours} hours, #{minutes} minutes, #{seconds} seconds"

  # top-ten rankings
  top_artists = get_most_popular_artists(tracks, 10)
  puts "Ten most popular artists:"
  top_artists.each { |name, count| puts "\t\"#{name}\"  -  #{count} tracks" }

  top_years = get_most_popular_years(tracks, 10)
  puts "Ten most popular years:"
  top_years.each { |release_year, count| puts "\t\"#{release_year}\"  -  #{count} tracks" }

  top_genres = get_most_popular_genres(tracks, 10)
  puts "Ten most popular genres:"
  top_genres.each { |name, count| puts "\t\"#{name}\"  -  #{count} tracks" }

  # other interesting statistics
  added_per_year = get_tracks_added_by_year(tracks)
  puts "The number of tracks added each year was:"
  added_per_year.each { |year_added, count| puts "\t\"#{year_added}\"  -  #{count} tracks" }

  bitrate_correlation = get_bitrate_playcount_correlation(tracks)
  puts "The correlation between bit rate and play count is: #{bitrate_correlation}."

  rating_correlation = get_rating_playcount_correlation(tracks)
  puts "The correlation between rating and play count is: #{rating_correlation}."

  estimated_age = guess_age(tracks)
  puts "According to my calculations/assumptions and other peoples' research, your age is #{estimated_age}."
end

#seconds_fraction_to_time(seconds) ⇒ Object

Converts seconds to an array with days, hours, minutes and seconds



303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# File 'lib/pheld_itunes_data_miner.rb', line 303

# Converts seconds to an array with days, hours, minutes and seconds.
#
# Fractional input is truncated to whole seconds for every magnitude, so all
# four entries are always Integers.
#
# @param seconds [Numeric] duration in seconds
# @return [Array<Integer>] [days, hours, minutes, seconds]; values below one
#   minute (including negative ones) come back as [0, 0, 0, seconds]
def seconds_fraction_to_time(seconds)
  whole_seconds = seconds.to_i

  # Nothing to carry over; this also keeps negative input out of divmod,
  # which would otherwise borrow from the minutes.
  return [0, 0, 0, whole_seconds] if whole_seconds < 60

  mins, secs = whole_seconds.divmod(60)
  hours, mins = mins.divmod(60)
  days, hours = hours.divmod(24)

  [days, hours, mins, secs]
end

#sum(list) ⇒ Object



400
401
402
# File 'lib/pheld_itunes_data_miner.rb', line 400

# Adds up the elements of a list using +.
#
# @param list [Array] values to add
# @return [Object, nil] the running total, seeded with the first element;
#   nil when the list is empty
def sum(list)
  total = nil

  list.each do |value|
    # While there is no total yet, the element itself becomes the total.
    total = total ? total + value : value
  end

  total
end