Top Level Namespace

Includes:
FileUtils, OSX

Defined Under Namespace

Classes: PossibleDuplicateTrack

Constant Summary collapse

Itunes =
SBApplication.applicationWithBundleIdentifier_("com.apple.iTunes")
SkippedFiles =
[]

Instance Method Summary collapse

Instance Method Details

#calculate_md5(file_path) ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/itunes_dup_handler.rb', line 70

# Computes the MD5 hex digest of the file at +file_path+ and echoes progress
# ("calculating md5 for file: <path> --> <digest>") to standard output.
#
# The digest is computed in-process with Digest::MD5 rather than by shelling
# out, so file names containing quotes, backticks or "$(...)" can neither
# break the command nor be interpreted by a shell. The path is used literally:
# shell-style escapes (e.g. a backslash before "$") are NOT interpreted.
#
# @param file_path [String] path of the file to hash
# @return [String, nil] 32-character lowercase hex digest, or nil when the
#   file cannot be read (missing file, directory, permission problem, ...)
def calculate_md5( file_path )
  # Loaded lazily so this method does not rely on a file-level require.
  require 'digest/md5'

  print "calculating md5 for file: #{file_path}"
  print " --> "
  STDOUT.flush

  md5 = begin
    # Digest::MD5.file streams the file instead of loading it into memory.
    Digest::MD5.file( file_path ).hexdigest
  rescue SystemCallError, IOError
    nil # callers treat nil as "missing file or bad file path"
  end
  print md5
  puts
  md5
end

#determine_duplicate(tracks) ⇒ Object

Takes an array of PossibleDuplicateTrack objects that share the same MD5 signature and decides which ones are the duplicates: the track with the shortest file name is kept as the original, and the one(s) with longer file names are treated as duplicates.



149
150
151
152
153
154
# File 'lib/itunes_dup_handler.rb', line 149

# Splits a group of tracks into the presumed original and its duplicates.
#
# Tracks are ordered by the length of the basename of their +unix_path+; the
# one with the shortest basename is taken as the original and everything else
# is reported as a duplicate. The array passed in is not modified.
#
# @param tracks [Array] objects responding to +unix_path+
# @return [Array] two elements: the original (nil when +tracks+ is empty) and
#   an Array of the remaining tracks, shortest basename first
def determine_duplicate( tracks )
  original, *duplicates = tracks.sort_by { |candidate| File.basename( candidate.unix_path ).length }
  [original, duplicates]
end

#display_note ⇒ Object



286
287
288
289
290
291
292
293
294
295
296
297
298
299
# File 'lib/itunes_dup_handler.rb', line 286

# Prints a fixed explanatory note, framed by one blank line above and below,
# reminding the user that only byte-identical (same MD5) files are treated as
# duplicates and that look-alike tracks should be checked by hand.
def display_note
  note_lines = [
    "# NOTE: This program is designed to detect and remove only exact duplicates.",
    "# Some apparent duplicates may be left intact in the iTunes library after the",
    "# program runs. These are not true duplicates because their MD5 hashes do not",
    "# match for one reason or another. It could be that one version of the track is",
    "# protected and the other is not; that one version is compressed more than the",
    "# other, or that one version uses different protection encryption, or that the",
    "# versions simply have a few bits set differently for whatever reason. Please",
    "# check the files manually to be sure."
  ]
  puts
  puts note_lines # puts writes each array element on its own line
  puts
end

#find_duplicates(tracks, run_limit = nil, mb_limit = nil) ⇒ Object

Takes an array of tracks and returns a hash in which each key is an MD5 digest and each value is an array of the PossibleDuplicateTrack objects whose files share that digest. Only digests shared by two or more tracks are included in the result.



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/itunes_dup_handler.rb', line 100

# Groups tracks by the MD5 digest of their media file and returns only the
# groups that contain two or more tracks.
#
# For every track the file location is converted with unixify_path, then:
# * a blank location removes the track from iTunes (via
#   PossibleDuplicateTrack#remove_from_itunes) and moves on;
# * a file that does not exist, exceeds +mb_limit+, or yields no digest is
#   recorded in SkippedFiles and skipped;
# * otherwise the track is wrapped in a PossibleDuplicateTrack and filed
#   under its digest.
# A short summary is printed to standard output at the end.
#
# @param tracks [#each] track objects responding to +name+ and +location+
# @param run_limit [Integer, nil] stop after this many tracks have been
#   hashed (skipped tracks do not count); nil means no limit
# @param mb_limit [Integer, nil] files larger than this many megabytes are
#   skipped; nil means no limit. The size is truncated to whole megabytes
#   before the comparison.
# @return [Hash] MD5 hex digest => Array of PossibleDuplicateTrack, only for
#   digests shared by at least two tracks
def find_duplicates( tracks, run_limit = nil, mb_limit=nil )
  hashed_count = 0
  md5_tracks = Hash.new {|hash, key| hash[key] = []} # will be keyed by MD5 hash
  tracks.each do |track|
    track_file_location = unixify_path( track.location.to_s )
    if track_file_location.nil? || track_file_location.strip.empty?
      puts ">>> No file location for #{track.name}. Removing this track from iTunes..."
      PossibleDuplicateTrack.new(track, nil).remove_from_itunes
      next
    end
    unless File.exist?(track_file_location)
      puts ">>> No file found for #{track.name} at #{track_file_location}"
      SkippedFiles << track_file_location
      next
    end
    mb = File.size(track_file_location) / (1024 * 1024) # integer division: whole megabytes
    if mb_limit && mb > mb_limit
      puts "SKIPPING FILE DUE TO EXCESS SIZE (#{mb}MB): #{track.name} : #{track_file_location}"
      SkippedFiles << track_file_location
      puts "Please deal with any duplicates of this file manually"
      next
    end

    # We will use the MD5 digest to identify identical files.
    md5 = calculate_md5( track_file_location )
    if md5.nil? # missing file or bad file path
      SkippedFiles << track_file_location
      next
    end
    # This might be a duplicate file
    md5_tracks[md5] << PossibleDuplicateTrack.new( track, md5 )
    hashed_count += 1
    # ">=" so that exactly run_limit tracks are hashed (">" hashed one extra).
    break if run_limit && hashed_count >= run_limit
  end
  puts "=" * 40
  # puts "Total Tracks: #{tracks.size}" # This number is confusing
  puts "Total Unique Media Files: #{md5_tracks.keys.size}"
  # delete_if prunes md5_tracks in place and returns that same hash.
  tracks_with_dups = md5_tracks.delete_if {|key, value| value.size < 2}
  puts "Tracks With Duplicates: #{tracks_with_dups.keys.size}"
  puts "=" * 40
  return tracks_with_dups
end

#normalize_path(path) ⇒ Object



93
94
95
# File 'lib/itunes_dup_handler.rb', line 93

# Removes a trailing " <digit>" that sits directly before a three-character
# file extension, e.g. "Song 1.mp3" becomes "Song.mp3". Anything else is
# returned unchanged. The argument itself is not modified.
#
# @param path [String] file name or path
# @return [String] a new string with the numeric suffix removed, if present
def normalize_path(path)
  # The dot is escaped so that only a real ".ext" suffix qualifies; an
  # unescaped "." would also match names such as "Album 12345".
  path.sub(/\s\d(\.\w{3})$/, '\1')
end

#run(pattern = nil, run_limit = nil) ⇒ Object



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
# File 'lib/itunes_dup_handler.rb', line 156

# Interactive entry point: prints a banner, asks for a maximum file size,
# hashes the file tracks of the first library playlist of the first iTunes
# source, lists the duplicates found and, after confirmation, deletes them.
#
# Answers are read with +gets+. End of input is handled as follows: the size
# prompt falls back to its default, the "see skipped files" prompt is treated
# as "n", and the final delete prompt is treated as a refusal so that nothing
# is ever deleted without an answer from the user.
#
# Calls +exit+ when the user aborts after viewing skipped files or when no
# duplicates are found.
#
# @param pattern [Regexp, nil] when given, only tracks whose name matches
#   (via =~) are analyzed
# @param run_limit [Integer, nil] passed through to find_duplicates
def run(pattern=nil, run_limit=nil)
  title = "itunes duplicate track removal machine"
  width = 80
  puts "-" * width
  puts
  puts title.center(width)
  puts
  puts "by daniel choi".center(width)
  puts "betahouse".center(width)
  puts "cambridge, ma, usa".center(width)
  puts
  puts "contact: [email protected]".center(width)
  puts
  puts "-" * width

  puts
  sleep 1
  puts "Running program. It may take several minutes to analyze all the tracks in your\niTunes library.\n"
  sleep 2
  puts
  puts "To avoid any potential hangs and crashes, don't use iTunes while the program is\nrunning.\n"
  sleep 2
  puts
  puts "When this program is done analyzing your tracks, it will show you the duplicates\nit has detected and ask for your confirmation before removing any files. So feel\nfree to go get some coffee and come back.\n"
  puts
  sleep 1
  puts "To make the program run faster, we can skip iTunes files that are very large."
  puts "How many megabytes do you want to set as the maximum? "
  print "(Default: 100mb. Type 'none' for no limit): "
  response = gets.to_s # gets returns nil at end of input; treat that as an empty answer
  if response =~ /none/
    mb_limit = nil
  elsif response.to_i <= 0
    # Blank, non-numeric, zero or negative input: use the default. A negative
    # limit would otherwise cause every file to be skipped.
    mb_limit = 100
  else
    mb_limit = response.to_i
  end
  if mb_limit
    puts "Setting maximum file size to analyze to #{mb_limit}MB."
  else
    puts "Setting maximum file size to analyze to unlimited."
  end
  sleep 3
  source = Itunes.sources.first
  puts "Using iTunes source: #{source.name}"
  puts "Using iTunes playlist: #{source.playlists.first.name}"

  duplicate_tracks = []
  fileTracks = source.libraryPlaylists[0].fileTracks
  if pattern # filter tracks
    fileTracks = fileTracks.select {|track| track.name.to_s =~ pattern}
  end
  puts "Calculating MD5 digests for all tracks with files..."
  sleep 2
  puts
  tracks_with_dups = find_duplicates( fileTracks, run_limit, mb_limit )
  puts "Finding duplicates..."
  puts
  sleep 2

  # Report files that find_duplicates could not analyze.
  puts "=" * 40
  SkippedFiles.compact!
  SkippedFiles.reject! {|x| x.strip == ''}
  unless SkippedFiles.empty?
    puts "#{SkippedFiles.length} files were skipped, either because they could not be found or because they were too large (over #{mb_limit}MB) to analyze quickly. Also, files may be skipped because their path contains non-standard characters. Please look for duplicates in these files manually."
    puts
    puts "Please handle any duplicates of these special files manually."
    puts
    puts "See these files? (y/n) (default: n) "
    if gets.to_s.strip.downcase == "y"
      puts "Skipped Files:"
      SkippedFiles.each do |file|
        puts file
      end
      puts "=" * 40
      puts "Continue? (y/n)"
      if gets.to_s =~ /^n/
        puts "Aborted."
        exit
      end

    end
  end

  puts "FOUND DUPLICATES:"
  sleep 1
  if tracks_with_dups.empty?
    puts "No true duplicates found."
    puts "Exiting."
    display_note
    exit
  end
  tracks_with_dups.each do |k,tracks|
    puts "#{k} :"
    original, dups = determine_duplicate( tracks )
    puts "  #{original.database_id} : #{original.unix_path} <-- original"
    dups.each do |track|
      puts "  #{track.database_id} : #{track.unix_path} <-- duplicate"
    end
    duplicate_tracks.concat( dups )
  end

  num_to_remove = duplicate_tracks.length
  print "Go ahead and delete the #{num_to_remove} files marked 'duplicate'? (y/n) (default: y) "

  answer = gets
  # nil means end of input: cancel rather than fall through to the "y" default.
  if answer.nil? || answer.strip.downcase == "n"
    puts "Canceled."
  else
    puts "OK, here goes..."
    # DELETE EM
    duplicate_tracks.each {|track| track.delete!}
    sleep 2
    puts "Done. #{num_to_remove} duplicates removed."
    display_note
  end
end

#unixify_path(path) ⇒ Object Also known as: unixify



83
84
85
86
87
88
89
90
# File 'lib/itunes_dup_handler.rb', line 83

# Converts a "file://" location string into a plain, URL-decoded file path.
#
# The argument is left untouched (frozen strings are accepted); all work is
# done on new strings.
#
# @param path [String] location such as "file://localhost/Users/me/A%20B.mp3"
#   or "file:///Users/me/A%20B.mp3"
# @return [String] decoded path with the "file://" / "file://localhost" prefix
#   removed and surrounding whitespace stripped. Any "$" in the result is
#   preceded by a backslash.
def unixify_path(path)
  # CGI.unescape would turn a literal "+" into a space, so these characters
  # are percent-encoded first and come back unchanged after decoding.
  protected_path = ['+', "'", '(', ')'].inject(path) do |text, symbol|
    text.gsub(symbol, CGI.escape(symbol))
  end
  protected_path = protected_path.gsub('$', '\$')
  # Accept both the "file://localhost/..." and the host-less "file:///..." form.
  CGI.unescape( protected_path.sub(/\Afile:\/\/(localhost)?/, '') ).strip
end