Top Level Namespace

Includes:: FileUtils, OSX

Defined Under Namespace

Constant Summary collapse

Itunes =

SBApplication.applicationWithBundleIdentifier_("com.apple.iTunes")

SkippedFiles =

[]

Instance Method Summary collapse

#calculate_md5(file_path) ⇒ Object
#determine_duplicate(tracks) ⇒ Object

Takes an array of PossibleDuplicateTracks with the same MD5 signature and decides which one is the original simply by choosing the track with the shortest filename; the remaining tracks (those with longer filenames) are treated as the duplicates.
#display_note ⇒ Object
#find_duplicates(tracks, run_limit = nil, mb_limit = nil) ⇒ Object

Takes an array of tracks and returns a hash where the keys are MD5 digests and the values are arrays of PossibleDuplicateTrack objects that share that digest. Only digests shared by two or more tracks are included in the returned hash.
#normalize_path(path) ⇒ Object
#run(pattern = nil, run_limit = nil) ⇒ Object
#unixify_path(path) ⇒ Object (also: #unixify)

Instance Method Details

#calculate_md5(file_path) ⇒ `Object`

# File 'lib/itunes_dup_handler.rb', line 70

# Prints a progress line for +file_path+ and returns the file's MD5 digest.
#
# The digest is computed in-process with Digest::MD5.file, which reads the
# file in chunks and never hands the path to a shell. Building an
# `openssl dgst` command line by string interpolation broke (or executed
# unintended commands) for file names containing double quotes, backticks
# or "$".
#
# file_path - String path of the file to hash.
#
# Returns the lowercase hex digest String, or nil when the file cannot be
# read (missing file, directory, permission problem).
def calculate_md5( file_path )
  # Required here so the method does not rely on a top-of-file require.
  require 'digest/md5'

  print "calculating md5 for file: #{file_path}"
  print " --> "
  STDOUT.flush

  md5 = begin
    Digest::MD5.file( file_path ).hexdigest
  rescue SystemCallError
    nil # unreadable file: report "no digest" instead of raising
  end
  print md5
  puts
  md5
end

#determine_duplicate(tracks) ⇒ `Object`

Takes an array of PossibleDuplicateTracks with the same MD5 signature and decides which one is the original simply by choosing the track with the shortest filename; the remaining tracks (those with longer filenames) are treated as the duplicates.

# File 'lib/itunes_dup_handler.rb', line 149

# Splits a group of tracks into the presumed original and its duplicates.
#
# The tracks are ordered by the length of the basename of their unix_path;
# the first one (shortest name) is reported as the original and everything
# after it as duplicates. The array passed in is left untouched.
#
# Returns a two-element Array: [original, duplicates].
def determine_duplicate( tracks )
  by_name_length = tracks.sort_by do |candidate|
    File.basename( candidate.unix_path ).length
  end
  original, *duplicates = by_name_length
  [original, duplicates]
end

#display_note ⇒ `Object`

# File 'lib/itunes_dup_handler.rb', line 286

# Prints the closing disclaimer about near-duplicates, with one blank line
# before it and one after it. The note explains that tracks whose MD5
# hashes differ are deliberately left alone.
def display_note
  note_lines = [
    "# NOTE: This program is designed to detect and remove only exact duplicates.",
    "# Some apparent duplicates may be left intact in the iTunes library after the",
    "# program runs. These are not true duplicates because their MD5 hashes do not",
    "# match for one reason or another. It could be that one version of the track is",
    "# protected and the other is not; that one version is compressed more than the",
    "# other, or that one version uses different protection encryption, or that the",
    "# versions simply have a few bits set differently for whatever reason. Please",
    "# check the files manually to be sure."
  ]
  puts
  puts note_lines.join("\n")
  puts
end

#find_duplicates(tracks, run_limit = nil, mb_limit = nil) ⇒ `Object`

Takes an array of tracks and returns a hash where the keys are MD5 digests and the values are arrays of PossibleDuplicateTrack objects that share that digest. Only digests shared by two or more tracks are included in the returned hash.

# File 'lib/itunes_dup_handler.rb', line 100

# Groups tracks by the MD5 digest of their media file and returns only the
# groups that contain two or more tracks.
#
# tracks    - Enumerable of track objects responding to #location and #name.
# run_limit - Optional maximum number of tracks to hash. Exactly this many
#             tracks are hashed before the scan stops (the earlier
#             `counter > run_limit` check hashed one track too many, and
#             always hashed at least one even for a limit of 0).
# mb_limit  - Optional size ceiling in whole megabytes; larger files are
#             recorded in SkippedFiles and not hashed.
#
# Side effects visible in this method:
# * a track whose location is blank is wrapped in a PossibleDuplicateTrack
#   and removed from iTunes via #remove_from_itunes;
# * paths that are missing on disk, too large, or that yield no digest are
#   appended to SkippedFiles;
# * progress and summary lines are written to standard output.
#
# Returns a Hash of digest String => Array of PossibleDuplicateTrack.
def find_duplicates( tracks, run_limit = nil, mb_limit=nil )
  hashed_count = 0
  md5_tracks = Hash.new {|hash, key| hash[key] = []} # will be keyed by MD5 hash
  tracks.each do |track|
    # Checked before touching the next track so the limit is exact.
    break if run_limit && hashed_count >= run_limit

    track_file_location = unixify_path( track.location.to_s )
    if track_file_location.nil? || track_file_location.strip.empty?
      puts ">>> No file location for #{track.name}. Removing this track from iTunes..."
      PossibleDuplicateTrack.new(track, nil).remove_from_itunes
      next
    end
    unless File.exist?(track_file_location)
      puts ">>> No file found for #{track.name} at #{track_file_location}"
      SkippedFiles << track_file_location
      next
    end
    # Integer division: the size is truncated to whole megabytes.
    mb = File.size(track_file_location) / (1024 * 1024)
    if mb_limit && mb > mb_limit
      puts "SKIPPING FILE DUE TO EXCESS SIZE (#{mb}MB): #{track.name} : #{track_file_location}"
      SkippedFiles << track_file_location
      puts "Please deal with any duplicates of this file manually"
      next
    end

    # We will use the MD5 digest to identify identical files.
    md5 = calculate_md5( track_file_location )
    if md5.nil? # missing file or bad file path
      SkippedFiles << track_file_location
      next
    end
    # This might be a duplicate file
    md5_tracks[md5] << PossibleDuplicateTrack.new( track, md5 )
    hashed_count += 1
  end
  puts "=" * 40
  # puts "Total Tracks: #{tracks.size}" # This number is confusing
  puts "Total Unique Media Files: #{md5_tracks.keys.size}"
  # delete_if prunes md5_tracks in place and returns that same hash.
  tracks_with_dups = md5_tracks.delete_if {|key, value| value.size < 2}
  puts "Tracks With Duplicates: #{tracks_with_dups.keys.size}"
  puts "=" * 40
  return tracks_with_dups
end

#normalize_path(path) ⇒ `Object`



93
94
95

# File 'lib/itunes_dup_handler.rb', line 93

# Removes a " N" suffix (whitespace plus one digit) that sits directly in
# front of a three-character file extension, e.g. "Song 1.mp3" becomes
# "Song.mp3". Paths that do not end that way are returned unchanged.
#
# The dot before the extension is matched literally. Left unescaped it
# matched any character, so a name such as "Track 1 mix" was wrongly
# rewritten to "Track mix".
#
# Returns a new String; the argument is not modified.
def normalize_path(path)
  path.sub(/\s\d(\.\w{3})$/, '\1')
end

#run(pattern = nil, run_limit = nil) ⇒ `Object`

# File 'lib/itunes_dup_handler.rb', line 156

# Interactive entry point: prints the banner, asks for a maximum file size,
# scans the first iTunes source's library playlist for files with identical
# MD5 digests, lists them, and deletes the duplicates after confirmation.
#
# pattern   - Optional Regexp; when given, only tracks whose name matches
#             it are examined.
# run_limit - Optional cap on the number of tracks to hash, passed straight
#             through to find_duplicates.
#
# Reads answers from standard input with gets. End of input (gets returning
# nil) is treated as "take the default" for the size and skipped-files
# prompts, and as "cancel" for the delete prompt, so a closed stdin can
# never trigger deletion.
#
# Calls exit when the user aborts after viewing skipped files or when no
# duplicates are found.
def run(pattern=nil, run_limit=nil)
  title = "itunes duplicate track removal machine"
  width = 80
  puts "-" * width
  puts
  puts title.center(width)
  puts
  puts "by daniel choi".center(width)
  puts "betahouse".center(width)
  puts "cambridge, ma, usa".center(width)
  puts
  puts "contact: [email protected]".center(width)
  puts
  puts "-" * width

  puts
  sleep 1
  puts "Running program. It may take several minutes to analyze all the tracks in your\niTunes library.\n"
  sleep 2
  puts
  puts "To avoid any potential hangs and crashes, don't use iTunes while the program is\nrunning.\n"
  sleep 2
  puts
  puts "When this program is done analyzing your tracks, it will show you the duplicates\nit has detected and ask for your confirmation before removing any files. So feel\nfree to go get some coffee and come back.\n"
  puts
  sleep 1
  puts "To make the program run faster, we can skip iTunes files that are very large."
  puts "How many megabytes do you want to set as the maximum? "
  print "(Default: 100mb. Type 'none' for no limit): "
  response = gets.to_s # nil at end of input becomes "", i.e. the default
  requested_mb = response.to_i
  if response =~ /none/
    mb_limit = nil
  elsif requested_mb <= 0
    # Blank, non-numeric, zero or negative input: use the documented default.
    # (A negative limit would otherwise cause every file to be skipped.)
    mb_limit = 100
  else
    mb_limit = requested_mb
  end
  if mb_limit
    puts "Setting maximum file size to analyze to #{mb_limit}MB."
  else
    puts "Setting maximum file size to analyze to unlimited."
  end
  sleep 3
  source = Itunes.sources.first
  puts "Using iTunes source: #{source.name}"
  puts "Using iTunes playlist: #{source.playlists.first.name}"

  duplicate_tracks = []
  file_tracks = source.libraryPlaylists[0].fileTracks
  if pattern # filter tracks
    file_tracks = file_tracks.select {|track| track.name.to_s =~ pattern}
  end
  puts "Calculating MD5 digests for all tracks with files..."
  sleep 2
  puts
  tracks_with_dups = find_duplicates( file_tracks, run_limit, mb_limit )
  puts "Finding duplicates..."
  puts
  sleep 2

  # Now do something with the duplicate tracks
  puts "=" * 40
  SkippedFiles.compact!
  SkippedFiles.reject! {|x| x.strip == ''}
  unless SkippedFiles.empty?
    # Only mention a size threshold when one was actually in force.
    size_note = mb_limit ? " (over #{mb_limit}MB)" : ""
    puts "#{SkippedFiles.length} files were skipped, either because they could not be found or because they were too large#{size_note} to analyze quickly. Also, files may be skipped because their path contains non-standard characters. Please look for duplicates in these files manually."
    puts
    puts "Please handle any duplicates of these special files manually."
    puts
    puts "See these files? (y/n) (default: n) "
    if gets.to_s.strip.downcase == "y"
      puts "Skipped Files:"
      SkippedFiles.each do |file|
        puts file
      end
      puts "=" * 40
      puts "Continue? (y/n)"
      if gets =~ /^n/
        puts "Aborted."
        exit
      end

    end
  end

  puts "FOUND DUPLICATES:"
  sleep 1
  if tracks_with_dups.empty?
    puts "No true duplicates found."
    puts "Exiting."
    display_note
    exit
  end
  tracks_with_dups.each do |k,tracks|
    puts "#{k} :"
    original, dups = determine_duplicate( tracks )
    puts "  #{original.database_id} : #{original.unix_path} <-- original"
    dups.each do |track|
      puts "  #{track.database_id} : #{track.unix_path} <-- duplicate"
    end
    duplicate_tracks << dups
  end
  duplicate_tracks.flatten!

  num_to_remove = duplicate_tracks.length
  print "Go ahead and delete the #{num_to_remove} files marked 'duplicate'? (y/n) (default: y) "

  answer = gets
  # A closed stdin (nil) must never count as consent to delete files.
  if answer.nil? || answer.strip.downcase == "n"
    puts "Canceled."
  else
    puts "OK, here goes..."
    # DELETE EM
    duplicate_tracks.each {|track| track.delete!}
    sleep 2
    puts "Done. #{num_to_remove} duplicates removed."
    display_note
  end
end

#unixify_path(path) ⇒ `Object` Also known as: unixify

# File 'lib/itunes_dup_handler.rb', line 83

# Converts an iTunes file location (a percent-encoded "file://localhost/..."
# URL) into a plain filesystem path.
#
# Steps, in order:
# 1. "+", "'", "(" and ")" are percent-encoded first so that the later
#    CGI.unescape turns them back into themselves instead of altering them
#    (CGI.unescape would otherwise turn a literal "+" into a space).
# 2. Every "$" is prefixed with a backslash.
# 3. A leading "file://localhost" is dropped, the rest is CGI-unescaped and
#    surrounding whitespace is stripped.
#
# The work is done on copies: the caller's string is no longer modified in
# place, so frozen strings are accepted and callers keep their original
# value.
#
# Returns a new String.
def unixify_path(path)
  # CGI strips out literal +, etc. signs in the path, but we need to preserve them.
  protected_path = ['+', "'", '(', ')'].inject(path) do |current, symbol|
    current.gsub(symbol, CGI.escape(symbol))
  end
  protected_path = protected_path.gsub('$', '\$')
  CGI.unescape( protected_path.sub(/^(file:\/\/localhost)/, '') ).strip
end

Top Level Namespace

Defined Under Namespace

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#calculate_md5(file_path) ⇒ Object

#determine_duplicate(tracks) ⇒ Object

#display_note ⇒ Object

#find_duplicates(tracks, run_limit = nil, mb_limit = nil) ⇒ Object

#normalize_path(path) ⇒ Object

#run(pattern = nil, run_limit = nil) ⇒ Object