Module: OCR

Extended by:
OCR
Included in:
OCR
Defined in:
lib/ocr.rb

Overview

Helper for OCR'ing single digits captured from the screen.

Constant Summary collapse

GOCR =
File.expand_path(File.dirname(__FILE__) + "/../vendor/gocr048.exe -C 0-9:/ ")
CACHE =
{}
CACHE_FILE =
File.expand_path('~/.sensible-cinema-ocr.marshal')

Instance Method Summary collapse

Instance Method Details

#clear_cache! ⇒ Object



59
60
61
62
# File 'lib/ocr.rb', line 59

# Forgets every memoized OCR result: empties the in-memory CACHE and removes
# the on-disk copy at CACHE_FILE when one is present.
#
# @return [Integer, nil] the result of File.delete when a cache file was
#   removed, nil when there was no file to remove
def clear_cache!
  CACHE.clear
  return unless File.exist?(CACHE_FILE)

  File.delete(CACHE_FILE)
end

#identify_digit(memory_bitmap, options = {}) ⇒ Object

Options are :might_be_colon and :should_invert.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/ocr.rb', line 12

# Recognizes the number (or colon) shown in a captured bitmap by running it
# through the bundled gocr executable.
#
# Successful results are memoized in CACHE, keyed by the raw bitmap string.
# A memoized result is returned immediately unless the global $OCR_NO_CACHE
# is set to a truthy value; results are still written to CACHE either way.
#
# @param memory_bitmap [String] raw image bytes (passed to
#   MiniMagick::Image.from_blob)
# @param options [Hash]
#   :might_be_colon -- when truthy, first check the raw bytes for runs of
#     zero bytes and answer ":" without invoking gocr if enough are found
#   :should_invert -- when truthy, negate the image (hulu); otherwise the
#     image is sharpened (youtube)
# @return [Integer, String, nil] the recognized number, ":" for a colon, or
#   nil when gocr reported no digit at any threshold (nil is never cached)
def identify_digit memory_bitmap, options = {}
  cache_enabled = !(defined?($OCR_NO_CACHE) && $OCR_NO_CACHE)
  return CACHE[memory_bitmap] if cache_enabled && CACHE.has_key?(memory_bitmap)

  if options[:might_be_colon]
    # do processing in-line <sigh>: count the runs of zero bytes in the raw data
    dark_runs = memory_bitmap.scan(/\x00{5}+/).length
    if dark_runs >= 3 # really should be 4 for VLC
      # it had some darkness...therefore it must have been a colon!
      CACHE[memory_bitmap] = ":"
      return ":"
    end
  end

  # Loaded lazily, and only once the fast paths above have missed:
  # installation woe, but actually pretty fast.
  require 'mini_magick'

  image = MiniMagick::Image.from_blob(memory_bitmap)
  # any operation on image is expensive, requires convert.exe in path...
  if options[:should_invert]
    # hulu wants negate (mogrify's name for it), but doesn't want sharpen
    image.negate
  else
    # youtube wants sharpen...
    image.sharpen(2)
  end
  image.format(:pnm)

  # Values for gocr's -l option: 130 for vlc, 100 for hulu, 0 for some youtube
  [130, 100, 0].each do |level|
    # 2>NUL sends gocr's stderr to the Windows null device
    output = `#{GOCR} -l #{level} #{image.path} 2>NUL`
    # output can be like "_1_\n". Drop the underscores, then take the first run
    # of digits; a plain to_i would turn output with a leading non-digit
    # (e.g. "/7") into 0 and cache that wrong answer.
    digits = output.delete('_')[/[0-9]+/]
    next unless digits

    value = digits.to_i
    CACHE[memory_bitmap] = value
    return value
  end

  # don't cache failures...could use up too much space by accident.
  nil
end

#serialize_cache_to_disk ⇒ Object



66
67
68
# File 'lib/ocr.rb', line 66

# Persists the in-memory CACHE to CACHE_FILE as a binary Marshal dump,
# replacing any previous contents of that file.
#
# @return [Integer] number of bytes written
def serialize_cache_to_disk
  payload = Marshal.dump(CACHE)
  File.open(CACHE_FILE, 'wb') { |file| file.write(payload) }
end

#unserialize_cache_from_disk ⇒ Object



70
71
72
73
74
75
# File 'lib/ocr.rb', line 70

# Loads a previously serialized cache from CACHE_FILE, if that file exists,
# and merges its entries into the in-memory CACHE (entries from the file
# overwrite in-memory entries with the same key).
#
# NOTE(review): the file is read with Marshal.load, so its contents are
# trusted completely -- confirm nothing untrusted can write to CACHE_FILE.
#
# @return [Hash, nil] CACHE after merging, or nil when no cache file exists
def unserialize_cache_from_disk
  return unless File.exist?(CACHE_FILE)

  stored = Marshal.load(File.binread(CACHE_FILE))
  CACHE.merge!(stored)
end

#version ⇒ Object



55
56
57
# File 'lib/ocr.rb', line 55

# Runs the gocr command with -h and returns whatever it prints, with stderr
# folded into stdout.
#
# @return [String] combined stdout/stderr output of the command
def version
  help_command = "#{GOCR} -h 2>&1"
  %x(#{help_command})
end