Class: RTesseract

Inherits:
Object
  • Object
show all
Defined in:
lib/rtesseract.rb,
lib/rtesseract/mixed.rb,
lib/rtesseract/errors.rb

Defined Under Namespace

Classes: ConversionError, ImageNotSelectedError, Mixed, TempFilesNotRemovedError

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(src = "", options = {}) ⇒ RTesseract

Returns a new instance of RTesseract.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/rtesseract.rb', line 14

# Builds a new RTesseract for an image source.
#
# src     - either a path to an image, or an object for which
#           is_a_instance? returns true (it is then stored in @instance and
#           the source path is set to '.').
# options - Hash of settings. The keys :command, :lang / "lang",
#           :psm / "psm", :clear_console_output and
#           :processor / "processor" are consumed here; every remaining
#           pair is kept in @options.
#
# The options hash is copied before any key is removed, so the hash passed
# in by the caller is left untouched.
def initialize(src = "", options = {})
  # Work on a private copy: the deletes below must not leak to the caller.
  options = options.dup

  @command = options.delete(:command) || default_command
  @lang    = options.delete(:lang)    || options.delete("lang") || ""
  @psm     = options.delete(:psm)     || options.delete("psm")  || nil

  # Console output is silenced unless the caller explicitly passes false.
  @clear_console_output = options.delete(:clear_console_output)
  @clear_console_output = true if @clear_console_output.nil?

  @processor = options.delete(:processor) || options.delete("processor")
  @options   = options
  @value     = ""

  # No crop area selected yet.
  @x = @y = @w = @h = nil

  choose_processor!

  if is_a_instance?(src)
    @source   = Pathname.new '.'
    @instance = src
  else
    @instance = nil
    @source   = Pathname.new src
  end
end

Instance Attribute Details

#lang ⇒ Object

Select the language

Languages

  • eng - English

  • deu - German

  • deu-f - German fraktur

  • fra - French

  • ita - Italian

  • nld - Dutch

  • por - Portuguese

  • spa - Spanish

  • vie - Vietnamese

Note: Make sure the language is installed for tesseract.



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/rtesseract.rb', line 93

# Builds the language switch for the command line from @lang.
#
# The stored value is stringified, stripped and downcased. If it matches one
# of the known aliases the canonical code is used; any other non-empty value
# is passed through as-is. Returns " -l <code> " or "" when no language is
# set (or when anything goes wrong while building the switch).
def lang
  requested = @lang.to_s.strip.downcase

  # Canonical code => accepted alternative spellings
  aliases = {
    "eng" => ["en","en-us","english"],
    "ita" => ["it"],
    "por" => ["pt","pt-br","portuguese"],
    "spa" => ["sp"]
  }

  code, _names = aliases.find { |_code, names| names.include?(requested) }
  return " -l #{code} " if code

  requested.size > 0 ? " -l #{requested} " : ""
rescue
  ""
end

#options ⇒ Object

Returns the value of attribute options.



9
10
11
# File 'lib/rtesseract.rb', line 9

# Reader for the @options instance variable.
def options
  @options
end

#processor ⇒ Object (readonly)

Returns the value of attribute processor.



12
13
14
# File 'lib/rtesseract.rb', line 12

# Reader for the @processor instance variable.
def processor
  @processor
end

#psm ⇒ Object

Page segmentation mode



110
111
112
113
114
# File 'lib/rtesseract.rb', line 110

# Builds the page-segmentation switch for the command line from @psm.
#
# Returns " -psm <value> " when @psm is set, otherwise "". Any error raised
# while building the string also yields "".
def psm
  return "" if @psm.nil?

  " -psm #{@psm} "
rescue
  ""
end

Class Method Details

.read(src = nil, options = {}) {|image| ... } ⇒ Object

Yields:

  • (image)

Raises:

  • (RTesseract::ImageNotSelectedError)



40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/rtesseract.rb', line 40

# Loads an image through an image processor, optionally lets the caller
# modify it, and returns an RTesseract built from the resulting blob.
#
# src     - image source; converted with to_s before being handed to the
#           processor.
# options - Hash of settings. :processor / "processor" selects the reader
#           ("mini_magick" uses MiniMagickProcessor, anything else
#           RMagickProcessor); the remaining pairs are passed to
#           RTesseract.new. The caller's hash is not modified.
# block   - optional; receives the loaded image before it is converted.
#
# Returns the new RTesseract after from_blob has been called on it.
# Raises RTesseract::ImageNotSelectedError when src is nil.
def self.read(src = nil, options = {}, &block)
  raise RTesseract::ImageNotSelectedError if src.nil?

  # Copy first so that removing the processor key stays local.
  options   = options.dup
  processor = options.delete(:processor) || options.delete("processor")

  reader = processor == "mini_magick" ? MiniMagickProcessor : RMagickProcessor
  image  = reader.read_with_processor(src.to_s)

  # The block is optional; calling without one must not raise LocalJumpError.
  block.call(image) if block

  object = RTesseract.new("", options)
  object.from_blob(image.to_blob)
  object
end

Instance Method Details

#clear_console_output ⇒ Object

TODO: Clear console for MacOS or Windows



130
131
132
133
# File 'lib/rtesseract.rb', line 130

# Builds the shell redirection that hides the command's stderr output.
#
# Returns "" when @clear_console_output is falsy; otherwise a redirection
# of stderr to the platform's null device (File::NULL, e.g. "/dev/null").
# Always returns a String.
def clear_console_output
  return "" unless @clear_console_output

  "2>#{File::NULL}"
end

#config ⇒ Object



116
117
118
119
# File 'lib/rtesseract.rb', line 116

# Renders @options as text: one "key value" pair per line, joined with
# newlines. @options is initialised to an empty Hash when it is unset.
def config
  @options = {} unless @options

  lines = @options.map { |key, value| "#{key} #{value}" }
  lines.join("\n")
end

#config_file ⇒ Object



121
122
123
124
125
126
127
# File 'lib/rtesseract.rb', line 121

# Writes the output of #config to a temporary file and returns its path.
#
# Returns "" when there are no options (nil or empty), otherwise the path of
# the freshly written file.
def config_file
  return "" if @options.nil? || @options.empty?

  conf = Tempfile.new("config")
  conf.write(config)
  conf.flush

  # Keep a reference: a Tempfile removes its file once the object is
  # garbage-collected, which could happen before the path is used.
  @config_tempfile = conf
  conf.path
end

#convert ⇒ Object

Convert image to string



136
137
138
139
140
141
142
143
144
# File 'lib/rtesseract.rb', line 136

def convert
  path = Tempfile.new(["",".txt"]).path.to_s
  tmp_image = image_to_tiff
  `#{@command} "#{tmp_image.path}" "#{path.gsub(".txt","")}" #{lang} #{psm} #{config_file} #{clear_console_output}`
  @value = File.read("#{path}").to_s
  remove_file([tmp_image])
rescue
  raise RTesseract::ConversionError
end

#crop!(x, y, width, height) ⇒ Object

Crop image to convert



65
66
67
68
# File 'lib/rtesseract.rb', line 65

# Stores the crop rectangle (origin x/y plus width/height) in @x, @y, @w
# and @h. Returns self so calls can be chained.
def crop!(x,y,width,height)
  @x = x
  @y = y
  @w = width
  @h = height
  self
end

#default_command ⇒ Object



34
35
36
37
38
# File 'lib/rtesseract.rb', line 34

# Name of the executable used when no :command option is supplied.
#
# Uses TesseractBin::Executables[:tesseract] when that lookup succeeds and
# yields a value; falls back to "tesseract" otherwise.
def default_command
  begin
    bundled = TesseractBin::Executables[:tesseract]
  rescue
    # Lookup failed (for instance the constant is not defined).
    bundled = nil
  end

  bundled || "tesseract"
end

#from_blob(blob) ⇒ Object

Read image from memory blob



147
148
149
150
151
152
153
154
155
156
157
# File 'lib/rtesseract.rb', line 147

# Read image from memory blob.
#
# Writes blob to a temporary file, makes that file the current source, runs
# convert and finally removes the temporary file.
#
# blob - String holding the raw image bytes.
#
# Returns the result of remove_file.
# Raises RTesseract::ConversionError when any step fails.
def from_blob(blob)
  removed   = false
  blob_file = Tempfile.new("blob")

  # Image data is binary: disable newline and encoding conversion on write.
  blob_file.binmode
  blob_file.write(blob)
  blob_file.flush
  blob_file.rewind

  self.source = blob_file.path
  convert
  removed = remove_file([blob_file])
rescue
  raise RTesseract::ConversionError
ensure
  # Do not leave the temporary file behind when something failed above.
  blob_file.close! if blob_file && !removed
end

#image_name ⇒ Object



59
60
61
# File 'lib/rtesseract.rb', line 59

# Returns the result of calling basename on @source.
def image_name
  @source.basename
end

#remove_file(files = []) ⇒ Object

Remove files



71
72
73
74
75
76
77
78
79
# File 'lib/rtesseract.rb', line 71

# Closes and unlinks every file in the given list.
#
# files - objects responding to close and unlink (defaults to an empty list).
#
# Returns true.
# Raises RTesseract::TempFilesNotRemovedError when closing or unlinking fails.
def remove_file(files = [])
  begin
    files.each do |tempfile|
      tempfile.close
      tempfile.unlink
    end
  rescue
    raise RTesseract::TempFilesNotRemovedError
  end

  true
end

#source=(src) ⇒ Object



54
55
56
57
# File 'lib/rtesseract.rb', line 54

# Points the object at a new image path.
#
# The previously converted text is discarded first (@value becomes ""), then
# src is wrapped in a Pathname and stored in @source.
def source=(src)
  @value  = ""
  @source = Pathname.new(src)
end

#to_s ⇒ Object

Output value



160
161
162
163
164
165
166
167
168
# File 'lib/rtesseract.rb', line 160

# Output value.
#
# Returns the cached @value when it is not empty. Otherwise runs convert,
# provided @source is an existing file or @instance is set, and returns the
# fresh @value.
#
# Raises RTesseract::ImageNotSelectedError when there is neither a source
# file nor an instance to convert.
def to_s
  return @value unless @value == ""

  has_image = @source.file? || !@instance.nil?
  raise RTesseract::ImageNotSelectedError unless has_image

  convert
  @value
end

#to_s_without_spaces ⇒ Object

Remove spaces and line breaks



171
172
173
# File 'lib/rtesseract.rb', line 171

# Returns the to_s text with every space, "\n" and "\r" character removed.
def to_s_without_spaces
  [" ", "\n", "\r"].inject(to_s) do |text, blank|
    text.gsub(blank, "")
  end
end