Class: RTesseract
- Inherits:
-
Object
- Object
- RTesseract
- Defined in:
- lib/rtesseract.rb,
lib/rtesseract/mixed.rb,
lib/rtesseract/errors.rb
Defined Under Namespace
Classes: ConversionError, ImageNotSelectedError, Mixed, TempFilesNotRemovedError
Instance Attribute Summary collapse
-
#lang ⇒ Object
Selects the recognition language. Languages: eng (English), deu (German), deu-f (German Fraktur), fra (French), ita (Italian), nld (Dutch), por (Portuguese), spa (Spanish), vie (Vietnamese). Note: make sure the language is installed for tesseract.
-
#options ⇒ Object
Returns the value of attribute options.
-
#processor ⇒ Object
readonly
Returns the value of attribute processor.
-
#psm ⇒ Object
Page Segment Mode.
Class Method Summary collapse
Instance Method Summary collapse
-
#clear_console_output ⇒ Object
TODO: Clear console for MacOS or Windows.
- #config ⇒ Object
- #config_file ⇒ Object
-
#convert ⇒ Object
Convert image to string.
-
#crop!(x, y, width, height) ⇒ Object
Crop image to convert.
- #default_command ⇒ Object
-
#from_blob(blob) ⇒ Object
Read image from memory blob.
- #image_name ⇒ Object
-
#initialize(src = "", options = {}) ⇒ RTesseract
constructor
A new instance of RTesseract.
-
#remove_file(files = []) ⇒ Object
Remove files.
- #source=(src) ⇒ Object
-
#to_s ⇒ Object
Output value.
-
#to_s_without_spaces ⇒ Object
Removes spaces and line breaks.
Constructor Details
#initialize(src = "", options = {}) ⇒ RTesseract
Returns a new instance of RTesseract.
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/rtesseract.rb', line 14
#
# Builds a new RTesseract for +src+.
#
# src     - the image source: either something is_a_instance? accepts
#           (kept as-is in @instance) or a value handed to Pathname.new.
# options - Hash of settings. :command, :lang, :psm, :clear_console_output
#           and :processor are consumed here (:lang, :psm and :processor are
#           also looked up under String keys); the remaining entries are
#           kept in @options.
def initialize(src = "", options = {})
  # Work on a copy: the deletes below would otherwise strip keys from the
  # hash the caller still holds.
  options = options.dup

  @command = options.delete(:command) || default_command
  @lang = options.delete(:lang) || options.delete("lang") || ""
  @psm = options.delete(:psm) || options.delete("psm") || nil

  # Console output is silenced unless the caller explicitly passes false.
  @clear_console_output = options.delete(:clear_console_output)
  @clear_console_output = true if @clear_console_output.nil?

  @options = options
  @value = ""
  @x, @y, @w, @h = []

  # @options references the same hash, so the processor key is removed from it too.
  @processor = options.delete(:processor) || options.delete("processor")
  choose_processor!

  if is_a_instance?(src)
    @source = Pathname.new '.'
    @instance = src
  else
    @instance = nil
    @source = Pathname.new src
  end
end
Instance Attribute Details
#lang ⇒ Object
Select the language
Languages
-
eng - English
-
deu - German
-
deu-f - German fraktur
-
fra - French
-
ita - Italian
-
nld - Dutch
-
por - Portuguese
-
spa - Spanish
-
vie - Vietnamese
Note: Make sure the language is installed for tesseract.
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/rtesseract.rb', line 93
#
# Builds the " -l <code> " command-line fragment for the selected language.
# Known aliases are mapped to their canonical code; any other non-empty
# value is passed through (stripped and downcased). Returns "" when no
# language is set or when anything goes wrong while building the fragment.
def lang
  # Canonical code => alternative spellings accepted for it.
  aliases = {
    "eng" => ["en", "en-us", "english"],
    "ita" => ["it"],
    "por" => ["pt", "pt-br", "portuguese"],
    "spa" => ["sp"]
  }

  requested = "#{@lang}".strip.downcase
  canonical, = aliases.find { |_code, names| names.include?(requested) }
  code = canonical || requested

  code.empty? ? "" : " -l #{code} "
rescue StandardError
  ""
end
#options ⇒ Object
Returns the value of attribute options.
9 10 11 |
# File 'lib/rtesseract.rb', line 9
#
# Returns the value of attribute options.
def options
  @options
end
#processor ⇒ Object (readonly)
Returns the value of attribute processor.
12 13 14 |
# File 'lib/rtesseract.rb', line 12
#
# Read-only accessor: returns the value of attribute processor.
def processor
  @processor
end
#psm ⇒ Object
Page Segment Mode
110 111 112 113 114 |
# File 'lib/rtesseract.rb', line 110
#
# Page Segment Mode: builds the " -psm <mode> " command-line fragment,
# or "" when no mode is set or the fragment cannot be built.
def psm
  return "" if @psm.nil?

  " -psm #{@psm} "
rescue StandardError
  ""
end
Class Method Details
.read(src = nil, options = {}) {|image| ... } ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/rtesseract.rb', line 40
#
# Loads +src+ through an image processor, yields the loaded image to the
# caller's block, then converts the image's blob with a fresh RTesseract.
#
# src     - image location; converted with to_s before loading.
# options - Hash; :processor / "processor" selects MiniMagickProcessor when
#           it equals "mini_magick", otherwise RMagickProcessor is used. The
#           remaining entries are passed to RTesseract.new.
#
# Returns the RTesseract instance after from_blob has run.
# Raises RTesseract::ImageNotSelectedError when +src+ is nil.
def self.read(src = nil, options = {}, &block)
  raise RTesseract::ImageNotSelectedError if src.nil?

  # Copy first so removing the processor key does not alter the caller's hash.
  options = options.dup
  processor = options.delete(:processor) || options.delete("processor")
  reader = processor == "mini_magick" ? MiniMagickProcessor : RMagickProcessor

  image = reader.read_with_processor(src.to_s)
  yield image

  # NOTE(review): the processor key is gone by the time the options reach
  # RTesseract.new, so the instance is built without it - confirm intended.
  object = RTesseract.new("", options)
  object.from_blob(image.to_blob)
  object
end
Instance Method Details
#clear_console_output ⇒ Object
TODO: Clear console for MacOS or Windows
130 131 132 133 |
# File 'lib/rtesseract.rb', line 130
#
# Builds the shell redirection that discards the command's error output.
#
# Returns "" when silencing is disabled, otherwise "2>" followed by the
# platform's null device. File::NULL is used instead of a hard-coded
# "/dev/null" so a String is returned on every platform (the previous
# version returned nil where /dev/null did not exist).
def clear_console_output
  return "" unless @clear_console_output

  "2>#{File::NULL}"
end
#config ⇒ Object
116 117 118 119 |
# File 'lib/rtesseract.rb', line 116
#
# Renders the stored options as configuration text: one "key value" pair
# per line, joined with newlines. Initializes @options to an empty Hash
# when it is unset, in which case the result is "".
def config
  @options ||= {}
  @options.map { |key, value| "#{key} #{value}" }.join("\n")
end
#config_file ⇒ Object
121 122 123 124 125 126 127 |
# File 'lib/rtesseract.rb', line 121
#
# Writes the text returned by #config to a temporary file and returns its
# path, or "" when there are no options to write.
def config_file
  return "" if @options.nil? || @options.empty?

  # Keep the Tempfile referenced from the instance: an unreferenced Tempfile
  # may be garbage-collected, and its finalizer deletes the file - possibly
  # before the returned path has been used.
  @config_tempfile = Tempfile.new("config")
  @config_tempfile.write(config)
  @config_tempfile.flush
  @config_tempfile.path
end
#convert ⇒ Object
Convert image to string
136 137 138 139 140 141 142 143 144 |
# File 'lib/rtesseract.rb', line 136
#
# Convert image to string: runs @command on the temporary image returned by
# image_to_tiff and stores the text it writes in @value.
#
# Returns the result of remove_file.
# Raises RTesseract::ConversionError when any step fails.
def convert
  # Hold on to the Tempfile so its finalizer cannot delete the output file
  # while the command is running or before it has been read.
  output = Tempfile.new(["", ".txt"])
  # The output name is passed without its extension; strip only the
  # trailing ".txt", not every ".txt" that may occur inside the path.
  output_base = output.path.to_s.sub(/\.txt\z/, "")
  tmp_image = image_to_tiff

  `#{@command} "#{tmp_image.path}" "#{output_base}" #{lang} #{psm} #{config_file} #{clear_console_output}`

  @value = File.read(output.path).to_s
  remove_file([tmp_image, output])
rescue StandardError
  raise RTesseract::ConversionError
end
#crop!(x, y, width, height) ⇒ Object
Crop image to convert
65 66 67 68 |
# File 'lib/rtesseract.rb', line 65
#
# Crop image to convert: records the crop origin (x, y) and size
# (width, height) on the instance and returns self so calls can be chained.
def crop!(x, y, width, height)
  @x = x
  @y = y
  @w = width
  @h = height
  self
end
#default_command ⇒ Object
34 35 36 37 38 |
# File 'lib/rtesseract.rb', line 34
#
# Returns the command used when none is given: the :tesseract entry of
# TesseractBin::Executables when that lookup succeeds and is set, otherwise
# the plain "tesseract" command name.
def default_command
  bundled = TesseractBin::Executables[:tesseract]
  bundled || 'tesseract'
rescue StandardError
  "tesseract"
end
#from_blob(blob) ⇒ Object
Read image from memory blob
147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/rtesseract.rb', line 147
#
# Read image from memory blob: writes +blob+ to a temporary file, makes that
# file the source and converts it.
#
# Returns true.
# Raises RTesseract::ConversionError when any step fails.
def from_blob(blob)
  blob_file = Tempfile.new("blob")
  begin
    # Image data is binary; avoid newline or encoding translation on write.
    blob_file.binmode
    blob_file.write(blob)
    blob_file.flush
    self.source = blob_file.path
    convert
  ensure
    # Remove the temporary file even when the conversion fails.
    remove_file([blob_file])
  end
  true
rescue StandardError
  raise RTesseract::ConversionError
end
#image_name ⇒ Object
59 60 61 |
# File 'lib/rtesseract.rb', line 59
#
# Returns the basename of the current source.
def image_name
  @source.basename
end
#remove_file(files = []) ⇒ Object
Remove files
71 72 73 74 75 76 77 78 79 |
# File 'lib/rtesseract.rb', line 71
#
# Remove files: closes and unlinks every entry of +files+.
#
# files - objects responding to close and unlink.
#
# Every entry is attempted even if an earlier one fails, so a single bad
# entry no longer leaves the remaining temporary files behind.
#
# Returns true when everything was removed.
# Raises RTesseract::TempFilesNotRemovedError when any entry could not be
# removed (or +files+ could not be iterated).
def remove_file(files = [])
  failed = false
  files.each do |file|
    begin
      file.close
      file.unlink
    rescue StandardError
      failed = true
    end
  end
  raise RTesseract::TempFilesNotRemovedError if failed

  true
rescue StandardError
  raise RTesseract::TempFilesNotRemovedError
end
#source=(src) ⇒ Object
54 55 56 57 |
# File 'lib/rtesseract.rb', line 54
#
# Points the instance at a new source and resets @value to "" so text from
# a previous source is not reused.
def source=(src)
  @value = ""
  @source = Pathname.new(src)
end
#to_s ⇒ Object
Output value
160 161 162 163 164 165 166 167 168 |
# File 'lib/rtesseract.rb', line 160
#
# Output value: returns the text already held in @value when it is not "",
# otherwise converts first and returns the fresh @value.
#
# Raises RTesseract::ImageNotSelectedError when there is nothing to
# convert: the source is not a file and no image instance was given.
def to_s
  return @value unless @value == ""
  raise RTesseract::ImageNotSelectedError unless @source.file? || !@instance.nil?

  convert
  @value
end
#to_s_without_spaces ⇒ Object
Remove spaces and break-lines
171 172 173 |
# File 'lib/rtesseract.rb', line 171
#
# Returns the output of to_s with every space, newline and carriage return
# removed.
def to_s_without_spaces
  to_s.delete(" \n\r")
end