Class: RTesseract
- Inherits:
-
Object
- Object
- RTesseract
- Defined in:
- lib/rtesseract.rb,
lib/rtesseract/mixed.rb,
lib/rtesseract/errors.rb
Defined Under Namespace
Classes: ConversionError, ImageNotSelectedError, Mixed, TempFilesNotRemovedError
Instance Attribute Summary collapse
-
#lang ⇒ Object
Select the language. Supported languages: eng - English, deu - German, deu-f - German fraktur, fra - French, ita - Italian, nld - Dutch, por - Portuguese, spa - Spanish, vie - Vietnamese. Note: Make sure the language is installed for tesseract.
-
#options ⇒ Object
Returns the value of attribute options.
-
#processor ⇒ Object
readonly
Returns the value of attribute processor.
-
#psm ⇒ Object
Page Segment Mode.
Class Method Summary collapse
Instance Method Summary collapse
-
#clear_console_output ⇒ Object
TODO: Clear console for MacOS or Windows.
- #config ⇒ Object
- #config_file ⇒ Object
-
#convert ⇒ Object
Convert image to string.
-
#crop!(x, y, width, height) ⇒ Object
Crop image to convert.
- #default_command ⇒ Object
-
#from_blob(blob) ⇒ Object
Read image from memory blob.
- #image_name ⇒ Object
-
#initialize(src = "", options = {}) ⇒ RTesseract
constructor
A new instance of RTesseract.
-
#remove_file(files = []) ⇒ Object
Remove files.
- #source=(src) ⇒ Object
-
#to_s ⇒ Object
Output value.
-
#to_s_without_spaces ⇒ Object
Remove spaces and line breaks.
Constructor Details
#initialize(src = "", options = {}) ⇒ RTesseract
Returns a new instance of RTesseract.
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/rtesseract.rb', line 13 def initialize(src = "", options = {}) @command = options.delete(:command) || default_command @lang = options.delete(:lang) || options.delete("lang") || "" @psm = options.delete(:psm) || options.delete("psm") || nil @clear_console_output = options.delete(:clear_console_output) @clear_console_output = true if @clear_console_output.nil? @options = options @value = "" @x, @y, @w, @h = [] @processor = options.delete(:processor) || options.delete("processor") choose_processor! if is_a_instance?(src) @source = Pathname.new '.' @instance = src else @instance = nil @source = Pathname.new src end end |
Instance Attribute Details
#lang ⇒ Object
Select the language
Languages
-
eng - English
-
deu - German
-
deu-f - German fraktur
-
fra - French
-
ita - Italian
-
nld - Dutch
-
por - Portuguese
-
spa - Spanish
-
vie - Vietnamese
Note: Make sure the language is installed for tesseract.
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/rtesseract.rb', line 92 def lang language = "#{@lang}".strip.downcase { #Aliases to languages names "eng" => ["en","en-us","english"], "ita" => ["it"], "por" => ["pt","pt-br","portuguese"], "spa" => ["sp"] }.each do |value,names| return " -l #{value} " if names.include? language end return " -l #{language} " if language.size > 0 "" rescue "" end |
#options ⇒ Object
Returns the value of attribute options.
8 9 10 |
# File 'lib/rtesseract.rb', line 8 def options @options end |
#processor ⇒ Object (readonly)
Returns the value of attribute processor.
11 12 13 |
# File 'lib/rtesseract.rb', line 11 def processor @processor end |
#psm ⇒ Object
Page Segment Mode
109 110 111 112 113 |
# File 'lib/rtesseract.rb', line 109 def psm @psm.nil? ? "" : " -psm #{@psm} " rescue "" end |
Class Method Details
.read(src = nil, options = {}) {|image| ... } ⇒ Object
39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/rtesseract.rb', line 39 def self.read(src = nil, options = {}, &block) raise RTesseract::ImageNotSelectedError if src == nil processor = options.delete(:processor) || options.delete("processor") if processor == "mini_magick" image = MiniMagickProcessor.read_with_processor(src.to_s) else image = RMagickProcessor.read_with_processor(src.to_s) end yield image object = RTesseract.new("", options) object.from_blob(image.to_blob) object end |
Instance Method Details
#clear_console_output ⇒ Object
TODO: Clear console for MacOS or Windows
129 130 131 132 |
# File 'lib/rtesseract.rb', line 129 def clear_console_output return "" unless @clear_console_output return "2>/dev/null" if File.exist?("/dev/null") #Linux console clear end |
#config ⇒ Object
115 116 117 118 |
# File 'lib/rtesseract.rb', line 115 def config @options ||= {} @options.collect{|k,v| "#{k} #{v}" }.join("\n") end |
#config_file ⇒ Object
120 121 122 123 124 125 126 |
# File 'lib/rtesseract.rb', line 120 def config_file return "" if @options == {} conf = Tempfile.new("config") conf.write(config) conf.flush conf.path end |
#convert ⇒ Object
Convert image to string
135 136 137 138 139 140 141 142 143 |
# File 'lib/rtesseract.rb', line 135 def convert path = Tempfile.new(["",".txt"]).path.to_s tmp_image = image_to_tiff `#{@command} "#{tmp_image.path}" "#{path.gsub(".txt","")}" #{lang} #{psm} #{config_file} #{clear_console_output}` @value = File.read("#{path}").to_s remove_file([tmp_image]) rescue raise RTesseract::ConversionError end |
#crop!(x, y, width, height) ⇒ Object
Crop image to convert
64 65 66 67 |
# File 'lib/rtesseract.rb', line 64 def crop!(x,y,width,height) @x, @y, @w, @h = x, y, width, height self end |
#default_command ⇒ Object
33 34 35 36 37 |
# File 'lib/rtesseract.rb', line 33 def default_command TesseractBin::Executables[:tesseract] || 'tesseract' rescue "tesseract" end |
#from_blob(blob) ⇒ Object
Read image from memory blob
146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/rtesseract.rb', line 146 def from_blob(blob) blob_file = Tempfile.new("blob") blob_file.write(blob) blob_file.rewind blob_file.flush self.source = blob_file.path convert remove_file([blob_file]) rescue raise RTesseract::ConversionError end |
#image_name ⇒ Object
58 59 60 |
# File 'lib/rtesseract.rb', line 58 def image_name @source.basename end |
#remove_file(files = []) ⇒ Object
Remove files
70 71 72 73 74 75 76 77 78 |
# File 'lib/rtesseract.rb', line 70 def remove_file(files=[]) files.each do |file| file.close file.unlink end true rescue raise RTesseract::TempFilesNotRemovedError end |
#source=(src) ⇒ Object
53 54 55 56 |
# File 'lib/rtesseract.rb', line 53 def source= src @value = "" @source = Pathname.new src end |
#to_s ⇒ Object
Output value
159 160 161 162 163 164 165 166 167 |
# File 'lib/rtesseract.rb', line 159 def to_s return @value if @value != "" if @source.file? || @instance.present? convert @value else raise RTesseract::ImageNotSelectedError end end |
#to_s_without_spaces ⇒ Object
Remove spaces and line breaks
170 171 172 |
# File 'lib/rtesseract.rb', line 170 def to_s_without_spaces to_s.gsub(" ","").gsub("\n","").gsub("\r","") end |