Class: RTesseract
- Inherits:
-
Object
- Object
- RTesseract
- Defined in:
- lib/rtesseract.rb,
lib/rtesseract/box.rb,
lib/rtesseract/mixed.rb,
lib/rtesseract/errors.rb,
lib/rtesseract/box_char.rb,
lib/rtesseract/configuration.rb
Overview
Configuration
Direct Known Subclasses
Defined Under Namespace
Classes: Box, BoxChar, Configuration, ConversionError, ErrorWithMemory, ImageNotSelectedError, Mixed, TempFilesNotRemovedError
Constant Summary collapse
- LANGUAGES =
Aliases to languages names
{ 'eng' => %w(en en-us english), 'ita' => %w(it), 'por' => %w(pt pt-br portuguese), 'spa' => %w(sp) }
Class Attribute Summary collapse
-
.configuration ⇒ Object
Returns the value of attribute configuration.
Instance Attribute Summary collapse
-
#configuration ⇒ Object
Returns the value of attribute configuration.
-
#processor ⇒ Object
readonly
Returns the value of attribute processor.
-
#source ⇒ Object
Returns the value of attribute source.
Class Method Summary collapse
- .choose_processor!(processor) ⇒ Object
- .configure {|configuration| ... } ⇒ Object
- .default_command ⇒ Object
-
.local_config(options = {}) ⇒ Object
Local config to instance.
- .read(src = nil, options = {}) {|image| ... } ⇒ Object
Instance Method Summary collapse
- #after_convert_hook ⇒ Object
-
#clear_console_output ⇒ Object
TODO: Clear console for MacOS or Windows.
- #config ⇒ Object
- #config_file ⇒ Object
- #config_hook ⇒ Object
-
#convert ⇒ Object
Convert image to string.
- #convert_command ⇒ Object
- #convert_text ⇒ Object
-
#crop!(_points = {}) ⇒ Object
Crop image to convert.
- #file_ext ⇒ Object
-
#from_blob(blob, ext = '') ⇒ Object
Read image from memory blob.
- #image ⇒ Object
-
#initialize(src = '', options = {}) ⇒ RTesseract
constructor
A new instance of RTesseract.
- #initialize_hook ⇒ Object
-
#lang ⇒ Object
Select the language. Languages: eng - English; deu - German; deu-f - German Fraktur; fra - French; ita - Italian; nld - Dutch; por - Portuguese; spa - Spanish; vie - Vietnamese. Note: Make sure the language is installed for tesseract.
-
#options_cmd ⇒ Object
Options on line.
-
#psm ⇒ Object
Page Segment Mode.
- #read ⇒ Object
-
#remove_file(files = []) ⇒ Object
Remove files.
-
#tessdata_dir ⇒ Object
Tessdata Dir.
- #text_file ⇒ Object
- #text_file_with_ext(ext = nil) ⇒ Object
-
#to_s ⇒ Object
Output value.
-
#to_s_without_spaces ⇒ Object
Remove spaces and line breaks.
-
#user_patterns ⇒ Object
User Patterns.
-
#user_words ⇒ Object
User Words.
Constructor Details
#initialize(src = '', options = {}) ⇒ RTesseract
Returns a new instance of RTesseract.
23 24 25 26 27 28 29 30 |
# File 'lib/rtesseract.rb', line 23 def initialize(src = '', options = {}) self.configuration = RTesseract.local_config(options) @options = options || {} @value, @points = [nil, {}] @processor = RTesseract.choose_processor!(self.configuration.processor) @source = @processor.image?(src) ? src : Pathname.new(src) initialize_hook end |
Class Attribute Details
.configuration ⇒ Object
Returns the value of attribute configuration.
33 34 35 |
# File 'lib/rtesseract/configuration.rb', line 33 def configuration @configuration end |
Instance Attribute Details
#configuration ⇒ Object
Returns the value of attribute configuration.
19 20 21 |
# File 'lib/rtesseract.rb', line 19 def configuration @configuration end |
#processor ⇒ Object (readonly)
Returns the value of attribute processor.
20 21 22 |
# File 'lib/rtesseract.rb', line 20 def processor @processor end |
#source ⇒ Object
Returns the value of attribute source.
21 22 23 |
# File 'lib/rtesseract.rb', line 21 def source @source end |
Class Method Details
.choose_processor!(processor) ⇒ Object
226 227 228 229 230 231 232 233 234 235 236 237 |
# File 'lib/rtesseract.rb', line 226 def self.choose_processor!(processor) processor = if MiniMagickProcessor.a_name?(processor.to_s) MiniMagickProcessor elsif NoneProcessor.a_name?(processor.to_s) NoneProcessor else RMagickProcessor end processor.setup processor end |
.configure {|configuration| ... } ⇒ Object
36 37 38 39 |
# File 'lib/rtesseract/configuration.rb', line 36 def self.configure self.configuration ||= Configuration.new yield(configuration) end |
.default_command ⇒ Object
41 42 43 44 45 |
# File 'lib/rtesseract/configuration.rb', line 41 def self.default_command TesseractBin::Executables[:tesseract] || 'tesseract' rescue 'tesseract' end |
.local_config(options = {}) ⇒ Object
Local config to instance
48 49 50 51 52 53 54 55 56 |
# File 'lib/rtesseract/configuration.rb', line 48 def self.local_config(options = {}) RTesseract::Configuration.new.tap do |config| config.command = config.option(options, :command, RTesseract.default_command) config.processor = config.option(options, :processor, 'rmagick') config.load_options(options, [ :lang, :psm, :tessdata_dir, :user_words, :user_patterns ]) config.debug = config.option(options, :debug, false) config.options_cmd = [options.option(:options, nil)].flatten.compact end end |
.read(src = nil, options = {}) {|image| ... } ⇒ Object
35 36 37 38 39 40 41 42 |
# File 'lib/rtesseract.rb', line 35 def self.read(src = nil, options = {}) fail RTesseract::ImageNotSelectedError if src.nil? processor = RTesseract.choose_processor!(options.option(:processor, nil)) image = processor.read_with_processor(src.to_s) yield(image) object = RTesseract.new('', options).from_blob(image.to_blob) object end |
Instance Method Details
#after_convert_hook ⇒ Object
182 183 |
# File 'lib/rtesseract.rb', line 182 def after_convert_hook end |
#clear_console_output ⇒ Object
TODO: Clear console for MacOS or Windows
153 154 155 156 |
# File 'lib/rtesseract.rb', line 153 def clear_console_output return '' if self.configuration.debug return '2>/dev/null' if File.exist?('/dev/null') # Linux console clear end |
#config ⇒ Object
137 138 139 140 141 |
# File 'lib/rtesseract.rb', line 137 def config @options ||= {} config_hook @options.map { |k, v| "#{k} #{v}" }.join("\n") end |
#config_file ⇒ Object
143 144 145 146 147 148 149 150 |
# File 'lib/rtesseract.rb', line 143 def config_file config_hook return '' if @options == {} conf = Tempfile.new('config') conf.write(config) conf.flush conf.path end |
#config_hook ⇒ Object
134 135 |
# File 'lib/rtesseract.rb', line 134 def config_hook end |
#convert ⇒ Object
Convert image to string
186 187 188 189 190 191 192 193 |
# File 'lib/rtesseract.rb', line 186 def convert convert_command after_convert_hook convert_text remove_file([@image, text_file_with_ext]) rescue => error raise RTesseract::ConversionError.new(error), error, caller end |
#convert_command ⇒ Object
174 175 176 |
# File 'lib/rtesseract.rb', line 174 def convert_command `#{self.configuration.command} "#{image}" "#{text_file}" #{lang} #{psm} #{tessdata_dir} #{user_words} #{user_patterns} #{config_file} #{clear_console_output} #{self.configuration.options_cmd.join(' ')}` end |
#convert_text ⇒ Object
178 179 180 |
# File 'lib/rtesseract.rb', line 178 def convert_text @value = File.read(text_file_with_ext).to_s end |
#crop!(_points = {}) ⇒ Object
Crop image to convert
57 58 59 60 61 |
# File 'lib/rtesseract.rb', line 57 def crop!(_points = {}) @value = nil @points = _points self end |
#file_ext ⇒ Object
162 163 164 |
# File 'lib/rtesseract.rb', line 162 def file_ext '.txt' end |
#from_blob(blob, ext = '') ⇒ Object
Read image from memory blob
196 197 198 199 200 201 202 203 204 205 206 207 |
# File 'lib/rtesseract.rb', line 196 def from_blob(blob, ext = '') blob_file = Tempfile.new(['blob', ext], encoding: 'ascii-8bit') blob_file.binmode.write(blob) blob_file.rewind blob_file.flush self.source = blob_file.path convert remove_file([blob_file]) self rescue => error raise RTesseract::ConversionError.new(error), error, caller end |
#image ⇒ Object
158 159 160 |
# File 'lib/rtesseract.rb', line 158 def image (@image = @processor.image_to_tif(@source, @points)).path end |
#initialize_hook ⇒ Object
32 33 |
# File 'lib/rtesseract.rb', line 32 def initialize_hook end |
#lang ⇒ Object
Select the language
Languages
- eng - English
- deu - German
- deu-f - German Fraktur
- fra - French
- ita - Italian
- nld - Dutch
- por - Portuguese
- spa - Spanish
- vie - Vietnamese
Note: Make sure you have installed the language for tesseract
90 91 92 93 94 95 96 97 98 99 |
# File 'lib/rtesseract.rb', line 90 def lang language = "#{self.configuration.lang}".strip.downcase LANGUAGES.each do |value, names| return " -l #{value} " if names.include? language end return " -l #{language} " if language.size > 0 '' rescue '' end |
#options_cmd ⇒ Object
Options on line
130 131 132 |
# File 'lib/rtesseract.rb', line 130 def options_cmd self.configuration.options_cmd end |
#psm ⇒ Object
Page Segment Mode
102 103 104 105 106 |
# File 'lib/rtesseract.rb', line 102 def psm (self.configuration.psm.nil? ? '' : " -psm #{self.configuration.psm} ") rescue '' end |
#read ⇒ Object
44 45 46 47 48 49 |
# File 'lib/rtesseract.rb', line 44 def read image = @processor.read_with_processor(@source.to_s) new_image = yield(image) from_blob(new_image.to_blob, File.extname(@source.to_s)) self end |
#remove_file(files = []) ⇒ Object
Remove files
64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/rtesseract.rb', line 64 def remove_file(files = []) files.each do |file| if file.is_a?(Tempfile) file.close file.unlink else File.unlink(file) end end true rescue => error raise RTesseract::TempFilesNotRemovedError.new(error: error, files: files) end |
#tessdata_dir ⇒ Object
Tessdata Dir
109 110 111 112 113 |
# File 'lib/rtesseract.rb', line 109 def tessdata_dir (self.configuration.tessdata_dir.nil? ? '' : " --tessdata-dir #{self.configuration.tessdata_dir} ") rescue '' end |
#text_file ⇒ Object
166 167 168 |
# File 'lib/rtesseract.rb', line 166 def text_file @text_file = Pathname.new(Dir.tmpdir).join("#{Time.now.to_f}#{rand(1500)}").to_s end |
#text_file_with_ext(ext = nil) ⇒ Object
170 171 172 |
# File 'lib/rtesseract.rb', line 170 def text_file_with_ext(ext = nil) [@text_file, ext || file_ext].join('') end |
#to_s ⇒ Object
Output value
210 211 212 213 214 215 216 217 218 219 |
# File 'lib/rtesseract.rb', line 210 def to_s return @value if @value != nil if @processor.image?(@source) || @source.file? convert @value else fail RTesseract::ImageNotSelectedError.new(@source) end end |
#to_s_without_spaces ⇒ Object
Remove spaces and break-lines
222 223 224 |
# File 'lib/rtesseract.rb', line 222 def to_s_without_spaces to_s.gsub(' ', '').gsub("\n", '').gsub("\r", '') end |
#user_patterns ⇒ Object
User Patterns
123 124 125 126 127 |
# File 'lib/rtesseract.rb', line 123 def user_patterns (self.configuration.user_patterns.nil? ? '' : " --user-patterns #{self.configuration.user_patterns} ") rescue '' end |
#user_words ⇒ Object
User Words
116 117 118 119 120 |
# File 'lib/rtesseract.rb', line 116 def user_words (self.configuration.user_words.nil? ? '' : " --user-words #{self.configuration.user_words} ") rescue '' end |