Class: RTesseract

Inherits:
Object
  • Object
show all
Defined in:
lib/rtesseract.rb,
lib/rtesseract/box.rb,
lib/rtesseract/pdf.rb,
lib/rtesseract/tsv.rb,
lib/rtesseract/base.rb,
lib/rtesseract/text.rb,
lib/rtesseract/check.rb,
lib/rtesseract/command.rb,
lib/rtesseract/version.rb,
lib/rtesseract/configuration.rb

Defined Under Namespace

Modules: Base, Box, Pdf, Text, Tsv Classes: Command, Configuration, Error

Constant Summary collapse

# Gem version string; frozen so the constant cannot be mutated in place.
VERSION = '3.0.2'.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(src = '', options = {}) ⇒ RTesseract

Returns a new instance of RTesseract.



15
16
17
18
# File 'lib/rtesseract.rb', line 15

# Creates a wrapper around a single OCR source.
#
# @param src the source to process; stored untouched in @source
#   (NOTE(review): presumably an image path or file object - confirm with callers)
# @param options [Hash] per-instance settings merged onto the global
#   RTesseract.config; the merge result is stored in @config
def initialize(src = '', options = {})
  @config = RTesseract.config.merge(options)
  @source = src
end

Instance Attribute Details

#config ⇒ Object (readonly)

Returns the value of attribute config.



13
14
15
# File 'lib/rtesseract.rb', line 13

# Reader for this instance's configuration.
#
# @return the value held in @config, which #initialize assigns from
#   RTesseract.config.merge(options)
def config
  @config
end

#source ⇒ Object (readonly)

Returns the value of attribute source.



13
14
15
# File 'lib/rtesseract.rb', line 13

# Reader for the source this instance was built with.
#
# @return the value held in @source, i.e. the `src` argument given to
#   #initialize (an empty string by default)
def source
  @source
end

Class Method Details

.check_version! ⇒ Object

Raises:

  • (RTesseract::Error)



10
11
12
# File 'lib/rtesseract/check.rb', line 10

# Verifies that a usable Tesseract is installed.
#
# @raise [RTesseract::Error] when RTesseract.tesseract_version is below 3.05
# @return [nil] when the detected version is acceptable
def check_version!
  return unless RTesseract.tesseract_version < 3.05

  raise RTesseract::Error, 'Tesseract OCR 3.5 or later not installed'
end

.config ⇒ Object



11
12
13
14
15
16
# File 'lib/rtesseract/configuration.rb', line 11

# Lazily builds and memoizes the global default configuration.
#
# @return [RTesseract::Configuration] the shared configuration; the same
#   object is returned on every call until @config is cleared
def config
  # File::NULL is the platform's null device ("/dev/null" on Unix-likes,
  # "NUL" on Windows), so the default no longer assumes a Unix filesystem.
  # NOTE(review): debug_file is presumably where Tesseract's debug output
  # is redirected - confirm against the command builder.
  @config ||= RTesseract::Configuration.new(
    command: 'tesseract',
    debug_file: File::NULL
  )
end

.configure {|config| ... } ⇒ Object

Yields:

  • (config)



18
19
20
# File 'lib/rtesseract/configuration.rb', line 18

# Hands the global configuration to the caller's block for modification.
#
# @yield [config] the object returned by .config
# @return the block's result, or nil when no block is given
def configure
  return unless block_given?

  yield config
end

.reset_config! ⇒ Object



22
23
24
# File 'lib/rtesseract/configuration.rb', line 22

# Discards the memoized global configuration by setting @config to nil.
# Because .config memoizes with `@config ||=`, the next call to .config
# builds a fresh default RTesseract::Configuration.
#
# @return [nil]
def reset_config!
  @config = nil
end

.tesseract_version ⇒ Object



4
5
6
7
8
# File 'lib/rtesseract/check.rb', line 4

# Detects the installed Tesseract version by running `<command> --version`
# (stdout and stderr combined).
#
# Only the first "major.minor" pair found in the output is kept, so a
# "3.05.01" banner yields 3.05.
#
# @return [Numeric] the version as a float, or 0 when the executable cannot
#   be found or its output contains no recognisable version number
def tesseract_version
  output, _status = Open3.capture2e(RTesseract.config.command, '--version')
  # The dot is escaped so it matches only a literal "."; String#[] returns
  # nil when nothing matches and nil.to_f is 0.0, so unexpected output no
  # longer raises NoMethodError.
  output.to_s[/\d+\.\d+/].to_f
rescue Errno::ENOENT
  # The configured executable does not exist: report "no version".
  0
end

Instance Method Details

#to_box ⇒ Object



20
21
22
# File 'lib/rtesseract.rb', line 20

# Runs the Box backend on this instance's source and configuration.
#
# @return whatever Box.run returns (#words treats it as a collection of
#   entries indexable by :word)
def to_box
  Box.run(source, config)
end

#to_pdf ⇒ Object



28
29
30
# File 'lib/rtesseract.rb', line 28

# Runs the Pdf backend on this instance's source and configuration.
#
# @return whatever Pdf.run returns
def to_pdf
  Pdf.run(source, config)
end

#to_s ⇒ Object

Output value



37
38
39
# File 'lib/rtesseract.rb', line 37

# Output value: runs the Text backend on this instance's source and
# configuration.
#
# @return whatever Text.run returns (#to_s_without_spaces calls gsub on it,
#   so it is expected to be string-like)
def to_s
  Text.run(source, config)
end

#to_s_without_spaces ⇒ Object

Removes spaces and line breaks from the output text.



42
43
44
# File 'lib/rtesseract.rb', line 42

# Returns the #to_s output with every whitespace character (spaces, tabs,
# line breaks) removed.
#
# @return [String]
def to_s_without_spaces
  text = to_s
  text.gsub(/\s+/, '')
end

#to_tsv ⇒ Object



32
33
34
# File 'lib/rtesseract.rb', line 32

# Runs the Tsv backend on this instance's source and configuration.
#
# @return whatever Tsv.run returns
def to_tsv
  Tsv.run(source, config)
end

#words ⇒ Object



24
25
26
# File 'lib/rtesseract.rb', line 24

# Collects the :word value of every entry returned by #to_box.
#
# @return [Array] one element per box entry (nil for entries without :word)
def words
  boxes = to_box
  boxes.map { |entry| entry[:word] }
end