Class: RTesseract

Inherits:
Object
  • Object
show all
Defined in:
lib/rtesseract.rb,
lib/rtesseract/box.rb,
lib/rtesseract/pdf.rb,
lib/rtesseract/tsv.rb,
lib/rtesseract/text.rb,
lib/rtesseract/check.rb,
lib/rtesseract/command.rb,
lib/rtesseract/version.rb,
lib/rtesseract/configuration.rb

Defined Under Namespace

Modules: Box, Pdf, Text, Tsv Classes: Command, Configuration, Error

Constant Summary collapse

VERSION =
'3.0.0'.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(src = '', options = {}) ⇒ RTesseract

Returns a new instance of RTesseract.



16
17
18
19
# File 'lib/rtesseract.rb', line 16

# Builds an OCR wrapper around +src+.
#
# The instance configuration is the class-level RTesseract.config with
# +options+ merged on top of it.
#
# @param src [Object] the source handed to the Box/Pdf/Text/Tsv runners
#   (presumably an image path — confirm against the runners)
# @param options [Hash] per-instance settings merged over RTesseract.config
def initialize(src = '', options = {})
  defaults = RTesseract.config
  @source = src
  @config = defaults.merge(options)
end

Instance Attribute Details

#config ⇒ Object (readonly)

Returns the value of attribute config.



14
15
16
# File 'lib/rtesseract.rb', line 14

# Read-only accessor for the instance configuration.
#
# @config is assigned in #initialize as RTesseract.config merged with the
# options passed to the constructor.
#
# @return [Object] the value stored in @config
def config
  @config
end

#source ⇒ Object (readonly)

Returns the value of attribute source.



14
15
16
# File 'lib/rtesseract.rb', line 14

# Read-only accessor for the source given to the constructor.
#
# @return [Object] the value stored in @source (defaults to '' in #initialize)
def source
  @source
end

Class Method Details

.check_version! ⇒ Object

Raises:

(RTesseract::Error) — when the detected Tesseract version is lower than 3.05



10
11
12
# File 'lib/rtesseract/check.rb', line 10

# Verifies that a sufficiently recent Tesseract binary is available.
#
# NOTE(review): the message says "3.5" while the threshold compared against
# is 3.05 — confirm which wording is intended before changing either.
#
# @return [nil] when the detected version is 3.05 or newer
# @raise [RTesseract::Error] when RTesseract.tesseract_version is below 3.05
def check_version!
  return unless RTesseract.tesseract_version < 3.05

  raise RTesseract::Error, 'Tesseract OCR 3.5 or later not installed'
end

.config ⇒ Object



11
12
13
14
15
16
# File 'lib/rtesseract/configuration.rb', line 11

# Returns the shared, lazily built default configuration.
#
# The Configuration is created on first access with the 'tesseract' command
# and '/dev/null' as debug file, then cached in @config for later calls.
#
# @return [RTesseract::Configuration] the memoized default configuration
def config
  return @config if @config

  @config = RTesseract::Configuration.new(command: 'tesseract', debug_file: '/dev/null')
end

.configure {|config| ... } ⇒ Object

Yields:

(config) — the configuration object returned by RTesseract.config



18
19
20
# File 'lib/rtesseract/configuration.rb', line 18

# Yields the shared configuration so the caller can adjust it in place.
#
# @yieldparam config [Object] the object returned by +config+
# @return [Object, nil] the block's result, or nil when no block is given
def configure
  return unless block_given?

  yield config
end

.reset_config! ⇒ Object



22
23
24
# File 'lib/rtesseract/configuration.rb', line 22

# Discards the memoized class-level configuration.
#
# Because .config builds its default Configuration only when @config is
# unset, the next call to .config after this creates a fresh default one.
#
# @return [nil]
def reset_config!
  @config = nil
end

.tesseract_version ⇒ Object



4
5
6
7
8
# File 'lib/rtesseract/check.rb', line 4

# Detects the installed Tesseract version by running `<command> --version`
# (command taken from RTesseract.config) and reading the first
# "major.minor" number from the combined stdout/stderr output.
#
# @return [Float, Integer] the version as a Float (e.g. 4.1); 0.0 when the
#   output contains no version number; 0 when the command cannot be found
def tesseract_version
  output, _status = Open3.capture2e(RTesseract.config.command, '--version')
  # The dot is escaped so that only a real "digits.digits" pair matches
  # (an unescaped dot would accept any long digit run such as a build date).
  # String#[] returns nil when nothing matches, and nil.to_f is 0.0, so
  # unparseable output reports "no version" instead of raising.
  output.to_s[/\d+\.\d+/].to_f
rescue Errno::ENOENT
  0
end

Instance Method Details

#to_box ⇒ Object



21
22
23
# File 'lib/rtesseract.rb', line 21

# Runs the Box module on the source using this instance's configuration.
#
# @return [Object] whatever Box.run returns; #words maps over this result
#   and reads the :word key of each element
def to_box
  Box.run(@source, config)
end

#to_pdf ⇒ Object



29
30
31
# File 'lib/rtesseract.rb', line 29

# Runs the Pdf module on the source using this instance's configuration.
#
# @return [Object] whatever Pdf.run returns (presumably the generated PDF —
#   confirm in lib/rtesseract/pdf.rb)
def to_pdf
  Pdf.run(@source, config)
end

#to_s ⇒ Object

Returns the text output produced by Text.run for the source.



38
39
40
# File 'lib/rtesseract.rb', line 38

# Runs the Text module on the source using this instance's configuration.
#
# @return [Object] whatever Text.run returns; #to_s_without_spaces calls
#   gsub on it, so it is expected to behave like a String
def to_s
  Text.run(@source, config)
end

#to_s_without_spaces ⇒ Object

Returns the text output with all whitespace (spaces, tabs and line breaks) removed.



43
44
45
# File 'lib/rtesseract.rb', line 43

# Returns the result of #to_s with every whitespace character stripped out.
#
# @return [String] the text with spaces, tabs and line breaks removed
def to_s_without_spaces
  recognized = to_s
  recognized.gsub(/\s/, '')
end

#to_tsv ⇒ Object



33
34
35
# File 'lib/rtesseract.rb', line 33

# Runs the Tsv module on the source using this instance's configuration.
#
# @return [Object] whatever Tsv.run returns (presumably the TSV output —
#   confirm in lib/rtesseract/tsv.rb)
def to_tsv
  Tsv.run(@source, config)
end

#words ⇒ Object



25
26
27
# File 'lib/rtesseract.rb', line 25

# Collects the :word value of every element returned by #to_box.
#
# @return [Array] one entry per element of #to_box, in the same order
def words
  boxes = to_box
  boxes.map { |entry| entry[:word] }
end