Class: RTesseract

Inherits:
Object
  • Object
  • RTesseract
Defined in:
lib/rtesseract.rb,
lib/rtesseract/box.rb,
lib/rtesseract/pdf.rb,
lib/rtesseract/tsv.rb,
lib/rtesseract/base.rb,
lib/rtesseract/text.rb,
lib/rtesseract/check.rb,
lib/rtesseract/command.rb,
lib/rtesseract/version.rb,
lib/rtesseract/configuration.rb

Defined Under Namespace

Modules: Base, Box, Pdf, Text, Tsv

Classes: Command, Configuration, Error

Constant Summary collapse

VERSION =
'3.0.4'.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(src = '', options = {}) ⇒ RTesseract

Returns a new instance of RTesseract.



15
16
17
18
19
# File 'lib/rtesseract.rb', line 15

# Sets up the per-instance state: the source, the effective configuration
# and an empty error list.
#
# @param src [Object] stored unchanged in @source (defaults to '')
# @param options [Hash] merged over RTesseract.config to build @config
def initialize(src = '', options = {})
  @errors = []
  @config = RTesseract.config.merge(options)
  @source = src
end

Instance Attribute Details

#config ⇒ Object (readonly)

Returns the value of attribute config.



13
14
15
# File 'lib/rtesseract.rb', line 13

# Reader for the configuration held by this instance.
#
# @return [Object] the current value of @config
def config
  @config
end

#errors ⇒ Object (readonly)

Returns the value of attribute errors.



47
48
49
# File 'lib/rtesseract.rb', line 47

# Reader for the error collection held by this instance.
#
# @return [Object] the current value of @errors
def errors
  @errors
end

#source ⇒ Object (readonly)

Returns the value of attribute source.



13
14
15
# File 'lib/rtesseract.rb', line 13

# Reader for the source held by this instance.
#
# @return [Object] the current value of @source
def source
  @source
end

Class Method Details

.check_version! ⇒ Object

Raises:

  • (RTesseract::Error)



9
10
11
# File 'lib/rtesseract/check.rb', line 9

# Verifies that the detected Tesseract version is at least 3.05.
#
# @raise [RTesseract::Error] when RTesseract.tesseract_version is below 3.05
# @return [nil] when the check passes
def check_version!
  return unless RTesseract.tesseract_version < 3.05

  raise RTesseract::Error, 'Tesseract OCR 3.5 or later not installed'
end

.config ⇒ Object



15
16
17
18
19
20
# File 'lib/rtesseract/configuration.rb', line 15

# Lazily builds and memoizes the default configuration.
#
# The first call creates an RTesseract::Configuration with the default
# command and debug file; later calls return the same object until @config
# is cleared.
#
# @return [RTesseract::Configuration] the memoized default configuration
def config
  @config ||= RTesseract::Configuration.new(
    command: 'tesseract',
    # File::NULL is the platform's null device: '/dev/null' on Unix-like
    # systems (same as before) and the equivalent device elsewhere.
    debug_file: File::NULL
  )
end

.configure {|config| ... } ⇒ Object

Yields:

  • (config)



22
23
24
# File 'lib/rtesseract/configuration.rb', line 22

# Yields the current configuration object to the caller's block so it can
# be adjusted in place. Does nothing when no block is given.
#
# @yield [config] the object returned by #config
# @return [Object, nil] the block's result, or nil without a block
def configure
  return unless block_given?

  yield config
end

.reset_config! ⇒ Object



26
27
28
# File 'lib/rtesseract/configuration.rb', line 26

# Discards the stored configuration by setting @config to nil.
#
# @return [nil]
def reset_config!
  @config = nil
end

.tesseract_version ⇒ Object



3
4
5
6
7
# File 'lib/rtesseract/check.rb', line 3

# Detects the installed Tesseract version by running the configured command
# with `--version` and extracting the first "major.minor" number from its
# combined stdout/stderr output.
#
# @return [Float, Integer] the parsed version (e.g. 4.1), or 0 when the
#   command cannot be found or its output contains no version number
def tesseract_version
  output, = Open3.capture2e(RTesseract.config.command, '--version')
  # The dot is escaped so only a literal "major.minor" matches; an unescaped
  # dot would also accept strings such as "12 34".
  version = output.to_s[/\d+\.\d+/]
  # No match means no usable version; report 0 like the ENOENT case instead
  # of calling [] on nil.
  version ? version.to_f : 0
rescue Errno::ENOENT
  0
end

Instance Method Details

#to_box ⇒ Object



21
22
23
# File 'lib/rtesseract.rb', line 21

# Delegates to Box.run, passing the stored source, the errors collection
# and this instance's config.
#
# @return [Object] whatever Box.run returns
def to_box
  Box.run(@source, @errors, config)
end

#to_pdf ⇒ Object



29
30
31
# File 'lib/rtesseract.rb', line 29

# Delegates to Pdf.run, passing the stored source, the errors collection
# and this instance's config.
#
# @return [Object] whatever Pdf.run returns
def to_pdf
  Pdf.run(@source, @errors, config)
end

#to_s ⇒ Object

Returns the output of Text.run for the stored source.



38
39
40
# File 'lib/rtesseract.rb', line 38

# Delegates to Text.run, passing the stored source, the errors collection
# and this instance's config.
#
# @return [Object] whatever Text.run returns
def to_s
  Text.run(@source, @errors, config)
end

#to_s_without_spaces ⇒ Object

Returns the result of #to_s with all whitespace (including line breaks) removed.



43
44
45
# File 'lib/rtesseract.rb', line 43

# Returns the #to_s output with every whitespace character stripped out,
# line breaks included.
#
# @return [String] the text with all /\s/ matches removed
def to_s_without_spaces
  recognized = to_s
  recognized.gsub(/\s/, '')
end

#to_tsv ⇒ Object



33
34
35
# File 'lib/rtesseract.rb', line 33

# Delegates to Tsv.run, passing the stored source, the errors collection
# and this instance's config.
#
# @return [Object] whatever Tsv.run returns
def to_tsv
  Tsv.run(@source, @errors, config)
end

#words ⇒ Object



25
26
27
# File 'lib/rtesseract.rb', line 25

# Collects the :word entry of every element returned by #to_box.
#
# @return [Array] one :word value per box, in the order #to_box yields them
def words
  boxes = to_box
  boxes.map { |box| box[:word] }
end