Class: RTesseract

Inherits:
Object
  • Object
show all
Defined in:
lib/rtesseract.rb,
lib/rtesseract/box.rb,
lib/rtesseract/pdf.rb,
lib/rtesseract/tsv.rb,
lib/rtesseract/base.rb,
lib/rtesseract/text.rb,
lib/rtesseract/check.rb,
lib/rtesseract/command.rb,
lib/rtesseract/version.rb,
lib/rtesseract/configuration.rb

Defined Under Namespace

Modules: Base, Box, Pdf, Text, Tsv
Classes: Command, Configuration, Error

Constant Summary collapse

VERSION =
'3.1.2'

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(src = '', options = {}) ⇒ RTesseract

Returns a new instance of RTesseract.



17
18
19
20
21
# File 'lib/rtesseract.rb', line 17

# Sets up a new wrapper around +src+.
#
# @param src [Object] stored untouched in @source and later handed to the
#   Box/Pdf/Text/Tsv runners (presumably an image path — confirm with callers)
# @param options [Hash] passed to RTesseract.config.merge to build the
#   per-instance configuration
def initialize(src = '', options = {})
  @source = src
  @errors = []
  @config = RTesseract.config.merge(options)
end

Instance Attribute Details

#config ⇒ Object (readonly)

Returns the value of attribute config.



15
16
17
# File 'lib/rtesseract.rb', line 15

# Reader for the per-instance configuration.
#
# @return [Object] the value held in @config, which #initialize assigns from
#   RTesseract.config.merge(options)
def config
  @config
end

#errors ⇒ Object (readonly)

Returns the value of attribute errors.



49
50
51
# File 'lib/rtesseract.rb', line 49

# Reader for the error list.
#
# @return [Object] the value held in @errors; #initialize sets it to an empty
#   Array, and the same object is handed to the Box/Pdf/Text/Tsv runners
def errors
  @errors
end

#source ⇒ Object (readonly)

Returns the value of attribute source.



15
16
17
# File 'lib/rtesseract.rb', line 15

# Reader for the input given to the constructor.
#
# @return [Object] the value held in @source (the +src+ argument of
#   #initialize, '' by default)
def source
  @source
end

Class Method Details

.check_version! ⇒ Object

Raises:

  • (RTesseract::Error) — when RTesseract.tesseract_version is below 3.05



11
12
13
# File 'lib/rtesseract/check.rb', line 11

# Verifies that the detected Tesseract version is recent enough.
#
# @raise [RTesseract::Error] when RTesseract.tesseract_version is below 3.05
# @return [nil] when the version is acceptable
def check_version!
  return unless RTesseract.tesseract_version < 3.05

  # NOTE(review): the message says "3.5" while the threshold checked is 3.05.
  raise RTesseract::Error, 'Tesseract OCR 3.5 or later not installed'
end

.config ⇒ Object



17
18
19
20
21
22
# File 'lib/rtesseract/configuration.rb', line 17

# Lazily builds and memoizes the default configuration.
#
# The debug file defaults to the platform's null device (File::NULL) instead
# of a hard-coded '/dev/null', so the default is also valid on platforms
# without that path; on Unix-like systems the value is unchanged.
#
# @return [RTesseract::Configuration] the same object on every call until
#   @config is cleared
def config
  @config ||= RTesseract::Configuration.new(
    command: 'tesseract',
    debug_file: File::NULL
  )
end

.configure {|config| ... } ⇒ Object

Yields:

  • (config) — the object returned by .config



24
25
26
# File 'lib/rtesseract/configuration.rb', line 24

# Hands the configuration object (the result of +config+) to the caller's
# block.
#
# @yield [config] the object returned by +config+
# @return [Object, nil] the block's result, or nil when no block is given
def configure
  return unless block_given?

  yield config
end

.reset_config! ⇒ Object



28
29
30
# File 'lib/rtesseract/configuration.rb', line 28

# Clears the stored configuration by setting @config to nil.
#
# @return [nil]
def reset_config!
  @config = nil
end

.tesseract_version ⇒ Object



5
6
7
8
9
# File 'lib/rtesseract/check.rb', line 5

# Detects the installed Tesseract version by running the configured command
# with `--version` and parsing the first "major.minor" number in its output.
#
# The dot in the pattern is escaped so that a plain run of digits (e.g. a year)
# is not mistaken for a version, and output without any version number yields
# 0.0 instead of raising NoMethodError on a nil match.
#
# @return [Float, Integer] the major.minor version as a Float; 0.0 when the
#   output contains no version number; 0 when the command cannot be found
def tesseract_version
  output, _status = Open3.capture2e(RTesseract.config.command, '--version')
  # String#[] returns nil on no match, and nil.to_f is 0.0.
  output.to_s[/\d+\.\d+/].to_f
rescue Errno::ENOENT
  0
end

Instance Method Details

#to_box ⇒ Object



23
24
25
# File 'lib/rtesseract.rb', line 23

# Runs the Box module on this instance's input.
#
# @return [Object] whatever Box.run returns for the stored source, the
#   shared error list and the instance configuration
def to_box
  Box.run(
    @source,
    @errors,
    config
  )
end

#to_pdf ⇒ Object



31
32
33
# File 'lib/rtesseract.rb', line 31

# Runs the Pdf module on this instance's input.
#
# @return [Object] whatever Pdf.run returns for the stored source, the
#   shared error list and the instance configuration
def to_pdf
  Pdf.run(
    @source,
    @errors,
    config
  )
end

#to_s ⇒ Object

Returns the output produced by Text.run for the source.



40
41
42
# File 'lib/rtesseract.rb', line 40

# Runs the Text module on this instance's input.
#
# @return [Object] whatever Text.run returns for the stored source, the
#   shared error list and the instance configuration
def to_s
  Text.run(
    @source,
    @errors,
    config
  )
end

#to_s_without_spaces ⇒ Object

Returns the #to_s output with all whitespace, including line breaks, removed.



45
46
47
# File 'lib/rtesseract.rb', line 45

# Returns the #to_s output with every whitespace character stripped out.
#
# @return [String] the text with all matches of /\s/ (spaces, tabs, line
#   breaks, ...) removed
def to_s_without_spaces
  recognized = to_s
  recognized.gsub(/\s/, '')
end

#to_tsv ⇒ Object



35
36
37
# File 'lib/rtesseract.rb', line 35

# Runs the Tsv module on this instance's input.
#
# @return [Object] whatever Tsv.run returns for the stored source, the
#   shared error list and the instance configuration
def to_tsv
  Tsv.run(
    @source,
    @errors,
    config
  )
end

#words ⇒ Object



27
28
29
# File 'lib/rtesseract.rb', line 27

# Collects the :word entry of every element returned by #to_box.
#
# @return [Array] one item per #to_box element, in the same order (nil for
#   elements without a :word key, assuming the elements are Hashes)
def words
  boxes = to_box
  boxes.map { |entry| entry[:word] }
end