Class: RTesseract::BoxChar

Inherits:
Box show all
Defined in:
lib/rtesseract/box_char.rb

Overview

Class to read char positions from an image

Constant Summary

Constants inherited from RTesseract

LANGUAGES

Instance Attribute Summary

Attributes inherited from RTesseract

#configuration, #processor, #source

Instance Method Summary collapse

Methods inherited from Box

#after_convert_hook, #initialize_hook, #to_s, #words

Methods inherited from RTesseract

#after_convert_hook, #clean, #clear_console_output, clear_pdf_option, #config, #config_file, configure, #convert, #convert_command, #convert_pdf, #convert_result, #crop!, default_command, #file_dest, #file_with_ext, #from_blob, #image, #initialize, #initialize_hook, #lang, local_config, #oem, #option_to_string, #options_cmd, #pdf?, #psm, #read, read, #tessdata_dir, #tesseract_version, #to_pdf, #to_s, #to_s_without_spaces, #user_patterns, #user_words

Constructor Details

This class inherits a constructor from RTesseract

Instance Method Details

#config_hook ⇒ Object



6
7
8
# File 'lib/rtesseract/box_char.rb', line 6

# Configuration hook: sets the 'tessedit_create_boxfile' entry of @options
# to 1 so that the output is split into individual chars.
#
# @return [Integer] the value stored (1)
def config_hook
  boxfile_option = 'tessedit_create_boxfile'
  @options[boxfile_option] = 1
end

#convert_text ⇒ Object



22
23
24
25
26
27
28
29
# File 'lib/rtesseract/box_char.rb', line 22

# Parses the text returned by #parse_file into one hash per character and
# stores the resulting list in @value.
#
# Every non-blank line is split on whitespace; the first five fields are read
# as the char followed by its x_start, y_start, x_end and y_end coordinates
# (converted with #to_i). Any further fields on the line are ignored.
#
# Blank or whitespace-only lines are skipped: they split into no fields at
# all and would otherwise produce a phantom entry with a nil char and
# all-zero coordinates.
#
# @return [Array<Hash>] hashes with the keys :char, :x_start, :y_start,
#   :x_end and :y_end, in file order (the same array assigned to @value)
def convert_text
  rows = parse_file.each_line.map { |line| line.split(' ') }.reject(&:empty?)
  @value = rows.map do |char, x_start, y_start, x_end, y_end, _rest|
    { char: char, x_start: x_start.to_i, y_start: y_start.to_i, x_end: x_end.to_i, y_end: y_end.to_i }
  end
end

#file_ext ⇒ Object

File extension of the result file ('.box')



13
14
15
# File 'lib/rtesseract/box_char.rb', line 13

# Extension of the result file.
#
# @return [String] always '.box'
def file_ext
  '.box'
end

#parse_file ⇒ Object

Read the result file



18
19
20
# File 'lib/rtesseract/box_char.rb', line 18

# Reads the result file.
#
# The path is obtained from #file_with_ext and the whole file is read in one
# go; the content is passed through #to_s before being returned.
#
# @return [String] the content of the result file
def parse_file
  result_path = file_with_ext
  content = File.read(result_path)
  content.to_s
end