Class: RTesseract::Box

Inherits:
RTesseract
Defined in:
lib/rtesseract/box.rb

Overview

Class to read char positions from an image

Direct Known Subclasses

BoxChar

Defined Under Namespace

Classes: BoxParser

Constant Summary

Constants inherited from RTesseract

LANGUAGES, OPTIONS

Instance Attribute Summary

Attributes inherited from RTesseract

#image_object, #lang, #options, #options_cmd, #processor, #psm, #source

Instance Method Summary

Methods inherited from RTesseract

choose_processor!, #clear_console_output, #command_line_options, #config, #config_file, #convert, #convert_command, #crop!, #default_command, #from_blob, #image, #initialize, read, #read, #remove_file, #text_file, #text_file_with_ext, #to_s_without_spaces

Constructor Details

This class inherits a constructor from RTesseract

Instance Method Details

#after_convert_hook ⇒ Object



36
37
38
# File 'lib/rtesseract/box.rb', line 36

# Moves the file named by text_file_with_ext('.html') to the name returned by
# text_file_with_ext (no argument), when the '.html' file exists.
#
# A missing '.html' file is skipped silently. A failure while moving an
# existing file is raised to the caller rather than discarded.
# NOTE(review): presumably the '.html' name comes from Tesseract versions that
# write hOCR output with that extension -- confirm.
#
# @raise [SystemCallError] if an existing '.html' file cannot be moved
def after_convert_hook
  html_output = text_file_with_ext('.html')
  return unless File.exist?(html_output)

  FileUtils.mv(html_output, text_file_with_ext)
end

#config_hook ⇒ Object



12
13
14
# File 'lib/rtesseract/box.rb', line 12

# Sets the 'tessedit_create_hocr' entry of @options to 1
# (the split-words configuration).
#
# @return [Integer] the value stored, 1
def config_hook
  hocr_option = 'tessedit_create_hocr'
  @options[hocr_option] = 1
end

#convert_text ⇒ Object



30
31
32
33
34
# File 'lib/rtesseract/box.rb', line 30

# Builds one hash per element returned by parse_file (via BoxParser#to_h)
# and stores the resulting list in @value.
#
# @return [Array] the list assigned to @value
def convert_text
  @value = parse_file.map { |word_node| BoxParser.new(word_node).to_h }
end

#file_ext ⇒ Object



21
22
23
# File 'lib/rtesseract/box.rb', line 21

# File extension used by this reader.
#
# @return [String] always '.hocr'
def file_ext
  hocr_extension = '.hocr'
  hocr_extension
end

#initialize_hook ⇒ Object



8
9
10
# File 'lib/rtesseract/box.rb', line 8

# Resets the reader state: @value becomes an empty array and
# @x, @y, @w and @h are set to nil.
#
# @return [Array] a one-element array wrapping the new @value
def initialize_hook
  @value = []
  @x = @y = @w = @h = nil
  # Evaluates to a one-element array holding the very same @value object.
  [@value]
end

#parse_file ⇒ Object



25
26
27
28
# File 'lib/rtesseract/box.rb', line 25

# Reads the file named by text_file_with_ext, parses it with Nokogiri::HTML
# and selects every 'span.ocrx_word' and 'span.ocr_word' element.
#
# @return the result of the CSS query (a Nokogiri node set)
def parse_file
  markup = File.read(text_file_with_ext)
  Nokogiri::HTML(markup).css('span.ocrx_word, span.ocr_word')
end

#to_s ⇒ Object

Output value



41
42
43
44
45
46
47
48
49
# File 'lib/rtesseract/box.rb', line 41

# Output value: the :word entry of every element in @value, joined with
# single spaces.
#
# When @value is still empty, the source is converted first. The result is a
# String on both the cached and the freshly converted path.
#
# @return [String] the words separated by spaces
# @raise [RTesseract::ImageNotSelectedError] if @value is empty and @source is
#   neither accepted by @processor.image? nor answers true to file?
def to_s
  if @value == []
    source_usable = @processor.image?(@source) || @source.file?
    raise RTesseract::ImageNotSelectedError.new(@source) unless source_usable

    convert
  end
  @value.map { |word| word[:word] }.join(' ')
end

#words ⇒ Object



16
17
18
19
# File 'lib/rtesseract/box.rb', line 16

# Returns @value, calling convert first when @value is still an empty array.
#
# @return [Array] the current contents of @value
def words
  not_converted_yet = @value == []
  convert if not_converted_yet
  @value
end