Class: RTesseract::Box
Overview
Class to read words and their positions from an image
Defined Under Namespace
Classes: BoxParser
Constant Summary
Constants inherited
from RTesseract
LANGUAGES
Instance Attribute Summary
Attributes inherited from RTesseract
#configuration, #processor, #source
Instance Method Summary
collapse
Methods inherited from RTesseract
choose_processor!, #clear_console_output, #config, #config_file, configure, #convert, #convert_command, #crop!, default_command, #from_blob, #image, #initialize, #lang, local_config, #options_cmd, #psm, read, #read, #remove_file, #tessdata_dir, #text_file, #text_file_with_ext, #to_s_without_spaces, #user_patterns, #user_words
Constructor Details
This class inherits a constructor from RTesseract
Instance Method Details
#after_convert_hook ⇒ Object
36
37
38
|
# File 'lib/rtesseract/box.rb', line 36
# Renames the '.html' output file to the path returned by
# +text_file_with_ext+ once conversion has finished.
#
# This is best-effort: any StandardError raised while resolving the paths
# or moving the file (for example, the '.html' file not existing) is
# swallowed and +nil+ is returned.
# NOTE(review): presumably some Tesseract versions write hOCR output with an
# '.html' extension -- confirm.
#
# @return [Object, nil] the result of FileUtils.mv, or nil when it failed
def after_convert_hook
  legacy_output = text_file_with_ext('.html')
  FileUtils.mv(legacy_output, text_file_with_ext)
rescue StandardError
  nil
end
|
#config_hook ⇒ Object
12
13
14
|
# File 'lib/rtesseract/box.rb', line 12
# Sets the 'tessedit_create_hocr' option to 1 in @options.
# NOTE(review): presumably this makes Tesseract emit hOCR output -- confirm
# against the Tesseract configuration variables.
#
# @return [Integer] the assigned value (1)
def config_hook
  hocr_flag = 'tessedit_create_hocr'
  @options[hocr_flag] = 1
end
|
#convert_text ⇒ Object
30
31
32
33
34
|
# File 'lib/rtesseract/box.rb', line 30
# Builds one hash per node returned by +parse_file+ (via BoxParser#to_h)
# and stores the resulting list in @value.
#
# @return [Array] the list that was stored in @value
def convert_text
  @value = parse_file.map { |word| BoxParser.new(word).to_h }
end
|
#file_ext ⇒ Object
21
22
23
|
# File 'lib/rtesseract/box.rb', line 21
# Extension used for this class's output file.
#
# @return [String] always '.hocr'
def file_ext
  '.hocr'
end
|
#initialize_hook ⇒ Object
8
9
10
|
# File 'lib/rtesseract/box.rb', line 8
# Resets the instance state: @value becomes an empty list and @points an
# empty hash.
#
# @return [Array] the pair [[], {}] that was assigned
def initialize_hook
  @value, @points = [], {}
end
|
#parse_file ⇒ Object
25
26
27
28
|
# File 'lib/rtesseract/box.rb', line 25
# Reads the file at +text_file_with_ext+, parses it as HTML with Nokogiri
# and selects the word-level spans.
#
# @return [Object] the result of the CSS query for
#   'span.ocrx_word, span.ocr_word'
def parse_file
  markup = File.read(text_file_with_ext)
  Nokogiri::HTML(markup).css('span.ocrx_word, span.ocr_word')
end
|
#to_s ⇒ Object
41
42
43
44
45
46
47
48
49
|
# File 'lib/rtesseract/box.rb', line 41
# Returns the recognized words joined by single spaces.
#
# When no words are cached in @value yet, the image is converted first.
# The result is always a String, whether the words were already cached or
# were produced by this call.
#
# @return [String] the :word entry of every item in @value, space-separated
# @raise [RTesseract::ImageNotSelectedError] when nothing is cached and
#   @source is neither accepted by @processor.image? nor a file
def to_s
  if @value == []
    unless @processor.image?(@source) || @source.file?
      raise RTesseract::ImageNotSelectedError.new(@source)
    end

    convert
  end

  @value.map { |word| word[:word] }.join(' ')
end
|
#words ⇒ Object
16
17
18
19
|
# File 'lib/rtesseract/box.rb', line 16
# Returns the cached list in @value, running +convert+ first when that
# list is still empty.
#
# @return [Array] the contents of @value after any conversion
def words
  return @value unless @value == []

  convert
  @value
end
|