Module: RTesseract::Box

Extended by:
Base
Defined in:
lib/rtesseract/box.rb

Class Method Summary

Methods included from Base

temp_file

Class Method Details

.parse(content) ⇒ Object



15
16
17
18
19
20
21
# File 'lib/rtesseract/box.rb', line 15

# Extracts word entries from hOCR markup.
#
# Parses +content+ as HTML, selects every +span+ with class +ocrx_word+ or
# +ocr_word+, and builds one entry per span via +word_info+.
#
# @param content [String] markup to parse (presumably the hOCR output
#   produced by the OCR run — confirm against the caller)
# @return [Array] one +word_info+ result per matched span, in document order
def self.parse(content)
  document = Nokogiri::HTML(content)
  word_nodes = document.css('span.ocrx_word, span.ocr_word')

  word_nodes.map do |node|
    # Drop the semicolons in the title attribute and break it into
    # whitespace-separated tokens before handing it to word_info.
    title = node.attributes['title'].value.to_s
    tokens = title.delete(';').split(' ')
    word_info(node, tokens)
  end
end

.run(source, errors, options) ⇒ Object



7
8
9
10
11
12
13
# File 'lib/rtesseract/box.rb', line 7

# Runs the OCR command and returns the parsed contents of its '.hocr' file.
#
# Sets +tessedit_create_hocr+ to 1 on +options+ (mutating the object passed
# in), runs an RTesseract::Command, then reads the '.hocr' temp file and
# passes its contents to +parse+.
#
# @param source [Object] forwarded unchanged to RTesseract::Command
# @param errors [Object] forwarded unchanged to RTesseract::Command
# @param options [Object] must respond to +tessedit_create_hocr=+
# @return [Object] whatever +parse+ returns for the file contents
def self.run(source, errors, options)
  options.tessedit_create_hocr = 1

  command = RTesseract::Command.new(source, temp_file, errors, options)
  command.run

  # NOTE(review): assumes the command wrote its output next to temp_file
  # with a '.hocr' extension — confirm against RTesseract::Command.
  hocr_path = temp_file('.hocr')
  parse(File.read(hocr_path))
end

.word_info(word, data) ⇒ Object



23
24
25
26
27
28
29
30
31
# File 'lib/rtesseract/box.rb', line 23

# Builds the hash describing one word and its box coordinates.
#
# The coordinates are taken from positions 1 through 4 of +data+ and
# converted with +to_i+; position 0 is ignored (presumably a label such as
# "bbox" — confirm against the input format).
#
# @param word [#text] object whose +text+ becomes the :word value
# @param data [Array] tokens holding the coordinates at indexes 1..4
# @return [Hash] keys :word, :x_start, :y_start, :x_end, :y_end
def self.word_info(word, data)
  x_start, y_start, x_end, y_end = (1..4).map { |index| data[index].to_i }

  {
    word: word.text,
    x_start: x_start,
    y_start: y_start,
    x_end: x_end,
    y_end: y_end
  }
end