Module: RTesseract::Box

Extended by:
Base
Defined in:
lib/rtesseract/box.rb

Class Method Summary

Methods included from Base

temp_file

Class Method Details

.parse(content) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/rtesseract/box.rb', line 15

# Extracts word bounding boxes from hOCR markup.
#
# Every `span.ocrx_word` / `span.ocr_word` element is expected to carry a
# `title` attribute made of `;`-separated properties, one of which is
# `bbox x_start y_start x_end y_end`.
#
# @param content [String] hOCR (HTML) document text
# @return [Array<Hash>] one hash per word span with the keys :word,
#   :x_start, :y_start, :x_end and :y_end; any coordinate that cannot be
#   found in the title is reported as 0
def self.parse(content)
  html = Nokogiri::HTML(content)
  html.css('span.ocrx_word, span.ocr_word').map do |word|
    # Look the bbox property up by name rather than assuming it is the first
    # entry of the title, and keep the scratch data in locals so that no
    # state leaks onto the module between (or during concurrent) calls.
    properties = word['title'].to_s.split(';').map(&:split)
    _name, *coordinates = properties.find { |name, *| name == 'bbox' }
    x_start, y_start, x_end, y_end = coordinates.map(&:to_i)

    {
      word: word.text,
      # to_i turns a missing (nil) coordinate into 0.
      x_start: x_start.to_i,
      y_start: y_start.to_i,
      x_end: x_end.to_i,
      y_end: y_end.to_i
    }
  end
end

.run(source, options) ⇒ Object



7
8
9
10
11
12
13
# File 'lib/rtesseract/box.rb', line 7

# Runs the OCR command with hOCR output switched on and returns the result
# of parsing the generated `.hocr` file.
#
# Note that `options` is modified in place: `tessedit_create_hocr` is set
# to 1 on the object the caller passed in.
#
# @param source forwarded unchanged to RTesseract::Command
# @param options option object responding to `tessedit_create_hocr=`
# @return whatever `parse` returns for the contents of the `.hocr` file
def self.run(source, options)
  options.tessedit_create_hocr = 1
  command = RTesseract::Command.new(source, temp_file, options)
  command.run

  hocr_path = temp_file('.hocr')
  hocr_markup = File.read(hocr_path)
  parse(hocr_markup)
end