Module: RTesseract::Box

Defined in:
lib/rtesseract/box.rb

Class Method Summary

Class Method Details

.parse(content) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/rtesseract/box.rb', line 19

# Extracts word bounding boxes from hOCR markup.
#
# Every +span.ocrx_word+ / +span.ocr_word+ element becomes one hash. The
# element's +title+ attribute is stripped of semicolons and split on
# whitespace; tokens 1..4 are read as the box coordinates. Token 0 is skipped
# (presumably the "bbox" label -- confirm against the hOCR output in use).
# Missing or non-numeric tokens become 0 through +to_i+. An element without a
# +title+ attribute is not handled here.
#
# @param content [String] hOCR/HTML markup, passed straight to Nokogiri::HTML
# @return [Array<Hash>] one hash per word with the keys
#   :word, :x_start, :y_start, :x_end and :y_end
def self.parse(content)
  document = Nokogiri::HTML(content)
  document.css('span.ocrx_word, span.ocr_word').map do |word|
    # Keep the tokens in block locals: a module-level instance variable would
    # be shared state that every word (and every caller) overwrites.
    _label, x_start, y_start, x_end, y_end =
      word.attributes['title'].value.to_s.delete(';').split(' ')

    {
      word: word.text,
      x_start: x_start.to_i,
      y_start: y_start.to_i,
      x_end: x_end.to_i,
      y_end: y_end.to_i
    }
  end
end

.run(source, options) ⇒ Object



10
11
12
13
14
15
16
17
# File 'lib/rtesseract/box.rb', line 10

# Runs the Tesseract command with hOCR output enabled and returns the parsed
# result of the generated file.
#
# The command writes to a uniquely named base path inside +temp_dir+; the
# resulting "<base>.hocr" file is read, handed to +parse+, and then removed so
# repeated calls do not accumulate files in the temporary directory.
#
# Side effect: sets +tessedit_create_hocr = 1+ on the +options+ object that was
# passed in.
#
# @param source [Object] forwarded unchanged to RTesseract::Command
# @param options [#tessedit_create_hocr=] forwarded to RTesseract::Command
# @return [Object] whatever +parse+ returns for the hOCR content
def self.run(source, options)
  name = "rtesseract_#{SecureRandom.uuid}"
  hocr_path = temp_dir.join("#{name}.hocr")
  options.tessedit_create_hocr = 1

  RTesseract::Command.new(source, temp_dir.join(name).to_s, options).run

  parse(hocr_path.read)
ensure
  # The file name is random and never returned, so nothing else can use it.
  # The guard covers the case where the command failed before writing it.
  hocr_path.delete if hocr_path&.exist?
end

.temp_dir ⇒ Object



6
7
8
# File 'lib/rtesseract/box.rb', line 6

# Location used for intermediate output files.
#
# Wraps +Dir.tmpdir+ in a Pathname, records it in +@file_path+ and returns it.
# The value is rebuilt on every call.
#
# @return [Pathname] the system temporary directory
def self.temp_dir
  tmp_root = Dir.tmpdir
  @file_path = Pathname.new(tmp_root)
end