Module: RTesseract::Box
- Extended by: Base
- Defined in: lib/rtesseract/box.rb
Class Method Summary collapse
Methods included from Base
Class Method Details
.parse(content) ⇒ Object
15 16 17 18 19 20 21 |
# File 'lib/rtesseract/box.rb', line 15
#
# Extracts one entry per recognised word from an HTML document.
#
# The content is parsed with Nokogiri and every `span.ocrx_word` /
# `span.ocr_word` element is visited. Each element's `title` attribute has
# its semicolons removed and is split on spaces; the resulting tokens are
# handed to `word_info` together with the element itself.
#
# NOTE(review): presumably `content` is the hOCR output written by Tesseract,
# whose word titles look like "bbox x0 y0 x1 y1; x_wconf N" - confirm.
#
# @param content [String] markup to parse
# @return [Array] one `word_info` result per matched span
def self.parse(content)
  html = Nokogiri::HTML(content)

  html.css('span.ocrx_word, span.ocr_word').map do |word|
    # Strip the ';' property separators so a plain split yields clean tokens.
    attributes = word.attributes['title'].value.to_s.delete(';').split(' ')
    word_info(word, attributes)
  end
end
.run(source, errors, options) ⇒ Object
7 8 9 10 11 12 13 |
# File 'lib/rtesseract/box.rb', line 7
#
# Runs the OCR command with hOCR output enabled and returns the parsed words.
#
# Sets `tessedit_create_hocr = 1` on the given options object (the caller's
# object is mutated), executes `RTesseract::Command`, then reads the file at
# `temp_file('.hocr')` and passes its contents to `parse`.
#
# NOTE(review): `temp_file` is not defined in this block; presumably it is
# provided by Base - confirm.
#
# @param source  input passed through to RTesseract::Command
# @param errors  passed through to RTesseract::Command
# @param options object that accepts `tessedit_create_hocr=`; also passed
#   through to RTesseract::Command
# @return [Array] the result of `parse` on the generated .hocr file
def self.run(source, errors, options)
  options.tessedit_create_hocr = 1

  RTesseract::Command.new(source, temp_file, errors, options).run

  parse(File.read(temp_file('.hocr')))
end
.word_info(word, data) ⇒ Object
23 24 25 26 27 28 29 30 31 |
# File 'lib/rtesseract/box.rb', line 23
#
# Builds the hash describing a single word and its box coordinates.
#
# Coordinates are read from positions 1..4 of `data` and converted with
# `to_i`; position 0 is ignored. Missing positions therefore become 0,
# because `nil.to_i` is 0.
#
# NOTE(review): presumably `data[0]` is the "bbox" label of an hOCR title
# attribute - confirm against the caller.
#
# @param word object responding to `text`
# @param data [Array] tokens; indices 1..4 are x_start, y_start, x_end, y_end
# @return [Hash] with keys :word, :x_start, :y_start, :x_end, :y_end
def self.word_info(word, data)
  {
    word: word.text,
    x_start: data[1].to_i,
    y_start: data[2].to_i,
    x_end: data[3].to_i,
    y_end: data[4].to_i
  }
end