Module: RTesseract::Box
- Defined in:
- lib/rtesseract/box.rb
Class Method Summary
Class Method Details
.parse(content) ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/rtesseract/box.rb', line 19
#
# Extracts the recognized words and their box coordinates from hOCR markup.
#
# Every `span.ocrx_word` / `span.ocr_word` element becomes one hash. The
# element's `title` attribute is stripped of semicolons and split on spaces;
# fields 1..4 of the result are read as the box coordinates (field 0 is
# skipped — presumably the literal "bbox" label of an hOCR title such as
# "bbox 36 92 96 116; x_wconf 95"; confirm against real Tesseract output).
#
# @param content [String] hOCR/HTML markup handed to Nokogiri::HTML
# @return [Array<Hash>] one hash per word with keys :word, :x_start,
#   :y_start, :x_end and :y_end; a missing coordinate field becomes 0
#   because nil.to_i is 0
def self.parse(content)
  html = Nokogiri::HTML(content)

  html.css('span.ocrx_word, span.ocr_word').map do |word|
    # Block-local on purpose: this is per-word scratch data and must not be
    # kept in module-level state that is shared between calls.
    fields = word.attributes['title'].value.to_s.delete(';').split(' ')

    {
      word: word.text,
      x_start: fields[1].to_i,
      y_start: fields[2].to_i,
      x_end: fields[3].to_i,
      y_end: fields[4].to_i
    }
  end
end
.run(source, options) ⇒ Object
10 11 12 13 14 15 16 17 |
# File 'lib/rtesseract/box.rb', line 10
#
# Runs the Tesseract command on +source+ with hOCR output switched on and
# returns the parsed content of the resulting `.hocr` file.
#
# @param source [Object] input handed unchanged to RTesseract::Command.new
# @param options [Object] options object handed to RTesseract::Command.new;
#   it must respond to `tessedit_create_hocr=`. Note that it is modified in
#   place: `tessedit_create_hocr` is set to 1 on the caller's object.
# @return [Object] whatever `.parse` returns for the generated hOCR file
def self.run(source, options)
  # Fresh random base name per call, so separate calls never share a file.
  name = "rtesseract_#{SecureRandom.uuid}"
  hocr_path = temp_dir.join("#{name}.hocr")
  options.tessedit_create_hocr = 1

  RTesseract::Command.new(source, temp_dir.join(name).to_s, options).run
  parse(hocr_path.read)
ensure
  # The output is fully read by now (or the run failed); remove the file so
  # repeated calls do not pile up `rtesseract_<uuid>.hocr` files in the
  # temp directory. hocr_path is nil when temp_dir itself raised.
  hocr_path.delete if hocr_path && hocr_path.exist?
end
.temp_dir ⇒ Object
6 7 8 |
# File 'lib/rtesseract/box.rb', line 6
#
# Returns the system temporary directory as a Pathname.
#
# The path is looked up through Dir.tmpdir on every call rather than cached,
# and no module-level state is written.
#
# @return [Pathname] Dir.tmpdir wrapped in a Pathname
def self.temp_dir
  Pathname.new(Dir.tmpdir)
end