Method: PDFextract.extract_ocr

Defined in:
lib/pdf_extract.rb

.extract_ocr(image_path, coords) ⇒ Object



188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/pdf_extract.rb', line 188

# Runs a Tesseract engine (language: :eng) over a rectangular region of an
# image and returns the recognized text with surrounding whitespace stripped.
#
# Before running the engine, prints the image path followed by the region's
# x, y, width and height (one value per line) to standard output.
#
# @param image_path [Object] value assigned to the engine's +image+
# @param coords [#[]] region corners looked up under the string keys
#   "x1", "y1", "x2" and "y2"; width is x2 - x1 and height is y2 - y1
# @return [Object] the result of +engine.text.strip+
def self.extract_ocr(image_path, coords)
  left = coords["x1"]
  top = coords["y1"]
  # Region in the (x, y, width, height) form that is handed to Engine#select.
  region = [left, top, coords["x2"] - left, coords["y2"] - top]

  puts image_path
  puts region

  ocr = Tesseract::Engine.new(language: :eng)
  ocr.image = image_path
  ocr.select(*region)
  ocr.text.strip
end