Method: PDFextract.extract_ocr
- Defined in:
- lib/pdf_extract.rb
.extract_ocr(image_path, coords) ⇒ Object
188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
# File 'lib/pdf_extract.rb', line 188

# Runs Tesseract OCR (English language data) over a rectangular region of an
# image and returns the recognised text with leading/trailing whitespace
# stripped.
#
# The region is given as two corners; it is converted to the
# x / y / width / height form passed to +Tesseract::Engine#select+.
#
# @param image_path [Object] value assigned to +Tesseract::Engine#image=+
#   (presumably a path to an image file -- confirm against callers)
# @param coords [Hash] region corners stored under the string keys
#   "x1", "y1", "x2" and "y2"; the values must support subtraction
# @return [String] the engine's text for the selected region, stripped
# @raise [KeyError] if any of the four corner keys is missing from +coords+
def self.extract_ocr(image_path, coords)
  # Hash#fetch makes a missing corner fail immediately with a KeyError that
  # names the key, instead of an opaque nil-arithmetic error further down.
  x, y, x2, y2 = %w[x1 y1 x2 y2].map { |key| coords.fetch(key) }
  width = x2 - x
  height = y2 - y

  # NOTE(review): these write diagnostics to stdout on every call; kept so the
  # observable output is unchanged, but they look like leftover debugging.
  puts image_path
  puts [x, y, width, height]

  engine = Tesseract::Engine.new(language: :eng)
  engine.image = image_path
  engine.select x, y, width, height
  engine.text.strip
end