Class: HocrTurtletext::Reader
- Inherits:
-
Object
- Object
- HocrTurtletext::Reader
- Defined in:
- lib/hocr_turtletext/reader.rb
Overview
Methods such as text_in_region, text_position and fuzzed_y are modified from the originals in pdf-reader-turtletext (github.com/tardate/pdf-reader-turtletext).
Instance Method Summary
- #bounding_box(&block) ⇒ Object
- #content ⇒ Object
-
#initialize(hocr_path, options = {}) ⇒ Reader
constructor
A new instance of Reader.
- #text_in_region(xmin, xmax, ymin, ymax, inclusive = false) ⇒ Object
- #text_position(text) ⇒ Object
Constructor Details
#initialize(hocr_path, options = {}) ⇒ Reader
Returns a new instance of Reader.
6 7 8 9 |
# File 'lib/hocr_turtletext/reader.rb', line 6
# Creates a reader bound to one hOCR file.
#
# Nothing is read here; the path is only stored (it is opened later by
# #content via File.read).
#
# @param hocr_path [String] path to the hOCR file
# @param options [Hash] stored verbatim in @options; this method does not
#   interpret any keys
# @return [Reader] a new instance of Reader
def initialize(hocr_path, options = {})
  @hocr_path = hocr_path
  @options = options
end
Instance Method Details
#bounding_box(&block) ⇒ Object
53 54 55 |
# File 'lib/hocr_turtletext/reader.rb', line 53
# Builds a HocrTurtletext::Textangle for this reader.
#
# The reader itself and the caller's block are handed straight to
# Textangle.new; how the block is used is decided by Textangle
# (not visible here).
#
# @return [Object] the newly created HocrTurtletext::Textangle
def bounding_box(&block)
  HocrTurtletext::Textangle.new(self, &block)
end
#content ⇒ Object
11 12 13 14 15 16 |
# File 'lib/hocr_turtletext/reader.rb', line 11
# Reads the hOCR file at @hocr_path and converts it into the positional
# text structure used by the lookup methods.
#
# The file is read and processed on every call: raw text goes through
# precise_content, then to_pos_hash, and finally fuzzed_y, whose result
# is returned. Other methods of this class iterate that result as
# (y, row) pairs where each row yields (x, text) pairs.
#
# @return [Object] whatever fuzzed_y returns for this file
def content
  raw_hocr = File.read(@hocr_path)
  positions = to_pos_hash(precise_content(raw_hocr))
  fuzzed_y(positions)
end
#text_in_region(xmin, xmax, ymin, ymax, inclusive = false) ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/hocr_turtletext/reader.rb', line 18
# Collects the text elements whose position lies inside a rectangle.
#
# Rows of #content are visited in order; a row contributes only if its y
# is within the y bounds, and within that row only elements whose x is
# within the x bounds are kept. Rows that end up with no elements are
# omitted from the result.
#
# @param xmin [Numeric] lower x bound
# @param xmax [Numeric] upper x bound
# @param ymin [Numeric] lower y bound
# @param ymax [Numeric] upper y bound
# @param inclusive [Boolean] when truthy the bounds themselves count as
#   inside (>= / <=); otherwise the comparison is strict (> / <)
# @return [Array<Array>] one array of elements per matching row, or []
#   when any bound is nil/false
def text_in_region(xmin, xmax, ymin, ymax, inclusive = false)
  return [] unless xmin && xmax && ymin && ymax

  # Choose the comparison once instead of re-testing `inclusive` per cell.
  within = if inclusive
             lambda { |value, low, high| value >= low && value <= high }
           else
             lambda { |value, low, high| value > low && value < high }
           end

  content.each_with_object([]) do |(y, text_row), box|
    next unless within.call(y, ymin, ymax)

    row = text_row
          .select { |x, _element| within.call(x, xmin, xmax) }
          .map { |_x, element| element }
    box << row unless row.empty?
  end
end
#text_position(text) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/hocr_turtletext/reader.rb', line 37
# Finds the position of the first row of #content that contains +text+.
#
# Rows are scanned in the order #content yields them and the scan stops
# at the first row with a match.
#
# * When +text+ is a Regexp, the x of the LAST element in that row whose
#   text matches is reported.
# * Otherwise the row is searched with rassoc, so the x of the FIRST
#   element whose text equals +text+ is reported.
#
# @param text [String, Regexp] exact text or pattern to look for
# @return [Hash, nil] { :x => x, :y => y } for the match, or nil when no
#   row matches
def text_position(text)
  pattern_search = text.is_a?(Regexp)

  content.each do |y, row|
    if pattern_search
      # Keep overwriting with later matches so the last one in the row wins.
      x = row.reduce(nil) do |found, (left, value)|
        (value =~ text) ? left : found
      end
      return { :x => x, :y => y } if x
    else
      pair = row.rassoc(text)
      return { :x => pair[0], :y => y } if pair
    end
  end

  nil
end