Class: Tabula::Ruling

Inherits:
Tabula.javajava.awtjava.awt.geomjava.awt.geom.Line2Djava.awt.geom.Line2D::Float
  • Object
show all
Defined in:
lib/tabula/entities/ruling.rb

Defined Under Namespace

Classes: HSegmentComparator

Constant Summary collapse

PERPENDICULAR_PIXEL_EXPAND_AMOUNT =

OK, wtf are you doing, Jeremy? Some PDFs (garment factory audits, precise link TK) make tables by drawing lines that very nearly intersect each other, but not quite. E.g. a horizontal line spans the table at a Y value of 100 and each vertical line (i.e. column-separating ruling line) starts at 101 or 102. This is very annoying. So we check whether those lines nearly overlap by expanding each line of the pair by 2 pixels in each direction (so the vertical lines’ top becomes 99 or 100, and then the expanded versions overlap).

2
COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT =
1

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(top, left, width, height, stroking_color = nil) ⇒ Ruling

Returns a new instance of Ruling.



6
7
8
9
# File 'lib/tabula/entities/ruling.rb', line 6

# Builds a ruling from a top-left corner plus a width and a height.
#
# The superclass constructor is called with two end points, so the
# (top, left, width, height) arguments are converted to
# (left, top, left + width, top + height) first.
#
# @param top [Numeric] y coordinate of the first end point
# @param left [Numeric] x coordinate of the first end point
# @param width [Numeric] added to +left+ to get the second x coordinate
# @param height [Numeric] added to +top+ to get the second y coordinate
# @param stroking_color [Object, nil] stored unchanged through the
#   stroking_color writer
def initialize(top, left, width, height, stroking_color=nil)
  x2 = left + width
  y2 = top + height
  super(left, top, x2, y2)
  self.stroking_color = stroking_color
end

Instance Attribute Details

#stroking_colorObject

Returns the value of attribute stroking_color.



4
5
6
# File 'lib/tabula/entities/ruling.rb', line 4

# Returns the value held in the @stroking_color instance variable.
#
# @return [Object, nil] the stored value, or nil when nothing was stored
def stroking_color
  defined?(@stroking_color) ? @stroking_color : nil
end

Class Method Details

.collapse_oriented_rulings(lines) ⇒ Object



257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# File 'lib/tabula/entities/ruling.rb', line 257

# Merges colinear rulings that touch or nearly touch into single rulings.
#
# All +lines+ must share one orientation (all horizontal or all vertical).
# The rulings are ordered by position, then by start. Every ruling that sits
# at the same position as the previously kept ruling and nearly intersects it
# is folded into that ruling by widening its start/end. A ruling that cannot
# be merged is kept unless its length is 0.
#
# The +lines+ array itself is left untouched (neither re-sorted nor
# shortened). The rulings it contains are reused, though: a kept ruling is
# widened in place when a neighbour is merged into it.
#
# NOTE(review): the first ruling in sort order is always kept, even when its
# length is 0 -- this matches the previous behavior.
#
# @param lines [Array<Ruling>] rulings of a single orientation
# @return [Array<Ruling>] collapsed rulings, ordered by position and start
def self.collapse_oriented_rulings(lines)
  return [] if lines.empty?

  # Sort a copy: sorting in place and shifting the first element off would
  # reorder the caller's array and silently drop one of its entries.
  sorted = lines.sort do |a, b|
    a.position != b.position ? a.position <=> b.position : a.start <=> b.start
  end

  sorted.drop(1).inject([sorted.first]) do |memo, next_line|
    last = memo.last

    if next_line.position == last.position && last.nearlyIntersects?(next_line)
      # colinear and "close enough": stretch the kept ruling over both
      last.start = next_line.start < last.start ? next_line.start : last.start
      last.end = next_line.end < last.end ? last.end : next_line.end
      memo
    elsif next_line.length == 0
      # a ruling without length that could not be merged is ignored
      memo
    else
      memo << next_line
    end
  end
end

.crop_rulings_to_area(rulings, area) ⇒ Object

crop an enumerable of Ruling to an area



248
249
250
251
252
253
254
255
# File 'lib/tabula/entities/ruling.rb', line 248

# Crops an enumerable of Ruling to an area.
#
# Rulings for which #intersects(area) is false are left out. For every other
# ruling a clone is made and the clone is clipped with #intersect(area); the
# value returned by #intersect is what ends up in the result.
#
# @param rulings [Enumerable<Ruling>] rulings to crop
# @param area [Object] region passed to Ruling#intersects and Ruling#intersect
# @return [Array] clipped clones of the rulings that intersect +area+
def self.crop_rulings_to_area(rulings, area)
  rulings.each_with_object([]) do |ruling, cropped|
    next unless ruling.intersects(area)
    cropped << ruling.clone.intersect(area)
  end
end

.find_intersections(horizontals, verticals) ⇒ Object

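Sweep-line implementation of find_intersections that keeps the currently active horizontal rulings in a sorted tree (log(n) insertion and removal), based on people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf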



196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# File 'lib/tabula/entities/ruling.rb', line 196

# Finds the points where horizontal and vertical rulings meet, sweeping over
# the rulings from left to right.
#
# Every ruling for which vertical? is true contributes one event (:v) at its
# left coordinate; every other ruling contributes a start event (:hl) at its
# left edge and an end event (:hr) at its right edge. During the sweep the
# rulings whose :hl has been processed but whose :hr has not are held in a
# java.util.TreeMap built with an HSegmentComparator; each :v event is tested
# against every ruling currently in that tree.
#
# Rulings are classified with vertical?, not by which argument they came in.
#
# @param horizontals [Array<Ruling>] horizontal rulings
# @param verticals [Array<Ruling>] vertical rulings
# @return [Hash] maps each point returned by Ruling#intersection_point to a
#   two-element array [horizontal, vertical], both expanded by
#   PERPENDICULAR_PIXEL_EXPAND_AMOUNT
def self.find_intersections(horizontals, verticals)
  # Look up the TreeMap(Comparator) constructor explicitly and instantiate it
  # with an HSegmentComparator.
  construct_treemap_t_comparator = java.util.TreeMap.java_class.constructor(java.util.Comparator)
  tree = construct_treemap_t_comparator.new_instance(HSegmentComparator.new).to_java
  # one sweep event: its kind (:v, :hl or :hr), its x position, and the ruling
  sort_obj = Struct.new(:type, :pos, :obj)

  (horizontals + verticals)
    .flat_map { |r|
      # a vertical yields a single event, anything else a left/right pair
      r.vertical? ? sort_obj.new(:v, r.left, r) : [sort_obj.new(:hl, r.left, r),
                                                   sort_obj.new(:hr, r.right, r)]
    }
    .sort { |a,b|
      # Events at the same x: :hl sorts before :v and :v sorts before :hr, so a
      # horizontal that starts or ends exactly at a vertical's x is in the tree
      # when that vertical is processed. Any other tie compares as equal.
      if a.pos == b.pos
        if a.type == :v && b.type == :hl
          1
        elsif a.type == :v && b.type == :hr
          -1
        elsif a.type == :hl && b.type == :v
          -1
        elsif a.type == :hr && b.type == :v
          1
        else
          a.pos <=> b.pos
        end
      else
        a.pos <=> b.pos
      end
    }
    .inject({}) { |memo, e|
      case e.type
        when :v
        # test this vertical against every horizontal currently in the tree
        tree.each { |h,_|
          i = h.intersection_point(e.obj)
          # nil means the two rulings do not meet; move on to the next one
          next memo if i.nil?
          memo[i] = [h.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT),
                     e.obj.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)]
        }
        when :hr
          # right edge reached: the horizontal is no longer active
          tree.remove(e.obj)
        when :hl
          # left edge reached: the horizontal becomes active (the value 1 is
          # only a placeholder, the tree is used for its keys)
          tree[e.obj] = 1
      end
      memo
    }
end

Instance Method Details

#==(other) ⇒ Object



158
159
160
# File 'lib/tabula/entities/ruling.rb', line 158

# Coordinate-wise equality: two rulings are equal when both of their end
# points match.
#
# @param other [Object] object to compare with
# @return [Boolean] true when +other+ responds to getX1, getY1, getX2 and
#   getY2 and all four values equal this ruling's; false otherwise
def ==(other)
  # Comparing with nil or an unrelated object must answer false rather than
  # raise NoMethodError.
  comparable = [:getX1, :getY1, :getX2, :getY2].all? { |name| other.respond_to?(name) }
  return false unless comparable

  getX1 == other.getX1 && getY1 == other.getY1 &&
    getX2 == other.getX2 && getY2 == other.getY2
end

#bottom=(v) ⇒ Object



24
25
26
# File 'lib/tabula/entities/ruling.rb', line 24

# Replaces the bottom coordinate with +v+, keeping left, top and right.
#
# The line is rewritten with setLine; java_send is given four Java::float
# types so that the four-float overload is the one invoked.
#
# @param v [Numeric] new bottom coordinate
def bottom=(v)
  four_floats = Array.new(4) { Java::float }
  java_send(:setLine, four_floats, left, top, right, v)
end

#colinear?(point) ⇒ Boolean

Returns:

  • (Boolean)


153
154
155
156
# File 'lib/tabula/entities/ruling.rb', line 153

# Tells whether +point+ lies inside this ruling's extent: its x must be
# between left and right and its y between top and bottom (bounds included).
#
# @param point [#x, #y] point to test
# @return [Boolean]
def colinear?(point)
  x_inside = point.x >= left && point.x <= right
  return false unless x_inside

  point.y >= top && point.y <= bottom
end

#endObject

Raises:

  • (NoMethodError)


53
54
55
56
# File 'lib/tabula/entities/ruling.rb', line 53

# Coordinate at which the ruling ends along its own direction: bottom for a
# vertical ruling, right otherwise.
#
# @return [Numeric]
# @raise [NoMethodError] if the ruling is oblique
def end
  if oblique?
    raise NoMethodError, "Oblique line #{inspect} has no #end method."
  end
  return bottom if vertical?

  right
end

#end=(coord) ⇒ Object

Raises:

  • (NoMethodError)


75
76
77
78
79
80
81
82
# File 'lib/tabula/entities/ruling.rb', line 75

# Moves the end of the ruling along its own direction: sets bottom for a
# vertical ruling, right otherwise.
#
# @param coord [Numeric] new end coordinate
# @raise [NoMethodError] if the ruling is oblique
def end=(coord)
  if oblique?
    raise NoMethodError, "Oblique line #{inspect} has no #end= method."
  end
  vertical? ? (self.bottom = coord) : (self.right = coord)
end

#expand(amt) ⇒ Object

Raises:

  • (NoMethodError)


120
121
122
123
124
125
126
# File 'lib/tabula/entities/ruling.rb', line 120

# Returns a new Ruling covering this one lengthened by +amt+ at both ends:
# its start is moved back by +amt+ and its end forward by +amt+. The receiver
# is not modified. The copy is built from top, left, width and height only.
#
# @param amt [Numeric] amount to add at each end
# @return [Ruling] the lengthened copy
# @raise [NoMethodError] if the ruling is oblique
def expand(amt)
  if oblique?
    raise NoMethodError, "Oblique line #{inspect} has no #expand method."
  end

  grown = Ruling.new(top, left, width, height)
  grown.start -= amt
  grown.end += amt
  grown
end

#finite?Boolean

Returns:

  • (Boolean)


241
242
243
244
# File 'lib/tabula/entities/ruling.rb', line 241

# Tells whether all four coordinates (top, left, bottom, right) are finite
# numbers, i.e. none of them is NaN, +Infinity or -Infinity.
#
# @return [Boolean]
def finite?
  # Float#finite? rejects NaN and both infinities; comparing against
  # +Infinity alone would let -Infinity through.
  [top, left, bottom, right].all? { |coord| coord.finite? }
end

#heightObject



36
37
38
# File 'lib/tabula/entities/ruling.rb', line 36

# Vertical extent of the ruling: bottom minus top.
#
# @return [Numeric]
def height
  lower_edge = bottom
  lower_edge - top
end

#horizontal?Boolean

Returns:

  • (Boolean)


137
138
139
# File 'lib/tabula/entities/ruling.rb', line 137

# A ruling is horizontal when its top and bottom coordinates are equal.
#
# @return [Boolean]
def horizontal?
  y_start = top
  y_end = bottom
  y_start == y_end
end

#intersect(area) ⇒ Object

intersect this Ruling with a java.awt.geom.Rectangle2D



114
115
116
117
118
# File 'lib/tabula/entities/ruling.rb', line 114

# Intersects this Ruling with a java.awt.geom.Rectangle2D, in place.
#
# The ruling's bounds (getBounds2D) are intersected with +area+ and the line
# is reset to run from the (x, y) corner of that intersection to
# (x + width, y + height).
#
# @param area [java.awt.geom.Rectangle2D] region to clip against
# @return [Ruling] self
def intersect(area)
  overlap = getBounds2D.createIntersection(area)
  x1 = overlap.getX
  y1 = overlap.getY
  x2 = overlap.getX + overlap.getWidth
  y2 = overlap.getY + overlap.getHeight

  java_send(:setLine, Array.new(4) { Java::float }, x1, y1, x2, y2)
  self
end

#intersection_point(other) ⇒ Object

calculate the intersection point between self and other Ruling



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/tabula/entities/ruling.rb', line 164

# Calculates the intersection point between self and another Ruling.
#
# self and other are expected to be perpendicular (one horizontal, one
# vertical). Both are lengthened by PERPENDICULAR_PIXEL_EXPAND_AMOUNT before
# testing, so rulings that only nearly touch still count as intersecting. The
# resulting point takes its x from the vertical ruling and its y from the
# horizontal one.
#
# @param other [Ruling] the ruling to intersect with
# @return [java.awt.geom.Point2D::Float, nil] the point, or nil when the
#   lengthened rulings do not intersect
# @raise [ArgumentError] if the rulings intersect but are not one horizontal
#   and one vertical
def intersection_point(other)
  grown_self = expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)
  grown_other = other.expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)

  return nil unless grown_self.intersectsLine(grown_other)

  if grown_self.horizontal? && grown_other.vertical?
    horizontal = grown_self
    # only x is read from the vertical ruling, and lengthening a vertical
    # ruling leaves x alone, so the un-lengthened ruling is used here
    vertical = other
  elsif grown_self.vertical? && grown_other.horizontal?
    horizontal = grown_other
    vertical = grown_self
  else
    raise ArgumentError, "must be orthogonal, horizontal and vertical"
  end

  java.awt.geom.Point2D::Float.new(vertical.getX1, horizontal.getY1)
end

#left=(v) ⇒ Object



20
21
22
# File 'lib/tabula/entities/ruling.rb', line 20

# Replaces the left coordinate with +v+, keeping top, right and bottom.
#
# The line is rewritten with setLine; java_send is given four Java::float
# types so that the four-float overload is the one invoked.
#
# @param v [Numeric] new left coordinate
def left=(v)
  four_floats = Array.new(4) { Java::float }
  java_send(:setLine, four_floats, v, top, right, bottom)
end

#lengthObject



129
130
131
# File 'lib/tabula/entities/ruling.rb', line 129

# Euclidean length of the ruling, computed from its horizontal extent
# (right - left) and vertical extent (bottom - top).
#
# @return [Float]
def length
  dx = (right - left).abs
  dy = (bottom - top).abs
  Math.sqrt(dx ** 2 + dy ** 2)
end

#nearlyIntersects?(another) ⇒ Boolean

If the lines we’re comparing are colinear or parallel, we expand them by only 1 pixel, because the expansions are additive. (E.g. two vertical lines at x = 100, one having a y2 of 98 and the other a y1 of 102, would erroneously be said to nearlyIntersect if they were each expanded by 2, since they’d both terminate at 100.) The COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT is only 1, so the total expansion is 2. A total expansion amount of 2 is empirically verified to work sometimes. It’s not a magic number from any source other than a little bit of experience.

Returns:

  • (Boolean)


102
103
104
105
106
107
108
109
110
# File 'lib/tabula/entities/ruling.rb', line 102

# Tells whether this ruling intersects +another+, or would do so once both
# are slightly lengthened.
#
# Rulings that already intersect answer true immediately. Otherwise both are
# lengthened with #expand and tested again: by
# PERPENDICULAR_PIXEL_EXPAND_AMOUNT when perpendicular_to?(another) is true,
# by COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT otherwise.
#
# @param another [Ruling] the ruling to test against
# @return [Boolean]
def nearlyIntersects?(another)
  return true if intersectsLine(another)

  margin = if perpendicular_to?(another)
             PERPENDICULAR_PIXEL_EXPAND_AMOUNT
           else
             COLINEAR_OR_PARALLEL_PIXEL_EXPAND_AMOUNT
           end
  expand(margin).intersectsLine(another.expand(margin))
end

#oblique?Boolean

Returns:

  • (Boolean)


141
142
143
# File 'lib/tabula/entities/ruling.rb', line 141

# A ruling is oblique when it is neither vertical nor horizontal.
#
# @return [Boolean]
def oblique?
  !vertical? && !horizontal?
end

#perpendicular_to?(other) ⇒ Boolean

Returns:

  • (Boolean)


145
146
147
# File 'lib/tabula/entities/ruling.rb', line 145

# Compares this ruling's vertical? with the other ruling's horizontal? and
# answers true when the two flags agree.
#
# NOTE(review): the flags also agree when this ruling is not vertical and
# +other+ is not horizontal (e.g. horizontal self with vertical other, but
# also two oblique rulings) -- confirm that is intended.
#
# @param other [Ruling] ruling to compare with
# @return [Boolean]
def perpendicular_to?(other)
  self_is_vertical = vertical?
  other_is_horizontal = other.horizontal?
  self_is_vertical == other_is_horizontal
end

#positionObject

`x` (left) coordinate if the line is vertical, `y` (top) if it is horizontal

Raises:

  • (NoMethodError)


45
46
47
48
# File 'lib/tabula/entities/ruling.rb', line 45

# Coordinate shared by every point of the ruling: left (x) for a vertical
# ruling, top (y) otherwise.
#
# @return [Numeric]
# @raise [NoMethodError] if the ruling is oblique
def position
  if oblique?
    raise NoMethodError, "Oblique line #{inspect} has no #position method."
  end
  return left if vertical?

  top
end

#position=(coord) ⇒ Object

Raises:

  • (NoMethodError)


57
58
59
60
61
62
63
64
65
66
# File 'lib/tabula/entities/ruling.rb', line 57

# Moves the whole ruling to a new position: for a vertical ruling both left
# and right are set to +coord+, otherwise both top and bottom are.
#
# Orientation is decided once, before either coordinate is changed.
#
# @param coord [Numeric] new position
# @raise [NoMethodError] if the ruling is oblique
def position=(coord)
  if oblique?
    raise NoMethodError, "Oblique line #{inspect} has no #position= method."
  end

  setters = vertical? ? [:left=, :right=] : [:top=, :bottom=]
  setters.each { |setter| send(setter, coord) }
  coord
end

#right=(v) ⇒ Object



28
29
30
# File 'lib/tabula/entities/ruling.rb', line 28

# Replaces the right coordinate with +v+, keeping left, top and bottom.
#
# The line is rewritten with setLine; java_send is given four Java::float
# types so that the four-float overload is the one invoked.
#
# @param v [Numeric] new right coordinate
def right=(v)
  four_floats = Array.new(4) { Java::float }
  java_send(:setLine, four_floats, left, top, v, bottom)
end

#startObject

Raises:

  • (NoMethodError)


49
50
51
52
# File 'lib/tabula/entities/ruling.rb', line 49

# Coordinate at which the ruling starts along its own direction: top for a
# vertical ruling, left otherwise.
#
# @return [Numeric]
# @raise [NoMethodError] if the ruling is oblique
def start
  if oblique?
    raise NoMethodError, "Oblique line #{inspect} has no #start method."
  end
  return top if vertical?

  left
end

#start=(coord) ⇒ Object

Raises:

  • (NoMethodError)


67
68
69
70
71
72
73
74
# File 'lib/tabula/entities/ruling.rb', line 67

# Moves the start of the ruling along its own direction: sets top for a
# vertical ruling, left otherwise.
#
# @param coord [Numeric] new start coordinate
# @raise [NoMethodError] if the ruling is oblique
def start=(coord)
  if oblique?
    raise NoMethodError, "Oblique line #{inspect} has no #start= method."
  end
  vertical? ? (self.top = coord) : (self.left = coord)
end

#to_json(arg) ⇒ Object



149
150
151
# File 'lib/tabula/entities/ruling.rb', line 149

# Serializes the ruling as the JSON array [left, top, right, bottom].
#
# Any arguments (such as the generator state that the JSON library hands to
# to_json) are optional and are forwarded to Array#to_json, so the method can
# be called with no argument at all and generator options are not dropped.
#
# @param args [Array] optional arguments forwarded to Array#to_json
# @return [String] JSON text
def to_json(*args)
  [left, top, right, bottom].to_json(*args)
end

#top=(v) ⇒ Object



16
17
18
# File 'lib/tabula/entities/ruling.rb', line 16

# Replaces the top coordinate with +v+, keeping left, right and bottom.
#
# The line is rewritten with setLine; java_send is given four Java::float
# types so that the four-float overload is the one invoked.
#
# @param v [Numeric] new top coordinate
def top=(v)
  four_floats = Array.new(4) { Java::float }
  java_send(:setLine, four_floats, left, v, right, bottom)
end

#vertical?Boolean

Returns:

  • (Boolean)


133
134
135
# File 'lib/tabula/entities/ruling.rb', line 133

# A ruling is vertical when its left and right coordinates are equal.
#
# @return [Boolean]
def vertical?
  x_start = left
  x_end = right
  x_start == x_end
end

#widthObject



32
33
34
# File 'lib/tabula/entities/ruling.rb', line 32

# Horizontal extent of the ruling: right minus left.
#
# @return [Numeric]
def width
  right_edge = right
  right_edge - left
end