Class: Rectangle2D

Inherits:
Object
  • Object
show all
Defined in:
lib/tabula/core_ext.rb

Constant Summary collapse

SIMILARITY_DIVISOR =
20

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.unionize(non_overlapping_rectangles, next_rect) ⇒ Object



260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# File 'lib/tabula/core_ext.rb', line 260

# Folds +next_rect+ into a list of rectangles, merging it with every
# rectangle in the list that it overlaps.
#
# When +next_rect+ overlaps one or more entries, those entries are dropped
# and +next_rect+ itself is grown (in place, via +union+) to their common
# bounding box; the result is a NEW array, the array passed in is left
# untouched. When nothing overlaps, +next_rect+ is appended to the array
# passed in and that same array is returned.
#
# @param non_overlapping_rectangles [Array] rectangles collected so far;
#   +nil+ entries are ignored when looking for overlaps
# @param next_rect [Object] rectangle to add; must respond to +overlaps?+
# @return [Array] the updated list of rectangles
def self.unionize(non_overlapping_rectangles, next_rect)
  colliding = non_overlapping_rectangles.compact.select { |rect| next_rect.overlaps?(rect) }

  # Nothing to merge with: just record the rectangle as-is.
  return non_overlapping_rectangles << next_rect if colliding.empty?

  # Drop everything next_rect collides with ...
  survivors = non_overlapping_rectangles - colliding
  # ... and grow next_rect until it covers all of them. `union` is called
  # in its three-argument form, writing the result into its last argument.
  colliding.each { |rect| union(rect, next_rect, next_rect) }
  survivors << next_rect
end

Instance Method Details

#area ⇒ Object



167
168
169
# File 'lib/tabula/core_ext.rb', line 167

# Surface covered by this rectangle.
#
# @return [Numeric] the product of +width+ and +height+
def area
  width * height
end

#bottom=(new_y2) ⇒ Object



152
153
154
# File 'lib/tabula/core_ext.rb', line 152

# Moves the bottom edge to +new_y2+ while keeping +x+, +y+ and +width+;
# only the height changes (to +new_y2 - y+).
#
# @param new_y2 [Numeric] new coordinate of the bottom edge
def bottom=(new_y2)
  # Select the setRect overload that takes four floats.
  float_signature = Array.new(4) { Java::float }
  new_height = new_y2 - y
  java_send(:setRect, float_signature, x, y, width, new_height)
end

#dims(*format) ⇒ Object

Implement geometry stuff




136
137
138
139
140
141
142
# File 'lib/tabula/core_ext.rb', line 136

# Returns selected geometry values of this rectangle.
#
# @param format [Array<Symbol>] names of zero-argument methods to call on
#   this rectangle, in order (e.g. +:x+, +:width+)
# @return [Array] the results of those calls; when no names are given,
#   +[x, y, width, height]+
def dims(*format)
  # A splat parameter is always an Array (and therefore truthy), so the
  # default has to be chosen by emptiness rather than by truthiness.
  return [self.x, self.y, self.width, self.height] if format.empty?

  format.map { |method| self.send(method) }
end

#horizontal_distance(other) ⇒ Object



184
185
186
# File 'lib/tabula/core_ext.rb', line 184

# Absolute gap between this rectangle's right edge and +other+'s left edge.
#
# @param other [Object] anything responding to +left+
# @return [Numeric] non-negative distance
def horizontal_distance(other)
  gap = other.left - right
  gap.abs
end

#horizontal_midpoint ⇒ Object



176
177
178
# File 'lib/tabula/core_ext.rb', line 176

# Coordinate halfway between the left and right edges.
#
# @return [Numeric] +left+ plus half of +width+
def horizontal_midpoint
  half_width = width / 2
  left + half_width
end

#horizontal_overlap_ratio(other) ⇒ Object

as defined by PDF-TREX paper



236
237
238
239
240
241
242
243
244
245
246
247
248
249
# File 'lib/tabula/core_ext.rb', line 236

# Overlap ratio between the top..bottom spans of this rectangle and
# +other+: the length of the shared span divided by the shorter of the two
# spans.
#
# @param other [Object] anything responding to +top+ and +bottom+
# @return [Numeric] the ratio, or the Integer 0 when none of the four
#   edge orderings below holds
def horizontal_overlap_ratio(other)
  shorter_span = [bottom - top, other.bottom - other.top].min

  shared_span =
    if [other.top, top, other.bottom, bottom].sorted?
      # other starts first and ends inside self
      other.bottom - top
    elsif [top, other.top, bottom, other.bottom].sorted?
      # self starts first and ends inside other
      bottom - other.top
    elsif [top, other.top, other.bottom, bottom].sorted?
      # other lies entirely within self
      other.bottom - other.top
    elsif [other.top, top, bottom, other.bottom].sorted?
      # self lies entirely within other
      bottom - top
    end

  shared_span ? shared_span / shorter_span : 0
end

#horizontally_overlaps?(other) ⇒ Boolean

detects if self and other belong to the same column

Returns:

  • (Boolean)


213
214
215
216
# File 'lib/tabula/core_ext.rb', line 213

# Tells whether the left..right spans of this rectangle and +other+ share
# a strictly positive width.
#
# @param other [Object] anything responding to +left+ and +right+
# @return [Boolean]
def horizontally_overlaps?(other)
  shared_left = [left, other.left].max
  shared_right = [right, other.right].min
  [0, shared_right - shared_left].max > 0
end

#inspect ⇒ Object



285
286
287
# File 'lib/tabula/core_ext.rb', line 285

# Human-readable representation listing the edges in the order
# top, left, bottom, right.
#
# @return [String]
def inspect
  edges = [top, left, bottom, right].join(', ')
  "#<Rectangle2D dims:[#{edges}]>"
end

#left=(new_x) ⇒ Object



156
157
158
159
160
161
# File 'lib/tabula/core_ext.rb', line 156

# Moves the left edge to +new_x+ and shrinks/grows the width by the same
# amount, so that +x + width+ is unchanged.
#
# An earlier version kept the width as it was (shifting the whole
# rectangle); adjusting the width is what fixes
# test_vertical_rulings_splitting_words.
#
# @param new_x [Numeric] new coordinate of the left edge
def left=(new_x)
  shift = new_x - x
  # Select the setRect overload that takes four floats.
  float_signature = Array.new(4) { Java::float }
  java_send(:setRect, float_signature, new_x, y, width - shift, height)
end

#midpoint ⇒ Object

Returns the midpoint of the rectangle as [x, y].


172
173
174
# File 'lib/tabula/core_ext.rb', line 172

# Center of the rectangle.
#
# @return [Array] two elements: +horizontal_midpoint+, then
#   +vertical_midpoint+
def midpoint
  center_x = horizontal_midpoint
  center_y = vertical_midpoint
  [center_x, center_y]
end

#overlap_ratio(other) ⇒ Object



226
227
228
229
230
231
232
233
# File 'lib/tabula/core_ext.rb', line 226

# Ratio of the intersection area to the union area of this rectangle and
# +other+.
#
# @param other [Object] anything responding to +left+, +right+, +top+,
#   +bottom+ and +area+
# @return [Numeric] intersection area divided by union area
def overlap_ratio(other)
  # Negative extents mean "no overlap" and are clamped to zero.
  non_negative = lambda { |value| [0, value].max }

  shared_width  = non_negative.call([right, other.right].min - [left, other.left].max)
  shared_height = non_negative.call([bottom, other.bottom].min - [top, other.top].max)
  shared_area   = non_negative.call(shared_height * shared_width)

  combined_area = area + other.area - shared_area
  shared_area / combined_area
end

#overlaps?(other) ⇒ Boolean

Returns:

  • (Boolean)


218
219
220
# File 'lib/tabula/core_ext.rb', line 218

# Tells whether this rectangle intersects +other+, by passing +other+'s
# x, y, width and height to +intersects+.
#
# @param other [Object] anything responding to +dims+
# @return [Boolean] whatever +intersects+ reports
def overlaps?(other)
  other_bounds = other.dims(:x, :y, :width, :height)
  intersects(*other_bounds)
end

#overlaps_with_ratio?(other, ratio_tolerance = 0.00001) ⇒ Boolean

Returns:

  • (Boolean)


222
223
224
# File 'lib/tabula/core_ext.rb', line 222

# Tells whether the overlap ratio with +other+ is strictly greater than
# +ratio_tolerance+.
#
# @param other [Object] rectangle passed on to +overlap_ratio+
# @param ratio_tolerance [Numeric] threshold the ratio has to exceed
# @return [Boolean]
def overlaps_with_ratio?(other, ratio_tolerance = 0.00001)
  ratio = overlap_ratio(other)
  ratio > ratio_tolerance
end

#right=(new_x2) ⇒ Object



163
164
165
# File 'lib/tabula/core_ext.rb', line 163

# Moves the right edge to +new_x2+ while keeping +x+, +y+ and +height+;
# only the width changes (to +new_x2 - x+).
#
# @param new_x2 [Numeric] new coordinate of the right edge
def right=(new_x2)
  # Select the setRect overload that takes four floats.
  float_signature = Array.new(4) { Java::float }
  new_width = new_x2 - x
  java_send(:setRect, float_signature, x, y, new_width, height)
end

#similarity_hash ⇒ Object

used for “deduping” similar rectangles detected via CV.



256
257
258
# File 'lib/tabula/core_ext.rb', line 256

# Coarse key for grouping near-identical rectangles: x, y, width and
# height are truncated to integers and integer-divided by
# SIMILARITY_DIVISOR, so rectangles whose values land in the same buckets
# produce the same string.
#
# @return [String] the string form of the four bucket numbers
def similarity_hash
  buckets = [x, y, width, height].map { |value| value.to_i / SIMILARITY_DIVISOR }
  buckets.to_s
end

#to_h ⇒ Object



277
278
279
280
281
282
283
# File 'lib/tabula/core_ext.rb', line 277

# Hash form of the rectangle.
#
# @return [Hash{Symbol => Object}] the values of +top+, +left+, +width+
#   and +height+, keyed by those names (in that order)
def to_h
  [:top, :left, :width, :height].each_with_object({}) do |attribute, result|
    result[attribute] = send(attribute)
  end
end

#to_lines ⇒ Object

Decomposes a rectangle into its 4 constituent lines.



193
194
195
196
197
198
199
200
# File 'lib/tabula/core_ext.rb', line 193

# Breaks the rectangle up into its four edges.
#
# @return [Array] four Line2D::Float segments, in the order
#   top, bottom, left, right
def to_lines
  x_min, y_min, x_max, y_max = left, top, right, bottom

  top_edge    = Line2D::Float.new(x_min, y_min, x_max, y_min)
  bottom_edge = Line2D::Float.new(x_min, y_max, x_max, y_max)
  left_edge   = Line2D::Float.new(x_min, y_min, x_min, y_max)
  right_edge  = Line2D::Float.new(x_max, y_min, x_max, y_max)

  [top_edge, bottom_edge, left_edge, right_edge]
end

#top=(new_y) ⇒ Object



144
145
146
147
148
149
150
# File 'lib/tabula/core_ext.rb', line 144

# Moves the top edge to +new_y+ and shrinks/grows the height by the same
# amount, so that +y + height+ is unchanged.
#
# An earlier version kept the height as it was (shifting the whole
# rectangle); adjusting the height is what fixes
# test_vertical_rulings_splitting_words.
#
# @param new_y [Numeric] new coordinate of the top edge
def top=(new_y)
  shift = new_y - y
  # Select the setRect overload that takes four floats.
  float_signature = Array.new(4) { Java::float }
  java_send(:setRect, float_signature, x, new_y, width, height - shift)
end

#vertical_distance(other) ⇒ Object



188
189
190
# File 'lib/tabula/core_ext.rb', line 188

# Absolute difference between +other+'s bottom edge and this rectangle's
# bottom edge.
#
# @param other [Object] anything responding to +bottom+
# @return [Numeric] non-negative distance
def vertical_distance(other)
  offset = other.bottom - bottom
  offset.abs
end

#vertical_midpoint ⇒ Object



180
181
182
# File 'lib/tabula/core_ext.rb', line 180

# Coordinate halfway between the top and bottom edges.
#
# @return [Numeric] +top+ plus half of +height+
def vertical_midpoint
  half_height = height / 2
  top + half_height
end

#vertically_overlaps?(other) ⇒ Boolean

Roughly, detects if self and other belong to the same line

Returns:

  • (Boolean)


207
208
209
210
# File 'lib/tabula/core_ext.rb', line 207

# Tells whether the top..bottom spans of this rectangle and +other+ share
# a strictly positive height.
#
# @param other [Object] anything responding to +top+ and +bottom+
# @return [Boolean]
def vertically_overlaps?(other)
  shared_top = [top, other.top].max
  shared_bottom = [bottom, other.bottom].min
  [0, shared_bottom - shared_top].max > 0
end