Class: PDF::Reader::PageLayout

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf/reader/page_layout.rb

Overview

Takes a collection of TextRun objects and renders them into a single string that best approximates the way they’d appear on a rendered PDF page.

The mediabox should be a four-number array describing the dimensions of the page to be rendered, as given by the page’s MediaBox attribute.

Constant Summary collapse

DEFAULT_FONT_SIZE =
12

Instance Method Summary collapse

Constructor Details

#initialize(runs, mediabox) ⇒ PageLayout

Returns a new instance of PageLayout.



19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/pdf/reader/page_layout.rb', line 19

# Builds a layout for a set of text runs on one page.
#
# runs     - collection of run objects; each one is asked for font_size,
#            mean_character_width, x and y.
# mediabox - the page's MediaBox; must not be nil. It is handed to
#            process_mediabox and the result is stored.
#
# Raises whatever PDF::Reader::Error.validate_not_nil raises when mediabox
# is nil.
def initialize(runs, mediabox)
  # mediabox is a 4-element array for now, but it'd be nice to switch to a
  # PDF::Reader::Rectangle at some point
  PDF::Reader::Error.validate_not_nil(mediabox, "mediabox")

  @mediabox = process_mediabox(mediabox)
  @runs = runs

  # Use the default font size when no usable mean is available (nil or 0).
  font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
  @mean_font_size = font_size == 0 ? DEFAULT_FONT_SIZE : font_size

  @median_glyph_width = median(@runs.map(&:mean_character_width)) || 0

  # Smallest x among the runs; min avoids sorting the whole list just to
  # take its first element.
  @x_offset = @runs.map(&:x).min || 0

  # Only a negative lowest y produces a vertical offset; otherwise it is 0.
  lowest_y = @runs.map(&:y).min || 0
  @y_offset = lowest_y > 0 ? 0 : lowest_y
end

Instance Method Details

#to_s ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/pdf/reader/page_layout.rb', line 34

# Renders the runs onto a grid of row_count rows, each col_count spaces
# wide, and returns the result as one String with rows joined by "\n".
#
# Each run's column and row are derived from its x/y position relative to
# @x_offset / @y_offset, scaled by col_multiplier / row_multiplier and
# rounded. Runs whose computed position falls outside the grid are skipped.
# Trailing whitespace is stripped from every row that interesting_rows
# returns.
#
# Returns "" when there are no runs or when row_count is 0.
def to_s
  return "" if @runs.empty?
  return "" if row_count == 0

  page = Array.new(row_count) { " " * col_count }
  @runs.each do |run|
    x_pos = ((run.x - @x_offset) / col_multiplier).round
    y_pos = row_count - ((run.y - @y_offset) / row_multiplier).round
    next unless y_pos.between?(0, row_count) && x_pos.between?(0, col_count)

    # y_pos counts rows from the top starting at 1, so it maps to
    # page[y_pos - 1]. A run at the very top edge rounds to y_pos == 0;
    # clamp it to the first row, because page[-1] would silently draw it on
    # the bottom row instead.
    row_index = [y_pos - 1, 0].max
    local_string_insert(page[row_index], run.text, x_pos)
  end
  interesting_rows(page).map(&:rstrip).join("\n")
end