Class: PDF::Reader::PageTextReceiver

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf/reader/page_text_receiver.rb

Defined Under Namespace

Classes: Point

Constant Summary collapse

# Baseline graphics state that page= places at the bottom of the state stack.
# Holds the current transformation matrix (:ctm) plus the text-state values
# that the set_* callbacks overwrite as operators are processed.
DEFAULT_GRAPHICS_STATE = {
  :ctm            => Matrix.identity(3), # 3x3 identity matrix
  :char_spacing   => 0,
  :word_spacing   => 0,
  :h_scaling      => 100,
  :text_leading   => 0,
  :text_font      => nil,                # no font selected yet
  :text_font_size => nil,
  :text_mode      => 0,
  :text_rise      => 0,
  :text_knockout  => 0
}

Instance Method Summary collapse

Instance Method Details

#begin_text_object ⇒ Object

Text Object Operators



80
81
82
83
# File 'lib/pdf/reader/page_text_receiver.rb', line 80

# Callback for the start of a text object: both the text line matrix and the
# text matrix are reset to a fresh 3x3 identity matrix.
def begin_text_object
  @text_line_matrix = Matrix.identity(3)
  @text_matrix      = Matrix.identity(3)
end

#concatenate_matrix(a, b, c, d, e, f) ⇒ Object

Update the current transformation matrix (CTM).

If the CTM is currently undefined, just store the new values.

If there’s an existing CTM, multiply the new matrix by the existing CTM (new matrix on the left) to form the updated matrix.



63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/pdf/reader/page_text_receiver.rb', line 63

# Folds a new transformation into the current transformation matrix (CTM).
#
# The six operands are laid out as the 3x3 matrix
#   [a b 0]
#   [c d 0]
#   [e f 1]
# When the current state already has a CTM, the new matrix is multiplied on
# the left of it; otherwise the new matrix simply becomes the CTM.
def concatenate_matrix(a, b, c, d, e, f)
  incoming = Matrix[[a, b, 0], [c, d, 0], [e, f, 1]]
  existing = state[:ctm]
  state[:ctm] = existing ? incoming * existing : incoming
end

#content ⇒ Object



33
34
35
36
37
38
# File 'lib/pdf/reader/page_text_receiver.rb', line 33

# Returns everything recorded for the page as a single string.
#
# show_text files each run of text under the y value of its transformed
# origin; the runs are emitted from the largest key to the smallest, joined
# by newlines.
def content
  descending_keys = @content.keys.sort.reverse
  @content.values_at(*descending_keys).join("\n")
end

#end_text_object ⇒ Object



85
86
87
88
# File 'lib/pdf/reader/page_text_receiver.rb', line 85

# Callback for the end of a text object: both the text line matrix and the
# text matrix are put back to a fresh 3x3 identity matrix.
def end_text_object
  @text_line_matrix = Matrix.identity(3)
  @text_matrix      = Matrix.identity(3)
end

#invoke_xobject(label) ⇒ Object

XObjects



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/pdf/reader/page_text_receiver.rb', line 189

# Processes the XObject registered on the page under +label+.
#
# The graphics state is saved before and restored after, so a :Matrix entry
# on the XObject (concatenated onto the CTM) only applies while it is being
# processed. If the XObject's :Subtype is :Form, it is wrapped in a
# FormXObject, its fonts become the active form fonts, and its content is
# walked with this receiver.
#
# The form fonts that were active on entry are put back afterwards rather
# than being cleared, so a form that invokes another form still has its own
# fonts once the nested one returns. The clean-up runs in an +ensure+ so the
# state stack stays balanced even if walking the form raises.
#
# Returns whatever restore_graphics_state returns.
def invoke_xobject(label)
  save_graphics_state
  enclosing_form_fonts = @form_fonts || {}
  restored = nil

  begin
    xobject = @objects.deref(@page.xobjects[label])

    matrix = xobject.hash[:Matrix]
    concatenate_matrix(*matrix) if matrix

    if xobject.hash[:Subtype] == :Form
      form = PDF::Reader::FormXObject.new(@page, xobject)
      @form_fonts = form.fonts
      form.walk(self)
    end
  ensure
    # Undo in reverse order of set-up: fonts first, then the graphics state.
    @form_fonts = enclosing_form_fonts
    restored = restore_graphics_state
  end

  restored
end

#move_text_position(x, y) ⇒ Object

Text Positioning Operators



127
128
129
130
131
132
133
134
# File 'lib/pdf/reader/page_text_receiver.rb', line 127

# Td: shifts the start of the current line by (x, y).
#
# A translation matrix built from x and y is multiplied on the left of the
# text line matrix; the product becomes both the new text line matrix and
# the new text matrix (the same object is stored in both).
def move_text_position(x, y) # Td
  translation = Matrix[[1, 0, 0], [0, 1, 0], [x, y, 1]]
  shifted     = translation * @text_line_matrix
  @text_line_matrix = shifted
  @text_matrix      = shifted
end

#move_text_position_and_set_leading(x, y) ⇒ Object

TD



136
137
138
139
# File 'lib/pdf/reader/page_text_receiver.rb', line 136

# TD: records the negated y offset as the text leading, then moves the text
# position by (x, y) exactly as move_text_position does.
def move_text_position_and_set_leading(x, y) # TD
  set_text_leading(-y)
  move_text_position(x, y)
end

#move_to_next_line_and_show_text(str) ⇒ Object



175
176
177
178
# File 'lib/pdf/reader/page_text_receiver.rb', line 175

# ' operator: advances to the start of the next line and then records +str+
# there, by delegating to move_to_start_of_next_line followed by show_text.
def move_to_next_line_and_show_text(str) # '
  move_to_start_of_next_line
  show_text(str)
end

#move_to_start_of_next_line ⇒ Object

T*



149
150
151
# File 'lib/pdf/reader/page_text_receiver.rb', line 149

# T*: moves to the start of the next line.
#
# The PDF specification defines T* as equivalent to "0 -Tl Td", where Tl is
# the current text leading. The leading is kept as a positive distance (TD
# stores the negated y offset), so stepping to the next line means moving by
# the *negative* of the leading, i.e. down the page.
def move_to_start_of_next_line # T*
  move_text_position(0, -state[:text_leading])
end

#page=(page) ⇒ Object

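Start a new page: remember the page together with its objects and fonts, and reset the collected content, the form fonts and the graphics state stack.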



24
25
26
27
28
29
30
31
# File 'lib/pdf/reader/page_text_receiver.rb', line 24

# Prepares the receiver for a new page.
#
# Keeps a reference to the page plus its objects and fonts, and clears the
# per-page working data: form fonts, collected content and the graphics
# state stack.
#
# The stack is seeded with a *copy* of DEFAULT_GRAPHICS_STATE. The state
# setters assign into the hash on top of the stack, so pushing the constant
# itself would let one page's settings overwrite the shared defaults and
# leak into every later page. A shallow copy is enough because the setters
# replace values rather than mutating them in place.
def page=(page)
  @page       = page
  @objects    = page.objects
  @fonts      = page.fonts
  @form_fonts = {}
  @content    = {}
  @stack      = [DEFAULT_GRAPHICS_STATE.dup]
end

#restore_graphics_state ⇒ Object



48
49
50
# File 'lib/pdf/reader/page_text_receiver.rb', line 48

# Drops the most recently saved graphics state by popping the state stack,
# returning the popped entry.
# NOTE(review): nothing here stops the base state installed by page= from
# being popped - confirm how an unbalanced restore is expected to behave.
def restore_graphics_state
  @stack.pop
end

#save_graphics_state ⇒ Object

Graphics State Operators



44
45
46
# File 'lib/pdf/reader/page_text_receiver.rb', line 44

# Saves the graphics state by appending whatever clone_state returns to the
# state stack. Returns the stack.
def save_graphics_state
  @stack << clone_state
end

#set_character_spacing(char_spacing) ⇒ Object

Text State Operators



94
95
96
# File 'lib/pdf/reader/page_text_receiver.rb', line 94

# Stores +char_spacing+ under :char_spacing in the current graphics state
# (the object returned by +state+).
def set_character_spacing(char_spacing)
  state[:char_spacing] = char_spacing
end

#set_horizontal_text_scaling(h_scaling) ⇒ Object



98
99
100
# File 'lib/pdf/reader/page_text_receiver.rb', line 98

# Stores +h_scaling+ under :h_scaling in the current graphics state.
# NOTE(review): the default for this key is 100, so the value is presumably
# a percentage - confirm against callers.
def set_horizontal_text_scaling(h_scaling)
  state[:h_scaling] = h_scaling
end

#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object



180
181
182
183
184
# File 'lib/pdf/reader/page_text_receiver.rb', line 180

# " operator: applies +aw+ as the word spacing and +ac+ as the character
# spacing, then moves to the next line and records +string+ there via
# move_to_next_line_and_show_text.
def set_spacing_next_line_show_text(aw, ac, string) # "
  set_word_spacing(aw)
  set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end

#set_text_font_and_size(label, size) ⇒ Object



102
103
104
105
# File 'lib/pdf/reader/page_text_receiver.rb', line 102

# Records the selected font and its size in the current graphics state:
# +label+ goes under :text_font and +size+ under :text_font_size.
def set_text_font_and_size(label, size)
  state[:text_font]      = label
  state[:text_font_size] = size
end

#set_text_leading(leading) ⇒ Object



107
108
109
# File 'lib/pdf/reader/page_text_receiver.rb', line 107

# Stores +leading+ under :text_leading in the current graphics state. This
# is the value move_to_start_of_next_line reads when stepping to a new line.
def set_text_leading(leading)
  state[:text_leading] = leading
end

#set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) ⇒ Object

Tm



141
142
143
144
145
146
147
# File 'lib/pdf/reader/page_text_receiver.rb', line 141

# Tm: replaces both the text matrix and the text line matrix with
#   [a b 0]
#   [c d 0]
#   [e f 1]
# Unlike move_text_position, the previous matrices are discarded rather than
# multiplied. The same matrix object is stored in both instance variables.
def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
  replacement = Matrix[[a, b, 0], [c, d, 0], [e, f, 1]]
  @text_line_matrix = replacement
  @text_matrix      = replacement
end

#set_text_rendering_mode(mode) ⇒ Object



111
112
113
# File 'lib/pdf/reader/page_text_receiver.rb', line 111

# Stores +mode+ under :text_mode in the current graphics state.
def set_text_rendering_mode(mode)
  state[:text_mode] = mode
end

#set_text_rise(rise) ⇒ Object



115
116
117
# File 'lib/pdf/reader/page_text_receiver.rb', line 115

# Stores +rise+ under :text_rise in the current graphics state.
def set_text_rise(rise)
  state[:text_rise] = rise
end

#set_word_spacing(word_spacing) ⇒ Object



119
120
121
# File 'lib/pdf/reader/page_text_receiver.rb', line 119

# Stores +word_spacing+ under :word_spacing in the current graphics state.
def set_word_spacing(word_spacing)
  state[:word_spacing] = word_spacing
end

#show_text(string) ⇒ Object

Record text that is drawn on the page.



158
159
160
161
162
# File 'lib/pdf/reader/page_text_receiver.rb', line 158

# Tj: records +string+ as text drawn on the page.
#
# The text-space origin (0, 0) is run through +transform+, and the y value
# of the result is the key the text is filed under in @content. Text that
# lands on the same y value is appended to the existing entry. The string
# is converted with the current font's to_utf8 before being stored.
def show_text(string) # Tj
  baseline = transform(Point.new(0, 0)).y
  line = (@content[baseline] ||= "")
  line << current_font.to_utf8(string)
end

#show_text_with_positioning(params) ⇒ Object

TJ



164
165
166
167
168
169
170
171
172
173
# File 'lib/pdf/reader/page_text_receiver.rb', line 164

# TJ: shows an array whose elements are either strings or numeric position
# adjustments.
#
# Strings are recorded through show_text. A number greater than 1000 is
# recorded as a single space; every other number, and any element of
# another type, is ignored.
#
# Integer is matched instead of Fixnum: Fixnum was folded into Integer in
# Ruby 2.4 and the constant was removed in 3.2, where referencing it raises
# NameError as soon as a numeric element is reached.
#
# NOTE(review): the PDF specification subtracts TJ numbers from the current
# position, so it is negative values that widen the gap - confirm that
# "> 1000" is the intended word-break heuristic.
def show_text_with_positioning(params) # TJ
  params.each do |arg|
    case arg
    when String
      show_text(arg)
    when Integer, Float
      show_text(" ") if arg > 1000
    end
  end
end