Class: PDF::Reader::TextReceiver
- Inherits:
-
Object
- Object
- PDF::Reader::TextReceiver
- Defined in:
- lib/pdf/reader/text_receiver.rb
Overview
Instance Method Summary collapse
-
#begin_document(root) ⇒ Object
Called when the document parsing begins.
-
#begin_page(info) ⇒ Object
Called when new page parsing begins.
- #begin_page_container(page) ⇒ Object
-
#begin_text_object ⇒ Object
PDF operator BT.
- #calculate_line_and_location(new_loc) ⇒ Object
-
#end_document ⇒ Object
Called when the document parsing ends.
-
#end_page ⇒ Object
Called when page parsing ends.
- #end_page_container ⇒ Object
-
#end_text_object ⇒ Object
PDF operator ET.
-
#initialize(main_receiver) ⇒ TextReceiver
constructor
Initialize with the library user’s receiver.
- #media_box_check(dict) ⇒ Object
-
#move_text_position(tx, ty) ⇒ Object
PDF operator Td.
-
#move_text_position_and_set_leading(tx, ty) ⇒ Object
PDF operator TD.
-
#move_to_next_line_and_show_text(string) ⇒ Object
PDF operator ' (single quote).
-
#move_to_start_of_next_line ⇒ Object
PDF operator T*.
-
#set_character_spacing(n) ⇒ Object
PDF operator Tc.
-
#set_horizontal_text_scaling(n) ⇒ Object
PDF operator Tz.
-
#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object
PDF operator " (double quote).
-
#set_text_leading(n) ⇒ Object
PDF operator TL.
-
#set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) ⇒ Object
PDF operator Tm.
-
#set_word_spacing(n) ⇒ Object
PDF operator Tw.
-
#show_text(string) ⇒ Object
PDF operator Tj.
-
#show_text_with_positioning(params) ⇒ Object
PDF operator TJ.
- #super_show_text(string) ⇒ Object
Constructor Details
#initialize(main_receiver) ⇒ TextReceiver
Initialize with the library user’s receiver
37 38 39 40 |
# File 'lib/pdf/reader/text_receiver.rb', line 37
#
# Initialize with the library user's receiver.
#
# @param main_receiver [#<<] object that #end_page appends each page's
#   collected text to
def initialize(main_receiver)
  @main_receiver = main_receiver
  # Stack of {:urx, :ury} hashes maintained by the page / page-container
  # callbacks.
  @upper_corners = []
end
Instance Method Details
#begin_document(root) ⇒ Object
Called when the document parsing begins
43 44 45 |
# File 'lib/pdf/reader/text_receiver.rb', line 43
#
# Called when the document parsing begins. Resets the stack of page sizes.
#
# @param root [Object] not used by this method
def begin_document(root)
  @upper_corners = []
end
#begin_page(info) ⇒ Object
Called when new page parsing begins
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/pdf/reader/text_receiver.rb', line 61
#
# Called when new page parsing begins. Resets all per-page bookkeeping.
#
# @param info [Object] the page description; stored in @page and passed to
#   #media_box_check
def begin_page(info)
  @page = info

  # Text-state stack; the operator callbacks read and write the top entry.
  @state = [{
    :char_spacing  => 0,
    :word_spacing  => 0,
    :hori_scaling  => 100,
    :leading       => 0,
    :tj_adjustment => 0,
  }]

  @upper_corners.push(media_box_check(info))

  @output       = []  # one entry per output line, indexed by @line
  @line         = 0
  @location     = 0
  @displacement = {}  # location => output line index
  @smallest_y_loc = @upper_corners.last[:ury]
  @written_to   = false
end
#begin_page_container(page) ⇒ Object
52 53 54 |
# File 'lib/pdf/reader/text_receiver.rb', line 52
#
# Pushes the size computed by #media_box_check for this container onto the
# @upper_corners stack.
#
# @param page [Object] the container description handed to #media_box_check
def begin_page_container(page)
  @upper_corners.push(media_box_check(page))
end
#begin_text_object ⇒ Object
PDF operator BT
89 90 91 |
# File 'lib/pdf/reader/text_receiver.rb', line 89
#
# PDF operator BT.
#
# Pushes a shallow copy of the current text state so that it becomes the
# active (top) entry of the @state stack.
def begin_text_object
  snapshot = @state.last.dup
  @state << snapshot
end
#calculate_line_and_location(new_loc) ⇒ Object
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 |
# File 'lib/pdf/reader/text_receiver.rb', line 229
#
# Maps a new text location to an output line index and makes it current.
# Updates @displacement, @smallest_y_loc, @location and @line.
#
# @param new_loc [Numeric] the new location (callers pass the Tm "f" operand
#   or @location + ty)
# @return [Integer] the line index now stored in @line
def calculate_line_and_location(new_loc)
  key = new_loc
  key.freeze # it is used as a Hash key below

  if !@written_to
    # Nothing has been shown on this page yet: this location is line 0.
    @displacement[key] = 0
  elsif !@displacement.has_key?(key)
    if key < @location || key < @smallest_y_loc
      # Lower than the current location / anything seen so far: next line.
      @displacement[key] = @line + 1
    else
      # Otherwise snap to the closest already-known location below this one.
      # When there is none, keep the requested location itself instead of
      # falling through with a nil key (which used to raise NoMethodError).
      lower = @displacement.keys.select { |known| known < key }.max
      key = lower unless lower.nil?
      @displacement[key] = 0 unless @displacement.has_key?(key)
    end
  end

  @smallest_y_loc = key if key < @smallest_y_loc
  @location = key
  @line = @displacement[key]
end
#end_document ⇒ Object
Called when the document parsing ends
48 49 50 |
# File 'lib/pdf/reader/text_receiver.rb', line 48
#
# Called when the document parsing ends. Empties the text-state stack.
def end_document
  # @state is only assigned in #begin_page, so it is still nil for a
  # document in which no page was started.
  @state.clear if @state
end
#end_page ⇒ Object
Called when page parsing ends
83 84 85 86 |
# File 'lib/pdf/reader/text_receiver.rb', line 83
#
# Called when page parsing ends.
#
# Joins the collected output lines with newlines, appends the result to the
# user's receiver, and drops this page's entry from @upper_corners.
def end_page
  page_text = @output.join("\n")
  @main_receiver << page_text
  @upper_corners.pop
end
#end_page_container ⇒ Object
56 57 58 |
# File 'lib/pdf/reader/text_receiver.rb', line 56
#
# Removes the top entry of the @upper_corners stack (the counterpart of the
# push done in #begin_page_container).
#
# @return [Hash, nil] the removed entry, or nil when the stack was empty
def end_page_container
  @upper_corners.pop
end
#end_text_object ⇒ Object
PDF operator ET
94 95 96 |
# File 'lib/pdf/reader/text_receiver.rb', line 94
#
# PDF operator ET.
#
# Drops the text state pushed by the matching #begin_text_object.
def end_text_object
  # Keep the base entry created in #begin_page: an ET without a matching BT
  # would otherwise empty the stack, and every later `@state.last[...]`
  # would be called on nil.
  @state.pop if @state.size > 1
end
#media_box_check(dict) ⇒ Object
217 218 219 220 221 222 223 224 225 226 227 |
# File 'lib/pdf/reader/text_receiver.rb', line 217
#
# Computes the page size that applies to +dict+.
#
# Starts from a copy of the innermost entry on @upper_corners (or 0x0 when
# the stack is empty) and overrides it when +dict+ carries a 'MediaBox'.
#
# @param dict [#has_key?, #[]] page or page-container dictionary
# @return [Hash] a fresh hash with :urx (width) and :ury (height)
def media_box_check(dict)
  inherited = @upper_corners.last || { :urx => 0, :ury => 0 }
  corners = inherited.dup

  if dict.has_key?('MediaBox')
    media_box = dict['MediaBox']
    # Elements 0..3 are read as two corner points: [x0, y0, x1, y1].
    corners[:urx] = media_box[2] - media_box[0]
    corners[:ury] = media_box[3] - media_box[1]
  end

  corners
end
#move_text_position(tx, ty) ⇒ Object
PDF operator Td
129 130 131 132 |
# File 'lib/pdf/reader/text_receiver.rb', line 129
#
# PDF operator Td.
#
# Only the vertical offset is used: the new location is @location + ty.
#
# @param tx [Numeric] horizontal offset (ignored)
# @param ty [Numeric] vertical offset added to the current location
def move_text_position(tx, ty)
  calculate_line_and_location(@location + ty)
end
#move_text_position_and_set_leading(tx, ty) ⇒ Object
PDF operator TD
135 136 137 138 |
# File 'lib/pdf/reader/text_receiver.rb', line 135
#
# PDF operator TD.
#
# Stores +ty+ as the text leading and then moves by (tx, ty).
#
# NOTE(review): the leading is stored with the sign of +ty+ as given (a
# "* -1" was commented out in the original). #move_to_start_of_next_line
# passes the stored leading straight to #move_text_position, so the two
# agree with each other; confirm this against the PDF spec definition of
# TD/TL/T* before changing either.
#
# @param tx [Numeric] horizontal offset
# @param ty [Numeric] vertical offset, also used as the new leading
def move_text_position_and_set_leading(tx, ty)
  set_text_leading(ty)
  move_text_position(tx, ty)
end
#move_to_next_line_and_show_text(string) ⇒ Object
PDF operator ' (single quote)
205 206 207 208 |
# File 'lib/pdf/reader/text_receiver.rb', line 205
#
# PDF operator ' (single quote).
#
# Moves to the start of the next line, then shows +string+.
#
# @param string [String] the text to show
def move_to_next_line_and_show_text(string)
  move_to_start_of_next_line
  show_text(string)
end
#move_to_start_of_next_line ⇒ Object
PDF operator T*
124 125 126 |
# File 'lib/pdf/reader/text_receiver.rb', line 124
#
# PDF operator T*.
#
# Moves vertically by the leading stored in the current text state.
def move_to_start_of_next_line
  leading = @state.last[:leading]
  move_text_position(0, leading)
end
#set_character_spacing(n) ⇒ Object
PDF operator Tc
104 105 106 |
# File 'lib/pdf/reader/text_receiver.rb', line 104
#
# PDF operator Tc.
#
# @param n [Numeric] character spacing stored in the current text state
def set_character_spacing(n)
  @state.last[:char_spacing] = n
end
#set_horizontal_text_scaling(n) ⇒ Object
PDF operator Tz
114 115 116 |
# File 'lib/pdf/reader/text_receiver.rb', line 114
#
# PDF operator Tz.
#
# Stores the scaling as a fraction of 100 in the current text state.
#
# NOTE(review): #begin_page seeds :hori_scaling with 100 while this setter
# stores n / 100, so the two use different units; confirm which one readers
# of :hori_scaling expect.
#
# @param n [Numeric] scaling operand
def set_horizontal_text_scaling(n)
  # Float division: with Integer operands `n/100` truncated, turning e.g.
  # 50 into 0.
  @state.last[:hori_scaling] = n / 100.0
end
#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object
PDF operator " (double quote)
211 212 213 214 215 |
# File 'lib/pdf/reader/text_receiver.rb', line 211
#
# PDF operator " (double quote).
#
# Sets word and character spacing, then moves to the next line and shows
# +string+.
#
# @param aw [Numeric] word spacing
# @param ac [Numeric] character spacing
# @param string [String] the text to show
def set_spacing_next_line_show_text(aw, ac, string)
  set_word_spacing(aw)
  set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end
#set_text_leading(n) ⇒ Object
PDF operator TL
119 120 121 |
# File 'lib/pdf/reader/text_receiver.rb', line 119
#
# PDF operator TL.
#
# @param n [Numeric] leading stored in the current text state; it is the
#   vertical offset #move_to_start_of_next_line moves by
def set_text_leading(n)
  @state.last[:leading] = n
end
#set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) ⇒ Object
PDF operator Tm
99 100 101 |
# File 'lib/pdf/reader/text_receiver.rb', line 99
#
# PDF operator Tm.
#
# Only the last operand is used; it becomes the new location.
#
# @param a [Numeric] ignored
# @param b [Numeric] ignored
# @param c [Numeric] ignored
# @param d [Numeric] ignored
# @param e [Numeric] ignored
# @param f [Numeric] passed to #calculate_line_and_location
def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f)
  calculate_line_and_location(f)
end
#set_word_spacing(n) ⇒ Object
PDF operator Tw
109 110 111 |
# File 'lib/pdf/reader/text_receiver.rb', line 109
#
# PDF operator Tw.
#
# @param n [Numeric] word spacing stored in the current text state
def set_word_spacing(n)
  @state.last[:word_spacing] = n
end
#show_text(string) ⇒ Object
PDF operator Tj
141 142 143 144 145 146 147 148 149 150 151 152 |
# File 'lib/pdf/reader/text_receiver.rb', line 141
#
# PDF operator Tj.
#
# Appends +string+ to the output line selected by @line and records that
# the page has been written to.
#
# @param string [String] the text to append
# @return [true]
def show_text(string)
  place = (@output[@line] ||= "")
  adjustment = @state.last[:tj_adjustment]

  # A pending TJ adjustment below -1000 is rendered as padding: one space
  # per 900 units of its magnitude.
  place << " " * (adjustment.abs / 900) if adjustment < -1000

  place << string
  @written_to = true
end
#show_text_with_positioning(params) ⇒ Object
PDF operator TJ
189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
# File 'lib/pdf/reader/text_receiver.rb', line 189
#
# PDF operator TJ.
#
# Walks the operand array: a number becomes the pending :tj_adjustment for
# the text that follows it, anything else is shown via #show_text. The
# adjustment in effect before the call is restored afterwards.
#
# @param params [Array<String, Numeric>] strings interleaved with adjustments
def show_text_with_positioning(params)
  prev_adjustment = @state.last[:tj_adjustment]

  params.each do |p|
    case p
    when Numeric
      # Numeric rather than Float: an Integer adjustment such as -250 must
      # not reach #show_text, where String#<< would append it as a codepoint.
      @state.last[:tj_adjustment] = p
    else
      show_text(p)
    end
  end

  @state.last[:tj_adjustment] = prev_adjustment
end
#super_show_text(string) ⇒ Object
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
# File 'lib/pdf/reader/text_receiver.rb', line 153 def super_show_text (string) urx = @upper_corners.last[:urx]/TS_UNITS_PER_H_CHAR ury = @upper_corners.last[:ury]/TS_UNITS_PER_V_CHAR x = (@tm[2,0]/TS_UNITS_PER_H_CHAR).to_i y = (ury - (@tm[2,1]/TS_UNITS_PER_V_CHAR)).to_i #puts "rendering '#{string}' to #{x}x#{y}" place = (@output[y] ||= (" " * urx.to_i)) #puts "#{urx} #{place.size} #{string.size} #{x}" return if x+string.size >= urx string.split(//).each do |c| chars = 1 case c when " " chars += @state.last[:word_spacing].to_i place[x-1, chars] = (" " * chars) else chars += @state.last[:char_spacing].to_i chars -= (@state.last[:tj_adjustment]/1000).to_i if @state.last[:tj_adjustment] chars = 1 if chars < 1 place[x-1] = c place[x, chars-1] = (" " * (chars-1)) if chars > 1 end x += chars end @tm += Matrix.rows([[1, 0, 0], [0, 1, 0], [x*TS_UNITS_PER_H_CHAR, y*TS_UNITS_PER_V_CHAR, 1]]) end |