Module: CombinePDF::PDFOperations
- Defined in:
- lib/combine_pdf/operations.rb
Overview
This is an internal module; you don’t need to use it directly.
Class Method Summary collapse
- ._each_object(object, limit_references = true, first_call = true, &block) ⇒ Object
- ._format_array_to_pdf(object) ⇒ Object
- ._format_hash_to_pdf(object) ⇒ Object
- ._format_name_to_pdf(object) ⇒ Object
- ._format_string_to_pdf(object) ⇒ Object
-
._object_to_pdf(object) ⇒ Object
Formats an object into PDF format.
- .change_connected_references_to_actual_values(hash_with_references = {}) ⇒ Object
- .change_references_to_actual_values(objects_array = [], hash_with_references = {}) ⇒ Object
- .connect_references_and_actual_values(objects_array = [], hash_with_references = {}) ⇒ Object
-
.copy_and_secure_for_injection(page) ⇒ Object
copy_and_secure_for_injection(page) - page is a page in the pages array, i.e.
-
.create_deep_copy(object) ⇒ Object
Ruby normally assigns pointers.
- .flatten_resources_dictionaries(resources) ⇒ Object
-
.get_referenced(object) ⇒ Object
returns the PDF Object Hash holding the actual data (if it exists) or the original hash (if it wasn’t a reference).
- .get_refernced_object(objects_array = [], reference_hash = {}) ⇒ Object
-
.remove_old_ids(objects) ⇒ Object
- removes id and generation number values, for better comparison and avoiding object duplication objects
-
one or more objects in a PDF file/page.
Class Method Details
._each_object(object, limit_references = true, first_call = true, &block) ⇒ Object
253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 |
# File 'lib/combine_pdf/operations.rb', line 253
#
# Walks a nested structure of Arrays and Hashes and yields every Hash found.
#
# object:: the Array or Hash to traverse; any other value is ignored.
# limit_references:: when true (the default), hashes flagged with
#   +:is_reference_only+ are not yielded (their values are still traversed)
#   and nothing is recorded as visited. When false, every Hash is yielded and
#   each traversed object is recorded, so a hash value that was already
#   traversed is not entered again.
# first_call:: internal flag; true resets the visited table, recursive calls
#   pass false.
#
# The visited table is kept in @already_visited, so a traversal must not be
# started from inside the block of another traversal.
def _each_object(object, limit_references = true, first_call = true, &block)
  # A Hash keyed by object_id gives constant-time membership tests; the
  # previous Array made every lookup a linear scan.
  @already_visited = {} if first_call
  @already_visited[object.object_id] = true unless limit_references
  case object
  when Array
    # Array elements are always entered; only hash values are filtered below.
    object.each { |obj| _each_object(obj, limit_references, false, &block) }
  when Hash
    yield(object) unless limit_references && object[:is_reference_only]
    object.each do |_k, v|
      _each_object(v, limit_references, false, &block) unless @already_visited.key?(v.object_id)
    end
  end
end
._format_array_to_pdf(object) ⇒ Object
355 356 357 358 359 360 |
# File 'lib/combine_pdf/operations.rb', line 355
#
# Serializes a Ruby Array as a PDF array.
#
# A PDF array is a sequence of objects enclosed in square brackets
# (LEFT SQUARE BRACKET 5Bh and RIGHT SQUARE BRACKET 5Dh), for example:
#   [549 3.14 false (Ralph) /SomeName]
#
# Each item is rendered with _object_to_pdf and the items are separated by a
# single space. The result is tagged as ASCII-8BIT (binary).
def _format_array_to_pdf(object)
  rendered_items = object.map { |item| _object_to_pdf(item) }
  "[#{rendered_items.join(' ')}]".force_encoding(Encoding::ASCII_8BIT)
end
._format_hash_to_pdf(object) ⇒ Object
362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 |
# File 'lib/combine_pdf/operations.rb', line 362
#
# Serializes a Hash as a PDF reference, an indirect object or a dictionary.
#
# * A hash flagged +:is_reference_only+ is written as "<id> <generation> R".
#   The id and generation are first copied from +:referenced_object+ when that
#   is a Hash, and default to 0 when missing.
# * A hash with +:indirect_reference_id+ is wrapped in "<id> <generation> obj"
#   and "endobj"; when it carries +:indirect_without_dictionary+ only that
#   value is written between the two.
# * Everything else is written as a dictionary (<<...>>), skipping the keys
#   listed in PRIVATE_HASH_KEYS, followed by the stream when
#   +:raw_stream_content+ is present.
#
# Side effects: the hash passed in is updated in place (reference ids,
# generation number and +:Length+).
#
# Returns the serialized text tagged as ASCII-8BIT (binary).
def _format_hash_to_pdf(object)
  binary = Encoding::ASCII_8BIT

  # A pure reference: make sure it points at its target, then emit it.
  if object[:is_reference_only]
    target = object[:referenced_object]
    if target && target.is_a?(Hash)
      object[:indirect_reference_id] = target[:indirect_reference_id]
      object[:indirect_generation_number] = target[:indirect_generation_number]
    end
    object[:indirect_reference_id] ||= 0
    object[:indirect_generation_number] ||= 0
    return "#{object[:indirect_reference_id]} #{object[:indirect_generation_number]} R".force_encoding(binary)
  end

  pieces = []

  # Indirect objects get an "obj" header; a bare value ends the object here.
  if object[:indirect_reference_id]
    object[:indirect_generation_number] ||= 0
    pieces << "#{object[:indirect_reference_id]} #{object[:indirect_generation_number]} obj\n".force_encoding(binary)
    if object[:indirect_without_dictionary]
      pieces << _object_to_pdf(object[:indirect_without_dictionary])
      pieces << "\nendobj\n"
      return pieces.join.force_encoding(binary)
    end
  end

  # Keep the stream length entry in sync with the actual stream data.
  stream = object[:raw_stream_content]
  object[:Length] = stream.bytesize if stream

  # A dictionary is a sequence of key-value pairs enclosed in double angle
  # brackets (LESS-THAN SIGNs 3Ch and GREATER-THAN SIGNs 3Eh).
  pieces << "<<\n".force_encoding(binary)
  object.each do |key, value|
    next if PRIVATE_HASH_KEYS.include?(key)
    pieces << "#{_object_to_pdf key} #{_object_to_pdf value}\n".force_encoding(binary)
  end
  pieces << ">>".force_encoding(binary)
  pieces << "\nstream\n#{stream}\nendstream".force_encoding(binary) if stream
  pieces << "\nendobj\n" if object[:indirect_reference_id]
  pieces.join.force_encoding(binary)
end
._format_name_to_pdf(object) ⇒ Object
333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 |
# File 'lib/combine_pdf/operations.rb', line 333
#
# Serializes a value (usually a Symbol) as a PDF name object.
#
# A name is introduced by a SOLIDUS (/), which is a prefix and not part of
# the name. The bytes of +object.to_s+ are then written as follows:
# * bytes up to and including the space (0..32), bytes from 127 upwards,
#   the NUMBER SIGN (#) and the delimiters % ( ) / < > [ ] { } are written as
#   a NUMBER SIGN followed by their 2-digit lowercase hexadecimal code;
# * every other byte is written as itself.
#
# Returns the name, including the leading "/".
def _format_name_to_pdf(object)
  # '#', '%', '(', ')', '/', '<', '>', '[', ']', '{', '}'
  always_escaped = [35, 37, 40, 41, 47, 60, 62, 91, 93, 123, 125]
  encoded = object.to_s.each_byte.map do |byte|
    if byte <= 32 || byte >= 127 || always_escaped.include?(byte)
      format('#%02x', byte)
    else
      byte.chr
    end
  end
  "/#{encoded.join}"
end
._format_string_to_pdf(object) ⇒ Object
311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 |
# File 'lib/combine_pdf/operations.rb', line 311
#
# Serializes a String as a PDF string object.
#
# When @string_output is :literal the string is written as a literal string
# enclosed in parentheses:
# * line feed, carriage return, tab, backspace and form feed use their named
#   escapes (\n \r \t \b \f);
# * parentheses and the backslash are preceded by a backslash;
# * every other byte below 32 or from 127 upwards is written as a backslash
#   followed by exactly three octal digits, so that a digit following the
#   escape can never be read as part of it;
# * all remaining bytes are written as themselves.
#
# Otherwise the string is written as a hexadecimal string: hexadecimal digits
# enclosed in angle brackets (LESS-THAN SIGN 3Ch and GREATER-THAN SIGN 3Eh).
#
# Returns the serialized string tagged as ASCII-8BIT (binary).
def _format_string_to_pdf(object)
  if @string_output == :literal
    escaped = object.bytes.map do |byte|
      case byte
      when 0x0A then '\\n'
      when 0x0D then '\\r'
      when 0x09 then '\\t'
      when 0x08 then '\\b'
      when 0x0C then '\\f' # form feed is 0Ch (it used to be keyed on FFh)
      when 0x28 then '\\('
      when 0x29 then '\\)'
      when 0x5C then '\\\\'
      when 0..31, 127..255 then format('\\%03o', byte) # octal, not decimal
      else byte.chr
      end
    end
    ('(' + escaped.join + ')').force_encoding(Encoding::ASCII_8BIT)
  else
    ('<' + object.unpack('H*')[0] + '>').force_encoding(Encoding::ASCII_8BIT)
  end
end
._object_to_pdf(object) ⇒ Object
Formats an object into PDF format. This is used by the PDF object to format the PDF file, and it is also used by the secure injection, which is still being developed.
292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 |
# File 'lib/combine_pdf/operations.rb', line 292
#
# Formats a Ruby object as its PDF representation by dispatching on its class:
#
# * nil            -> "null"
# * String         -> _format_string_to_pdf
# * Symbol         -> _format_name_to_pdf
# * Array          -> _format_array_to_pdf
# * Integer, Float, true, false -> +to_s+ followed by a single space
# * Hash           -> _format_hash_to_pdf
# * anything else  -> an empty string
#
# Integer is used rather than Fixnum: Fixnum was merged into Integer in
# Ruby 2.4 and the constant no longer exists in current Ruby versions.
def _object_to_pdf(object)
  case object
  when nil
    'null'
  when String
    _format_string_to_pdf object
  when Symbol
    _format_name_to_pdf object
  when Array
    _format_array_to_pdf object
  when Integer, Float, TrueClass, FalseClass
    object.to_s + ' '
  when Hash
    _format_hash_to_pdf object
  else
    ''
  end
end
.change_connected_references_to_actual_values(hash_with_references = {}) ⇒ Object
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 |
# File 'lib/combine_pdf/operations.rb', line 221
#
# Replaces, in place, every hash value that is a connected reference with the
# data it refers to, then repeats the process inside nested Hashes and Arrays.
#
# A value counts as a reference when it is a Hash flagged with
# +:is_reference_only+. It is replaced by its +:indirect_without_dictionary+
# value when present, otherwise by its +:referenced_object+.
#
# hash_with_references:: the Hash or Array to process; any other value is
#   returned untouched.
#
# Returns the object that was passed in.
# Raises RuntimeError when a reference carries neither value.
def change_connected_references_to_actual_values(hash_with_references = {})
  nested = ->(item) { item.is_a?(Hash) || item.is_a?(Array) }

  case hash_with_references
  when Hash
    # First pass: swap the references held directly by this hash.
    hash_with_references.each do |key, value|
      next unless value.is_a?(Hash) && value[:is_reference_only]
      actual = value[:indirect_without_dictionary] || value[:referenced_object]
      raise "Cannot change references to values, as they are disconnected!" unless actual
      hash_with_references[key] = actual
    end
    # Second pass: descend into the (possibly just replaced) values.
    hash_with_references.each_value do |value|
      change_connected_references_to_actual_values(value) if nested.call(value)
    end
  when Array
    hash_with_references.each do |item|
      change_connected_references_to_actual_values(item) if nested.call(item)
    end
  end

  hash_with_references
end
.change_references_to_actual_values(objects_array = [], hash_with_references = {}) ⇒ Object
210 211 212 213 214 215 216 217 218 219 220 |
# File 'lib/combine_pdf/operations.rb', line 210
#
# Replaces, in place, every top-level value of +hash_with_references+ that is
# a reference (a Hash flagged with +:is_reference_only+) with the object found
# for it in +objects_array+ by PDFOperations.get_refernced_object.
#
# When the object found carries +:indirect_without_dictionary+, that value is
# used instead of the object itself. When nothing is found a warning is
# printed and the original reference is kept.
#
# Returns +hash_with_references+.
def change_references_to_actual_values(objects_array = [], hash_with_references = {})
  hash_with_references.each do |key, value|
    next unless value.is_a?(Hash) && value[:is_reference_only]

    found = PDFOperations.get_refernced_object(objects_array, value)
    hash_with_references[key] = found
    if found.is_a?(Hash) && found[:indirect_without_dictionary]
      hash_with_references[key] = found[:indirect_without_dictionary]
    end
    # The warning prints the hash with the unresolved entry already set to nil.
    warn "Couldn't connect all values from references - didn't find reference #{hash_with_references}!!!" if hash_with_references[key].nil?
    hash_with_references[key] ||= value
  end
  hash_with_references
end
.connect_references_and_actual_values(objects_array = [], hash_with_references = {}) ⇒ Object
240 241 242 243 244 245 246 247 248 249 250 |
# File 'lib/combine_pdf/operations.rb', line 240
#
# Looks up every top-level reference in +hash_with_references+ (a Hash value
# flagged with +:is_reference_only+) using PDFOperations.get_refernced_object.
# When the object found is a Hash carrying +:indirect_without_dictionary+, the
# reference is replaced in place by that value; otherwise it is left as it is.
#
# Returns true when no value was a reference, false when at least one was
# (whether or not it was replaced).
def connect_references_and_actual_values(objects_array = [], hash_with_references = {})
  no_references = true
  hash_with_references.each do |key, value|
    next unless value.is_a?(Hash) && value[:is_reference_only]

    located = PDFOperations.get_refernced_object(objects_array, value)
    if located.is_a?(Hash) && located[:indirect_without_dictionary]
      hash_with_references[key] = located[:indirect_without_dictionary]
    end
    no_references = false
  end
  no_references
end
.copy_and_secure_for_injection(page) ⇒ Object
copy_and_secure_for_injection(page)
-
page is a page in the pages array, i.e. pdf.pages
takes a page object and:
makes a deep copy of the page (Ruby defaults to pointers, so this will copy the memory).
then it will rewrite the content stream with renamed resources, so as to avoid name conflicts.
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
# File 'lib/combine_pdf/operations.rb', line 98 def copy_and_secure_for_injection(page) # copy page new_page = create_deep_copy page # initiate dictionary from old names to new names names_dictionary = {} # itirate through all keys that are name objects and give them new names (add to dic) # this should be done for every dictionary in :Resources # this is a few steps stage: # 1. get resources object resources = new_page[:Resources] if resources[:is_reference_only] resources = resources[:referenced_object] raise "Couldn't tap into resources dictionary, as it is a reference and isn't linked." unless resources end # 2. establich direct access to dictionaries and remove reference values flatten_resources_dictionaries resources # 3. travel every dictionary to pick up names (keys), change them and add them to the dictionary resources.each do |k,v| if v.is_a?(Hash) new_dictionary = {} new_name = "Combine" + SecureRandom.hex(7) + "PDF" i = 1 v.each do |old_key, value| new_key = (new_name + i.to_s).to_sym names_dictionary[old_key] = new_key new_dictionary[new_key] = value i += 1 end resources[k] = new_dictionary end end # now that we have replaced the names in the resources dictionaries, # it is time to replace the names inside the stream # we will need to make sure we have access to the stream injected # we will user PDFFilter.inflate_object (new_page[:Contents].is_a?(Array) ? new_page[:Contents] : [new_page[:Contents] ]).each do |c| stream = c[:referenced_object] PDFFilter.inflate_object stream names_dictionary.each do |old_key, new_key| stream[:raw_stream_content].gsub! _object_to_pdf(old_key), _object_to_pdf(new_key) ##### PRAY(!) that the parsed datawill be correctly reproduced! end # patch back to PDF defaults, for OCRed PDF files. 
# stream[:raw_stream_content] = "q\nq\nq\nDeviceRGB CS\nDeviceRGB cs\n0 0 0 rg\n0 0 0 RG\n0 Tr\n%s\nQ\nQ\nQ\n" % stream[:raw_stream_content] # the following was removed for Acrobat Reader compatability: DeviceRGB CS\nDeviceRGB cs\n stream[:raw_stream_content] = "q\nq\nq\n0 0 0 rg\n0 0 0 RG\n0 Tr\n1 0 0 1 0 0 cm\n%s\nQ\nQ\nQ\n" % stream[:raw_stream_content] end new_page end |
.create_deep_copy(object) ⇒ Object
Ruby normally assigns pointers. Normally:
a = [1,2,3] # => [1,2,3]
b = a # => [1,2,3]
a << 4 # => [1,2,3,4]
b # => [1,2,3,4]
This method makes sure that the memory is copied instead of a pointer being assigned. This works using recursion, so that arrays and hashes within arrays and hashes are also copied and not pointed to. One needs to be careful of infinite loops when using this function.
184 185 186 187 188 189 190 191 192 193 194 |
# File 'lib/combine_pdf/operations.rb', line 184
#
# Returns a recursive copy of +object+.
#
# * Arrays are copied element by element.
# * Hashes are copied key by key and value by value, except that entries
#   whose key is +:Parent+ are left out of the copy.
# * Strings are duplicated.
# * Any other object (such as Symbols and numbers) is returned as is, since
#   it is not edited in place.
#
# The recursion does not track what it has already copied, so a structure
# that contains itself will recurse without end.
def create_deep_copy(object)
  case object
  when Array
    object.map { |element| create_deep_copy(element) }
  when Hash
    object.each_with_object({}) do |(key, value), copy|
      next if key == :Parent
      copy[create_deep_copy(key)] = create_deep_copy(value)
    end
  when String
    object.dup
  else
    object
  end
end
.flatten_resources_dictionaries(resources) ⇒ Object
153 154 155 156 157 158 159 160 161 162 163 164 165 |
# File 'lib/combine_pdf/operations.rb', line 153
#
# Replaces, in place, every top-level reference in +resources+ (a Hash value
# flagged with +:is_reference_only+) with direct data:
#
# * when the reference has a +:referenced_object+, a shallow copy of it with
#   +:indirect_reference_id+ and +:indirect_generation_number+ removed;
# * otherwise, when it has +:indirect_without_dictionary+, that value;
# * otherwise the reference is left as it is.
#
# Returns +resources+.
def flatten_resources_dictionaries(resources)
  resources.each do |name, entry|
    next unless entry.is_a?(Hash) && entry[:is_reference_only]

    if entry[:referenced_object]
      # Work on a copy so that the referenced object keeps its own ids.
      direct = entry[:referenced_object].dup
      direct.delete(:indirect_reference_id)
      direct.delete(:indirect_generation_number)
      resources[name] = direct
    elsif entry[:indirect_without_dictionary]
      resources[name] = entry[:indirect_without_dictionary]
    end
  end
end
.get_referenced(object) ⇒ Object
returns the PDF Object Hash holding the actual data (if it exists) or the original hash (if it wasn’t a reference)
works only AFTER references have been connected.
170 171 172 |
# File 'lib/combine_pdf/operations.rb', line 170
#
# Returns the value stored under +:referenced_object+ in +object+, or
# +object+ itself when there is no such value.
def get_referenced(object)
  target = object[:referenced_object]
  target ? target : object
end
.get_refernced_object(objects_array = [], reference_hash = {}) ⇒ Object
201 202 203 204 205 206 207 208 209 |
# File 'lib/combine_pdf/operations.rb', line 201
#
# Finds the object a reference points to.
#
# objects_array:: the objects to search.
# reference_hash:: a Hash holding the +:indirect_reference_id+ and
#   +:indirect_generation_number+ to look for.
#
# Returns the first Hash in +objects_array+ whose id and generation number
# both equal those of +reference_hash+. When there is none, a warning is
# printed and nil is returned.
def get_refernced_object(objects_array = [], reference_hash = {})
  match = objects_array.find do |candidate|
    candidate.is_a?(Hash) &&
      reference_hash[:indirect_reference_id] == candidate[:indirect_reference_id] &&
      reference_hash[:indirect_generation_number] == candidate[:indirect_generation_number]
  end
  return match if match

  warn "didn't find reference #{reference_hash}"
  nil
end
.remove_old_ids(objects) ⇒ Object
removes id and generation number values, for better comparison and avoiding object duplication
- objects
-
one or more objects in a PDF file/page.
198 199 200 |
# File 'lib/combine_pdf/operations.rb', line 198
#
# Deletes the +:indirect_reference_id+ and +:indirect_generation_number+
# entries from every Hash that _each_object yields for +objects+.
#
# objects:: one or more objects in a PDF file/page.
def remove_old_ids(objects)
  _each_object(objects) do |pdf_object|
    pdf_object.delete(:indirect_reference_id)
    pdf_object.delete(:indirect_generation_number)
  end
end