Class: CorpPdf::PDFWriter
- Inherits:
-
Object
- Object
- CorpPdf::PDFWriter
- Defined in:
- lib/corp_pdf/pdf_writer.rb
Overview
PDFWriter - Clean PDF writer for flattening documents. Writes a complete PDF from parsed objects, consolidating incremental updates.
Instance Method Summary collapse
-
#initialize ⇒ PDFWriter
constructor
A new instance of PDFWriter.
- #output ⇒ Object
- #write_header ⇒ Object
- #write_object(ref, body) ⇒ Object
- #write_trailer(size, root_ref, info_ref = nil) ⇒ Object
- #write_xref ⇒ Object
Constructor Details
#initialize ⇒ PDFWriter
Returns a new instance of PDFWriter.
7 8 9 10 11 12 |
# File 'lib/corp_pdf/pdf_writer.rb', line 7

# Sets up an empty writer.
#
# The output buffer is a binary (ASCII-8BIT) string so that appending raw PDF
# bytes never triggers a UTF-8/ASCII-8BIT encoding conflict.
#
# @return [PDFWriter] a new instance of PDFWriter
def initialize
  # Work entirely in binary encoding to avoid UTF-8/ASCII-8BIT conflicts
  @buffer = "".b
  # One [obj_num, gen, offset] triple per written object, used for the xref table
  @offsets = []
  # Byte position of the xref table; stays 0 until #write_xref runs
  @xref_offset = 0
end
Instance Method Details
#output ⇒ Object
129 130 131 |
# File 'lib/corp_pdf/pdf_writer.rb', line 129

# Returns everything written so far.
#
# @return [String] the internal buffer itself (not a copy), so mutating the
#   returned string also changes the writer's state
def output
  @buffer
end
#write_header ⇒ Object
14 15 16 17 18 |
# File 'lib/corp_pdf/pdf_writer.rb', line 14

# Appends the PDF file header to the buffer: the "%PDF-1.6" version line
# followed by a comment line made of four bytes above 0x7F.
#
# @return [String] the internal buffer
def write_header
  @buffer << "%PDF-1.6\n".b
  # Binary marker (helps PDF readers identify binary content)
  @buffer << "%\xE2\xE3\xCF\xD3\n".b
end
#write_object(ref, body) ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/corp_pdf/pdf_writer.rb', line 20

# Appends one indirect object ("N G obj ... endobj") to the buffer and records
# the byte offset at which it starts, for later use by #write_xref.
#
# @param ref [Array(Integer, Integer)] object number and generation number
# @param body [String] serialized object body; a trailing newline is added
#   when it does not already end with one
# @return [String] the internal buffer
def write_object(ref, body)
  obj_num, gen = ref
  # Record the offset before writing anything: the xref entry must point at
  # the first byte of the "N G obj" line.
  @offsets << [obj_num, gen, @buffer.bytesize]

  # Convert once and run every check on the binary copy, so the newline test
  # sees exactly the bytes that are written.
  data = body.b

  @buffer << "#{obj_num} #{gen} obj\n".b
  @buffer << data
  # Ensure proper spacing before endobj
  @buffer << "\n".b unless data.end_with?("\n")
  @buffer << "endobj\n".b
end
#write_trailer(size, root_ref, info_ref = nil) ⇒ Object
114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
# File 'lib/corp_pdf/pdf_writer.rb', line 114

# Appends the trailer dictionary, the startxref pointer and the %%EOF marker.
#
# The startxref value is the offset stored by #write_xref; it is still 0 if
# #write_xref has not been called on this writer.
#
# @param size [Integer] value written as the /Size entry
# @param root_ref [Array(Integer, Integer)] object and generation number
#   written as the /Root reference
# @param info_ref [Array(Integer, Integer), nil] object and generation number
#   written as the /Info reference; the entry is omitted when nil
# @return [String] the internal buffer
def write_trailer(size, root_ref, info_ref = nil)
  entries = [
    "/Size #{size}",
    "/Root #{root_ref[0]} #{root_ref[1]} R"
  ]
  entries << "/Info #{info_ref[0]} #{info_ref[1]} R" if info_ref

  trailer = "trailer\n".b
  trailer << "<< #{entries.join(' ')} >>\n".b
  trailer << "startxref\n".b
  trailer << "#{@xref_offset}\n".b
  trailer << "%%EOF\n".b

  @buffer << trailer
end
#write_xref ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/corp_pdf/pdf_writer.rb', line 37

# Appends the cross-reference table for every object recorded by
# #write_object, and stores the table's starting byte offset so that
# #write_trailer can emit it as the startxref value.
#
# The table covers object numbers 0 through the highest recorded number.
# Each maximal run of in-use numbers, and each maximal run of unused numbers,
# becomes one subsection. Unused numbers are written as free entries.
#
# When the same object number was recorded more than once, the most recently
# written copy is the one listed.
#
# @return [String] the internal buffer
def write_xref
  @xref_offset = @buffer.bytesize

  # Index recorded objects by number so each lookup is O(1). Object 0 is never
  # listed as in use: the PDF format reserves it as the head of the free list.
  in_use = {}
  @offsets.each do |obj_num, gen, offset|
    in_use[obj_num] = [offset, gen] unless obj_num.zero?
  end
  highest = in_use.keys.max || 0

  xref = "xref\n".b

  # Walk 0..highest exactly once. A new subsection starts wherever the
  # numbering switches between "in use" and "free", so every object number
  # (including 0) appears in exactly one subsection.
  (0..highest).chunk_while { |a, b| in_use.key?(a) == in_use.key?(b) }.each do |run|
    xref << "#{run.first} #{run.length}\n".b

    run.each do |obj_num|
      entry = in_use[obj_num]
      line = entry ? format("%010d %05d n \n", *entry) : "0000000000 65535 f \n"
      xref << line.b
    end
  end

  @buffer << xref
end