Class: Origami::PDF

Inherits:
Object
  • Object
show all
Defined in:
lib/origami/pdf.rb,
lib/origami/xfa.rb,
lib/origami/page.rb,
lib/origami/export.rb,
lib/origami/header.rb,
lib/origami/actions.rb,
lib/origami/catalog.rb,
lib/origami/trailer.rb,
lib/origami/acroform.rb,
lib/origami/filespec.rb,
lib/origami/metadata.rb,
lib/origami/signature.rb,
lib/origami/xreftable.rb,
lib/origami/encryption.rb,
lib/origami/collections.rb,
lib/origami/parsers/pdf.rb,
lib/origami/destinations.rb,
lib/origami/linearization.rb,
lib/origami/outputintents.rb,
lib/origami/parsers/pdf/lazy.rb,
lib/origami/parsers/pdf/linear.rb

Overview

Main class representing a PDF file and its inner contents. A PDF file contains a set of Revisions.

Defined Under Namespace

Classes: Header, Instruction, InvalidHeaderError, LazyParser, LinearParser, LinearizationError, Parser, Revision, SignatureError

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(parser = nil) ⇒ PDF

Creates a new PDF instance.

parser

The Parser object creating the document. If none is specified, some default structures are automatically created to get a minimal working document.



166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/origami/pdf.rb', line 166

#
# Creates a new PDF instance.
# _parser_:: The Parser object creating the document.
#            If none is specified, some default structures are created
#            through +init+ to get a minimal working document.
#
def initialize(parser = nil)
    @header = PDF::Header.new
    @revisions = []

    # Every document starts with a single revision carrying a fresh trailer.
    add_new_revision
    @revisions.first.trailer = Trailer.new

    # No parser given: build the default structures immediately.
    return init unless parser

    # A parser was given: remember it and mark the document as not loaded yet.
    @loaded = false
    @parser = parser
end

Instance Attribute Details

#headerObject

Document header and revisions.



121
122
123
# File 'lib/origami/pdf.rb', line 121

# Reader for the document header stored in @header.
def header; @header end

#revisionsObject

Document header and revisions.



121
122
123
# File 'lib/origami/pdf.rb', line 121

# Reader for the list of document revisions stored in @revisions.
def revisions; @revisions end

Class Method Details

.create(output, options = {}) {|pdf| ... } ⇒ Object Also known as: write

Creates a new PDF and saves it. If a block is passed, the PDF instance can be processed before saving.

Yields:

  • (pdf)


144
145
146
147
148
# File 'lib/origami/pdf.rb', line 144

#
# Creates a new PDF and saves it.
# If a block is passed, the new PDF instance is yielded to it before saving.
# _output_::  Destination handed over to PDF#save.
# _options_:: Options handed over to PDF#save.
#
# Returns whatever PDF#save returns.
#
def create(output, options = {})
    document = PDF.new
    yield document if block_given?

    document.save(output, options)
end

.deserialize(filename) ⇒ Object

Deserializes a PDF dump.



154
155
156
157
158
# File 'lib/origami/pdf.rb', line 154

#
# Deserializes a PDF dump.
# _filename_:: Path to a gzip-compressed Marshal dump.
#
# Returns the object restored by Marshal.load.
#
# SECURITY: Marshal.load can instantiate arbitrary objects while loading.
# Only call this on dump files coming from a trusted source.
#
def deserialize(filename)
    # GzipReader.open closes the file and returns the value of the block.
    Zlib::GzipReader.open(filename) do |gz|
        Marshal.load(gz.read)
    end
end

.read(path, options = {}) ⇒ Object

Reads and parses a PDF file from disk.



127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/origami/pdf.rb', line 127

#
# Reads and parses a PDF file from disk.
# _path_::    A String path (expanded to an absolute path) or any other
#             object, which is handed to the parser untouched.
# _options_:: Parser options. A truthy :lazy entry selects PDF::LazyParser,
#             otherwise PDF::LinearParser is used.
#
# Returns the result of the parser's +parse+ call.
#
def read(path, options = {})
    path = File.expand_path(path) if path.is_a?(::String)

    parser_class = options[:lazy] ? PDF::LazyParser : PDF::LinearParser

    parser_class.new(options).parse(path)
end

Instance Method Details

#<<(object) ⇒ Object Also known as: insert

Adds a new object to the PDF file. If this object has no object number yet, a new one is automatically computed and assigned to it.

It returns a Reference to this Object.

object

The object to add.



391
392
393
394
395
396
397
398
399
400
401
402
# File 'lib/origami/pdf.rb', line 391

#
# Adds a new object to the PDF file.
# _object_:: The object to add.
#
# An object already owned by another document is passed to +import+;
# any other object is added to the last revision.
#
def <<(object)
    owner = object.document

    # Does the object belong to another PDF?
    foreign = owner && !owner.equal?(self)
    return import(object) if foreign

    add_to_revision(object, @revisions.last)
end

#add_fields(*fields) ⇒ Object

Add a field to the Acrobat form.

field

The Field to add.

Raises:

  • (TypeError)


46
47
48
49
50
51
52
53
54
55
56
# File 'lib/origami/acroform.rb', line 46

#
# Adds fields to the Acrobat form.
# _fields_:: The Field objects to add.
#
# The AcroForm entry of the Catalog and its Fields array are created on
# demand. Every added field is flagged as indirect.
# Raises TypeError unless every argument is a Field.
# Returns self.
#
def add_fields(*fields)
    unless fields.all? { |field| field.is_a?(Field) }
        raise TypeError, "Expected Field arguments"
    end

    self.Catalog.AcroForm ||= InteractiveForm.new.set_indirect(true)
    self.Catalog.AcroForm.Fields ||= []

    self.Catalog.AcroForm.Fields.concat(fields)
    fields.each { |field| field.set_indirect(true) }

    self
end

#add_new_revisionObject

Ends the current Revision, and starts a new one.



438
439
440
441
442
443
444
445
446
# File 'lib/origami/pdf.rb', line 438

#
# Ends the current Revision, and starts a new one.
# The new revision gets a fresh Trailer whose Root is copied from the
# trailer of the previous last revision (nil when there was none).
# Returns self.
#
def add_new_revision
    previous_root = @revisions.empty? ? nil : @revisions.last.trailer[:Root]

    revision = Revision.new(self)
    @revisions << revision

    revision.trailer = Trailer.new
    revision.trailer.Root = previous_root

    self
end

#add_to_revision(object, revision) ⇒ Object

Adds a new object to a specific revision. If this object has no object number yet, a new one is automatically computed and assigned to it.

It returns a Reference to this Object.

object

The object to add.

revision

The revision to add the object to.



424
425
426
427
428
429
430
431
432
433
# File 'lib/origami/pdf.rb', line 424

#
# Adds a new object to a specific revision.
# _object_::   The object to add.
# _revision_:: The revision to add the object to.
#
# The object is flagged as indirect and attached to this document.
# When its number is 0, a new number/generation pair is allocated.
# Returns the reference of the object.
#
def add_to_revision(object, revision)
    object.set_indirect(true)
    object.set_document(self)

    if object.no == 0
        object.no, object.generation = allocate_new_object_number
    end

    key = object.reference
    revision.body[key] = object

    object.reference
end

#allocate_new_object_numberObject

Returns a new number/generation for future object.



588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
# File 'lib/origami/pdf.rb', line 588

#
# Returns a new number/generation pair for a future object.
# The number is one above the highest number found among the indirect
# objects and the objects held inside object streams, or 1 when no
# numbered object exists. The generation is always 0.
#
def allocate_new_object_number
    # Deprecated number allocation policy (first available)
    #no = no + 1 while get_object(no)

    candidates = self.indirect_objects

    # Objects stored inside object streams hold numbers as well.
    streams = self.indirect_objects.select { |obj| obj.is_a?(ObjectStream) }
    streams.each do |stream|
        stream.each { |inner| candidates << inner }
    end

    taken = candidates.map { |obj| obj.no }.compact
    next_no = taken.empty? ? 1 : taken.max + 1

    [ next_no, 0 ]
end

#append_page(page = Page.new) {|page| ... } ⇒ Object

Appends a page or list of pages to the end of the page tree.

page

The page to append to the document. Creates a new Page if not specified.

Yields the Page object to the block if one is given.

Yields:

  • (page)


31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/origami/page.rb', line 31

#
# Appends a page to the end of the page tree.
# _page_:: The page to append to the document. Creates a new Page if not specified.
#
# Yields the Page object if a block is present.
# Raises InvalidPDFError when the Catalog has no PageTreeNode as Pages entry.
# Returns self.
#
def append_page(page = Page.new)
    has_tree = self.Catalog && self.Catalog.Pages && self.Catalog.Pages.is_a?(PageTreeNode)
    raise InvalidPDFError, "Invalid page tree" unless has_tree

    root = self.Catalog.Pages

    # Register the page as the last kid of the root node and bump its count.
    root.Kids ||= [] #:nodoc:
    root.Kids.push(page)
    root.Count ||= 0
    root.Count += 1

    page.Parent = root

    yield(page) if block_given?

    self
end

#attach_file(path, register: true, name: nil, filter: :FlateDecode) ⇒ Object

Attaches an embedded file to the PDF.

path

The path to the file to attach.

register

Whether the file shall be registered in the name directory.

name

The embedded file name of the attachment.

filter

The stream filter used to store the file contents.



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/origami/filespec.rb', line 32

#
# Attaches an embedded file to the PDF.
# _path_::     A FileSpec, an object responding to +read+, or the path to the file to attach.
# _register_:: Whether the file shall be registered in the name directory.
# _name_::     The embedded file name of the attachment. Defaults to the
#              basename of _path_ for a file path, and to an empty string otherwise.
# _filter_::   The stream filter used to store the file contents.
#
# Returns the created file specification.
#
def attach_file(path, register: true, name: nil, filter: :FlateDecode)

    if path.is_a?(FileSpec)
        filespec = path
        name ||= ''
    else
        readable = path.respond_to?(:read)

        data =
            if readable
                path.read.force_encoding('binary')
            else
                File.binread(File.expand_path(path))
            end
        name ||= (readable ? '' : File.basename(path))

        stream = EmbeddedFileStream.new
        stream.data = data
        stream.Filter = filter

        filespec = FileSpec.new(:F => stream)
    end

    fspec = FileSpec.new.setType(:Filespec).setF(name.dup).setEF(filespec)

    self.register(Names::EMBEDDED_FILES, name.dup, fspec) if register

    fspec
end

#authorObject



41
# File 'lib/origami/metadata.rb', line 41

# Returns the :Author field as looked up by get_document_info_field.
def author
    get_document_info_field(:Author)
end

#cast_object(reference, type, parser = nil) ⇒ Object

Casts a PDF object into another object type. The target type must be a subtype of the original type.



573
574
575
576
577
578
579
580
581
582
583
# File 'lib/origami/pdf.rb', line 573

#
# Casts a PDF object into another object type.
# The target type must be a subtype of the original type.
#
# In every revision holding _reference_, the stored object is replaced by
# the result of its +cast_to+ call when _type_ is a strict subclass of the
# object's class.
#
# NOTE(review): the return value is the revisions array (the result of
# Array#each), not the cast object. Confirm with callers before changing it.
#
def cast_object(reference, type, parser = nil) #:nodoc:
    @revisions.each do |rev|
        next unless rev.body.include?(reference)

        current = rev.body[reference]
        next unless type < current.class

        rev.body[reference] = current.cast_to(type, parser)
    end
end

#CatalogObject

Returns the current Catalog Dictionary.



40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/origami/catalog.rb', line 40

#
# Returns the current Catalog Dictionary.
# The :Root trailer entry is returned as is when it already is a Catalog,
# and is cast to Catalog when it is a plain Dictionary.
# Raises InvalidPDFError for anything else.
#
def Catalog
    root = trailer_key(:Root)

    return root if root.is_a?(Catalog)
    return root.cast_to(Catalog) if root.is_a?(Dictionary)

    raise InvalidPDFError, "Broken catalog"
end

#Catalog=(cat) ⇒ Object

Sets the current Catalog Dictionary.



56
57
58
59
60
61
62
# File 'lib/origami/catalog.rb', line 56

#
# Sets the current Catalog Dictionary.
# _cat_:: The new catalog; cast to Catalog when it is not one already.
#
# The object referenced by the :Root entry of the last revision's trailer
# is deleted, if present, before the new catalog is added to the document.
#
def Catalog=(cat)
    cat = cat.cast_to(Catalog) unless cat.is_a? Catalog

    current_root = @revisions.last.trailer[:Root]
    delete_object(@revisions.last.trailer[:Root]) if current_root

    @revisions.last.trailer.Root = self << cat
end

#create_form(*fields) ⇒ Object

Creates a new AcroForm with specified fields.



35
36
37
38
39
40
# File 'lib/origami/acroform.rb', line 35

#
# Creates a new AcroForm with specified fields.
# An AcroForm already present in the Catalog is reused.
# _fields_:: The fields handed over to add_fields.
#
# Returns the AcroForm.
#
def create_form(*fields)
    form = (self.Catalog.AcroForm ||= InteractiveForm.new.set_indirect(true))
    self.add_fields(*fields)

    form
end

#create_metadata(info = {}) ⇒ Object

Modifies or creates a metadata stream.



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/origami/metadata.rb', line 88

#
# Modifies or creates a metadata stream.
# _info_:: Hash of names to values. Each pair is appended to the
#          rdf:Description element as a <pdf:name> element whose text is the value.
#
# The data of the Catalog's Metadata stream is used as the starting
# document when such a stream exists; otherwise an empty XMP skeleton is used.
# Returns the Metadata entry of the Catalog.
#
def create_metadata(info = {})
    # The XMP packet wrapper opens with an "xpacket" processing instruction,
    # matching the closing <?xpacket end="w"?> below.
    skeleton = <<-XMP
    <?xpacket begin="#{"\xef\xbb\xbf"}" id="W5M0MpCehiHzreSzNTczkc9d"?>
      <x:xmpmeta xmlns:x="adobe:ns:meta/">
        <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
          <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
          </rdf:Description>
        </rdf:RDF>
      </x:xmpmeta>
    <?xpacket end="w"?>
    XMP

    xml =
        if self.Catalog.Metadata.is_a?(Stream)
            self.Catalog.Metadata.data
        else
            skeleton
        end

    doc = REXML::Document.new(xml)
    desc = doc.elements['*/*/rdf:Description']

    info.each do |name, value|
        elt = REXML::Element.new "pdf:#{name}"
        elt.text = value

        desc.elements << elt
    end

    # Serialize into a mutable buffer, indenting by 4 spaces.
    xml = +""
    doc.write(xml, 4)

    if self.Catalog.Metadata.is_a?(Stream)
        self.Catalog.Metadata.data = xml
    else
        self.Catalog.Metadata = Stream.new(xml)
    end

    self.Catalog.Metadata
end

#create_xfa_form(xdp, *fields) ⇒ Object



30
31
32
33
34
35
# File 'lib/origami/xfa.rb', line 30

#
# Creates a form through create_form and attaches an XFA stream to it.
# _xdp_::    The content handed to XFAStream.new, stored with the :FlateDecode filter.
# _fields_:: The fields handed over to create_form.
#
# Returns the form.
#
def create_xfa_form(xdp, *fields)
    create_form(*fields).tap do |form|
        form.XFA = XFAStream.new(xdp, :Filter => :FlateDecode)
    end
end

#creation_dateObject



46
# File 'lib/origami/metadata.rb', line 46

# Returns the :CreationDate field as looked up by get_document_info_field.
def creation_date
    get_document_info_field(:CreationDate)
end

#creatorObject



44
# File 'lib/origami/metadata.rb', line 44

# Returns the :Creator field as looked up by get_document_info_field.
def creator
    get_document_info_field(:Creator)
end

#decrypt(passwd = "") ⇒ Object

Decrypts the current document. Only the Standard security handler is supported, with encryption versions 1, 2, 4 and 5 (RC4 and AES crypt filters).

passwd

The password to decrypt the document.

Raises:



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/origami/encryption.rb', line 55

#
# Decrypts the current document.
# _passwd_:: The password to decrypt the document (user or owner password).
#
# Raises EncryptionError when the document is not encrypted, when the
# StrF/StmF entries name an unknown crypt filter, or when the document ID
# is missing for an encryption version other than 5.
# Raises EncryptionNotSupportedError for an unknown security handler,
# encryption version or crypt filter method.
# Raises EncryptionInvalidPasswordError when the password matches neither
# the user nor the owner password.
# Returns self.
#
def decrypt(passwd = "")
    raise EncryptionError, "PDF is not encrypted" unless self.encrypted?

    encrypt_dict = trailer_key(:Encrypt)
    handler = Encryption::Standard::Dictionary.new(encrypt_dict.dup)

    unless handler.Filter == :Standard
        raise EncryptionNotSupportedError, "Unknown security handler : '#{handler.Filter.to_s}'"
    end

    case handler.V.to_i
    when 1,2
        # Every filter name resolves to RC4 for these versions.
        crypt_filters = Hash.new(Encryption::RC4)
        string_filter = stream_filter = nil
    when 4,5
        crypt_filters = {
            Identity: Encryption::Identity
        }

        # Map each crypt filter declared in the CF dictionary to its cipher.
        if handler[:CF].is_a?(Dictionary)
            handler[:CF].each_pair do |name, cf|
                next unless cf.is_a?(Dictionary)

                crypt_filters[name.value] =
                    if cf[:CFM] == :V2 then Encryption::RC4
                    elsif cf[:CFM] == :AESV2 then Encryption::AES
                    elsif cf[:CFM] == :None then Encryption::Identity
                    elsif cf[:CFM] == :AESV3 and handler.V.to_i == 5 then Encryption::AES
                    else
                        raise EncryptionNotSupportedError, "Unsupported encryption version : #{handler.V}"
                    end
            end
        end

        string_filter = handler.StrF.is_a?(Name) ? handler.StrF.value : :Identity
        stream_filter = handler.StmF.is_a?(Name) ? handler.StmF.value : :Identity

        unless crypt_filters.key?(string_filter)
            raise EncryptionError, "Invalid StrF value in encryption dictionary"
        end

        unless crypt_filters.key?(stream_filter)
            raise EncryptionError, "Invalid StmF value in encryption dictionary"
        end
    else
        raise EncryptionNotSupportedError, "Unsupported encryption version : #{handler.V}"
    end

    # The first element of the document ID is used for key computation.
    # A missing ID is only tolerated for encryption version 5.
    doc_id = trailer_key(:ID)
    if doc_id.is_a?(Array)
        doc_id = doc_id.first
    elsif handler.V.to_i != 5
        raise EncryptionError, "Document ID was not found or is invalid"
    end

    if handler.is_user_password?(passwd, doc_id)
        encryption_key = handler.compute_user_encryption_key(passwd, doc_id)
    elsif handler.is_owner_password?(passwd, doc_id)
        if handler.V.to_i < 5
            user_passwd = handler.retrieve_user_password(passwd)
            encryption_key = handler.compute_user_encryption_key(user_passwd, doc_id)
        else
            encryption_key = handler.compute_owner_encryption_key(passwd)
        end
    else
        raise EncryptionInvalidPasswordError
    end

    # The metadata stream counts as encrypted unless EncryptMetadata is explicitly false.
    encrypt_metadata = (handler.EncryptMetadata != false)

    self.extend(Encryption::EncryptedDocument)
    self.encryption_handler = handler
    self.crypt_filters = crypt_filters
    self.encryption_key = encryption_key
    self.stm_filter, self.str_filter = stream_filter, string_filter

    #
    # Should be fixed to exclude only the active XRefStream
    #
    metadata = self.Catalog.Metadata

    self.indirect_objects.each do |indobj|
        encrypted_objects = []
        case indobj
        when String,Stream then encrypted_objects << indobj
        when Dictionary,Array then encrypted_objects |= indobj.strings_cache
        end

        encrypted_objects.each do |obj|
            case obj
            when String
                # Skip the entries of the encryption dictionary itself and
                # the Contents string of digital signatures.
                next if obj.equal?(encrypt_dict[:U]) or
                        obj.equal?(encrypt_dict[:O]) or
                        obj.equal?(encrypt_dict[:UE]) or
                        obj.equal?(encrypt_dict[:OE]) or
                        obj.equal?(encrypt_dict[:Perms]) or
                        (obj.parent.is_a?(Signature::DigitalSignature) and
                         obj.equal?(obj.parent[:Contents]))

                obj.extend(Encryption::EncryptedString) unless obj.is_a?(Encryption::EncryptedString)
                obj.decrypt!

            when Stream
                # Cross-reference streams are skipped, and so is the metadata
                # stream when the document leaves its metadata unencrypted.
                next if obj.is_a?(XRefStream) or (not encrypt_metadata and obj.equal?(metadata))

                obj.extend(Encryption::EncryptedStream) unless obj.is_a?(Encryption::EncryptedStream)
            end
        end
    end

    self
end

#delete_object(no, generation = 0) ⇒ Object

Remove an object.



475
476
477
478
479
480
481
482
483
484
485
486
487
488
# File 'lib/origami/pdf.rb', line 475

#
# Removes an object from the body of every revision.
# _no_::         A Reference, or the Integer number of the object.
# _generation_:: The generation number, used only when _no_ is an Integer.
#
# Raises TypeError when _no_ is neither a Reference nor an Integer.
#
def delete_object(no, generation = 0)
    target =
        case no
        when Reference then no
        when ::Integer then Reference.new(no, generation)
        else
            raise TypeError, "Invalid parameter type : #{no.class}"
        end

    @revisions.each { |rev| rev.body.delete(target) }
end

#delinearize!Object

Tries to delinearize the document if it has been linearized. This operation destroys the existing cross-reference tables; it should be fixed in the future to merge them instead.

Raises:



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/origami/linearization.rb', line 45

#
# Tries to delinearize the document if it has been linearized.
# This operation removes all xrefs; it should be fixed in the future to
# merge tables instead.
#
# Raises LinearizationError when the document is not linearized, or when
# the first trailer has no dictionary and the last revision has no XRefStream.
# Returns self.
#
def delinearize!
    raise LinearizationError, 'Not a linearized document' unless self.linearized?

    #
    # Saves the first trailer.
    #
    prev_trailer = @revisions.first.trailer

    # The object of the first revision with the smallest file offset is
    # taken as the linearization dictionary.
    lin_dict = @revisions.first.objects.min_by{|obj| obj.file_offset}
    hints = lin_dict[:H]

    #
    # Removes hint streams used by linearization.
    #
    if hints.is_a?(::Array)
        # hints[0] is used as the file offset of the hint stream.
        if hints.length > 0 and hints[0].is_a?(Integer)
            hint_stream = get_object_by_offset(hints[0])
            delete_object(hint_stream.reference) if hint_stream.is_a?(Stream)
        end

        # hints[2], when present, is used as the file offset of the overflow stream.
        if hints.length > 2 and hints[2].is_a?(Integer)
            overflow_stream = get_object_by_offset(hints[2])
            delete_object(overflow_stream.reference) if overflow_stream.is_a?(Stream)
        end
    end

    #
    # Update the trailer.
    #
    last_trailer = (@revisions.last.trailer ||= Trailer.new)

    last_trailer.dictionary ||= Dictionary.new

    if prev_trailer.has_dictionary?
        last_trailer.dictionary =
            last_trailer.dictionary.merge(prev_trailer.dictionary)
    else
        # No dictionary in the first trailer: copy the trailer entries from
        # the cross-reference stream of the last revision.
        xrefstm = @revisions.last.xrefstm
        raise LinearizationError,
                'Cannot find trailer info while delinearizing document' unless xrefstm.is_a?(XRefStream)

        last_trailer.dictionary[:Root] = xrefstm[:Root]
        last_trailer.dictionary[:Encrypt] = xrefstm[:Encrypt]
        last_trailer.dictionary[:Info] = xrefstm[:Info]
        last_trailer.dictionary[:ID] = xrefstm[:ID]
    end

    #
    # Remove all xrefs.
    # Fix: Should be merged instead.
    #
    remove_xrefs

    #
    # Remove the linearization revision.
    #
    # The linearization dictionary is dropped, the remaining objects of the
    # first revision are merged into the last one, then revision 0 is removed.
    @revisions.first.body.delete(lin_dict.reference)
    @revisions.last.body.merge! @revisions.first.body

    remove_revision(0)

    self
end

#document_infoObject

Returns the document information dictionary if present.



36
37
38
# File 'lib/origami/metadata.rb', line 36

# Returns the value of the :Info trailer key (the document information dictionary), if present.
def document_info
    trailer_key(:Info)
end

#document_info?Boolean

Returns true if the document has a document information dictionary.

Returns:



29
30
31
# File 'lib/origami/metadata.rb', line 29

# Returns the result of trailer_key? for :Info, i.e. whether the document
# has a document information dictionary.
def document_info?
    trailer_key?(:Info)
end

#each_fieldObject

Iterates over each Acroform Field.



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/origami/acroform.rb', line 68

#
# Iterates over each Acroform Field.
# Each entry of the form's :Fields array is solved before being yielded.
# Nothing is yielded when the document has no form or when :Fields is not an Array.
# Without a block, returns an Enumerator whose size is the length of the
# :Fields array (0 when unavailable).
#
def each_field
    # Looks up the :Fields array of the form; nil when there is none.
    lookup_fields = lambda do
        fields = self.form? ? self.Catalog.AcroForm[:Fields] : nil
        fields.is_a?(Array) ? fields : nil
    end

    unless block_given?
        return enum_for(__method__) do
            found = lookup_fields.call
            found ? found.length : 0
        end
    end

    found = lookup_fields.call
    found.each { |field| yield(field.solve) } if found
end

#each_name(root, &block) ⇒ Object

Returns an Enumerator of all names under the specified root name directory.



163
164
165
166
167
168
169
170
171
# File 'lib/origami/catalog.rb', line 163

#
# Iterates over all names under the specified root name directory.
# _root_:: The root handed to get_names_root.
#
# Without a block, returns an Enumerator.
# Returns nil when no names root is found, self otherwise.
#
def each_name(root, &block)
    if block.nil?
        return enum_for(__method__, root)
    end

    tree = get_names_root(root)
    unless tree.nil?
        names_from_node(tree, &block)
        self
    end
end

#each_named_dest(&b) ⇒ Object

Calls block for each named destination.



34
35
36
# File 'lib/origami/destinations.rb', line 34

# Calls block for each named destination, by delegating to each_name
# with the Names::DESTINATIONS root.
def each_named_dest(&block)
    each_name(Names::DESTINATIONS, &block)
end

#each_named_embedded_file(&b) ⇒ Object Also known as: each_attachment

Calls block for each named embedded file.



74
75
76
# File 'lib/origami/filespec.rb', line 74

# Calls block for each named embedded file, by delegating to each_name
# with the Names::EMBEDDED_FILES root.
def each_named_embedded_file(&block)
    each_name(Names::EMBEDDED_FILES, &block)
end

#each_named_page(&b) ⇒ Object

Calls block for each named page.



115
116
117
# File 'lib/origami/page.rb', line 115

# Calls block for each named page, by delegating to each_name
# with the Names::PAGES root.
def each_named_page(&block)
    each_name(Names::PAGES, &block)
end

#each_named_script(&b) ⇒ Object

Calls block for each named JavaScript script.



34
35
36
# File 'lib/origami/actions.rb', line 34

# Calls block for each named JavaScript script, by delegating to each_name
# with the Names::JAVASCRIPT root.
def each_named_script(&block)
    each_name(Names::JAVASCRIPT, &block)
end

#each_object(compressed: false, recursive: false) ⇒ Object

Iterates over the objects of the document. compressed: iterates over the objects inside object streams. recursive: iterates recursively inside objects like arrays and dictionaries.



332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
# File 'lib/origami/pdf.rb', line 332

#
# Iterates over the objects of the document.
# _compressed_:: iterates over the objects inside object streams.
# _recursive_::  iterates recursively inside objects like arrays and dictionaries.
#
# Without a block, returns an Enumerator built with the same options.
#
def each_object(compressed: false, recursive: false)
    unless block_given?
        return enum_for(__method__, compressed: compressed, recursive: recursive)
    end

    visit = nil

    # Hands every direct child of a compound object over to +visit+.
    descend = lambda do |node|
        case node
        when Dictionary then node.each_value(&visit)
        when Array then node.each(&visit)
        when Stream then visit.call(node.dictionary)
        end
    end

    # Yields a child first, then walks through its own children.
    visit = lambda do |child|
        yield(child)
        descend.call(child)
    end

    @revisions.each do |revision|
        revision.each_object do |object|
            yield(object)

            descend.call(object) if recursive

            next unless compressed && object.is_a?(ObjectStream)

            object.each do |inner|
                yield(inner)

                descend.call(inner) if recursive
            end
        end
    end
end

#each_page(&b) ⇒ Object

Iterate through each page, returns self.



86
87
88
89
90
91
92
# File 'lib/origami/page.rb', line 86

#
# Iterates through each page by delegating to the each_page method of the
# Catalog's page tree root.
# Raises InvalidPageTreeError when the Catalog has no PageTreeNode as Pages entry.
#
def each_page(&b)
    has_tree = self.Catalog && self.Catalog.Pages && self.Catalog.Pages.is_a?(PageTreeNode)
    raise InvalidPageTreeError, "Invalid page tree" unless has_tree

    self.Catalog.Pages.each_page(&b)
end

#enable_usage_rights(cert, pkey, *rights) ⇒ Object

Enable the document Usage Rights.

rights

list of rights defined in UsageRights::Rights



277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
# File 'lib/origami/signature.rb', line 277

#
# Enable the document Usage Rights.
# _cert_::   An OpenSSL::X509::Certificate, or data handed to OpenSSL::X509::Certificate.new.
# _pkey_::   An OpenSSL::PKey::RSA key, or data handed to OpenSSL::PKey::RSA.new.
# _rights_:: list of rights defined in UsageRights::Rights. Each right is
#            indexed with +first+ and <tt>[1..-1]</tt>: the first element is
#            the TransformParams key, the remaining ones are appended to it.
#
# Fails when Origami::OPTIONS[:use_openssl] is not set.
# The document is frozen once the signature has been written.
#
def enable_usage_rights(cert, pkey, *rights)
    unless Origami::OPTIONS[:use_openssl]
        fail "OpenSSL is not present or has been disabled."
    end

    # Size in bytes of the DER encoding of a detached PKCS7 signature over
    # empty data; used below to size the placeholder signature contents.
    signfield_size = -> (crt, key, ca) do
        OpenSSL::PKCS7.sign(
            crt,
            key,
            '',
            ca,
            OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY
        ).to_der.size
    end

    #
    # Load key pair
    #
    key = pkey.is_a?(OpenSSL::PKey::RSA) ? pkey : OpenSSL::PKey::RSA.new(pkey)
    certificate = cert.is_a?(OpenSSL::X509::Certificate) ? cert : OpenSSL::X509::Certificate.new(cert)

    #
    # Forge digital signature dictionary
    #
    digsig = Signature::DigitalSignature.new.set_indirect(true)

    self.Catalog.AcroForm ||= InteractiveForm.new
    #self.Catalog.AcroForm.SigFlags = InteractiveForm::SigFlags::APPENDONLY

    digsig.Type = :Sig #:nodoc:
    # Zero-filled placeholder, overwritten with the real signature at the end.
    digsig.Contents = HexaString.new("\x00" * signfield_size[certificate, key, []]) #:nodoc:
    digsig.Filter = :"Adobe.PPKLite" #:nodoc:
    digsig.Name = "ARE Acrobat Product v8.0 P23 0002337" #:nodoc:
    digsig.SubFilter = :"adbe.pkcs7.detached" #:nodoc:
    digsig.ByteRange = [0, 0, 0, 0] #:nodoc:

    sigref = Signature::Reference.new #:nodoc:
    sigref.Type = :SigRef #:nodoc:
    sigref.TransformMethod = :UR3 #:nodoc:
    sigref.Data = self.Catalog

    sigref.TransformParams = UsageRights::TransformParams.new
    sigref.TransformParams.P = true #:nodoc:
    sigref.TransformParams.Type = :TransformParams #:nodoc:
    sigref.TransformParams.V = UsageRights::TransformParams::VERSION

    rights.each do |right|
        sigref.TransformParams[right.first] ||= []
        sigref.TransformParams[right.first].concat(right[1..-1])
    end

    digsig.Reference = [ sigref ]

    self.Catalog.Perms ||= Perms.new
    self.Catalog.Perms.UR3 = digsig

    #
    #  Flattening the PDF to get file view.
    #
    compile

    #
    # Creating an empty Xref table to compute signature byte range.
    #
    rebuild_dummy_xrefs

    sig_offset = get_object_offset(digsig.no, digsig.generation) + digsig.signature_offset

    # ByteRange holds two (offset, length) pairs: the data before the
    # signature contents and the data after them.
    digsig.ByteRange[0] = 0
    digsig.ByteRange[1] = sig_offset
    digsig.ByteRange[2] = sig_offset + digsig.Contents.size

    # Recompute the trailing length until it stops changing.
    # NOTE(review): presumably filesize varies with the serialized ByteRange
    # values, hence the fixed-point loop -- confirm.
    until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]
        digsig.ByteRange[3] = filesize - digsig.ByteRange[2]
    end

    # From that point on, the file size remains constant

    #
    # Correct Xrefs variations caused by ByteRange modifications.
    #
    rebuild_xrefs

    # Sign the two byte ranges surrounding the signature contents.
    file_data = output()
    signable_data = file_data[digsig.ByteRange[0],digsig.ByteRange[1]] +
        file_data[digsig.ByteRange[2],digsig.ByteRange[3]]

    signature = OpenSSL::PKCS7.sign(
        certificate,
        key,
        signable_data,
        [],
        OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY
    ).to_der
    # Write the signature over the beginning of the placeholder.
    digsig.Contents[0, signature.size] = signature

    #
    # No more modification are allowed after signing.
    #
    self.freeze
end

#encrypt(options = {}) ⇒ Object

Encrypts the current document with the provided passwords. The document will be encrypted at writing-on-disk time.

userpasswd

The user password.

ownerpasswd

The owner password.

options

A set of options to configure encryption.

Raises:



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# File 'lib/origami/encryption.rb', line 179

#
# Encrypts the current document with the provided passwords.
# The document will be encrypted at writing-on-disk time.
# _options_:: A set of options to configure encryption:
#             :user_passwd      The user password (default: '').
#             :owner_passwd     The owner password (default: '').
#             :cipher           'rc4' or 'aes', as a String or Symbol in any case (default: 'rc4').
#             :key_size         Key size in bits (default: 128).
#             :hardened         Use newer password validation, AES-256 only (default: false).
#             :encrypt_metadata Whether metadata shall be encrypted (default: true).
#             :permissions      Document permissions.
#
# Raises EncryptionError when the document is already encrypted or the key
# length is invalid for the chosen cipher.
# Raises EncryptionNotSupportedError for an unknown cipher.
# Returns self.
#
def encrypt(options = {})
    raise EncryptionError, "PDF is already encrypted" if self.encrypted?

    #
    # Default encryption options.
    #
    params =
    {
        :user_passwd => '',
        :owner_passwd => '',
        :cipher => 'rc4',            # :RC4 or :AES
        :key_size => 128,            # Key size in bits
        :hardened => false,          # Use newer password validation (since Reader X)
        :encrypt_metadata => true,   # Metadata shall be encrypted?
        :permissions => Encryption::Standard::Permissions::ALL    # Document permissions
    }.update(options)

    userpasswd, ownerpasswd = params[:user_passwd], params[:owner_passwd]

    # Normalize through to_s so that Symbol ciphers (:RC4, :aes) are accepted
    # as well as Strings.
    case params[:cipher].to_s.upcase
    when 'RC4'
        algorithm = Encryption::RC4
        if (40..128) === params[:key_size] and params[:key_size] % 8 == 0
            if params[:key_size] > 40
                version = 2
                revision = 3
            else
                version = 1
                revision = 2
            end
        else
            raise EncryptionError, "Invalid RC4 key length"
        end

        # Every filter name resolves to RC4.
        crypt_filters = Hash.new(algorithm)

    when 'AES'
        algorithm = Encryption::AES
        if params[:key_size] == 128
            version = revision = 4
        elsif params[:key_size] == 256
            version = 5
            if params[:hardened]
                revision = 6
            else
                revision = 5
            end
        else
            raise EncryptionError, "Invalid AES key length (Only 128 and 256 bits keys are supported)"
        end

        crypt_filters = {
            Identity: Encryption::Identity,
            StdCF: algorithm
        }

    else
        raise EncryptionNotSupportedError, "Cipher not supported : #{params[:cipher]}"
    end

    doc_id = (trailer_key(:ID) || generate_id).first

    handler = Encryption::Standard::Dictionary.new
    handler.Filter = :Standard #:nodoc:
    handler.V = version
    handler.R = revision
    handler.Length = params[:key_size]
    # NOTE(review): the :permissions option is not read here; P is always -1.
    handler.P = -1 # params[:Permissions]

    if revision >= 4
        handler.EncryptMetadata = params[:encrypt_metadata]
        handler.CF = Dictionary.new
        cryptfilter = Encryption::CryptFilterDictionary.new
        cryptfilter.AuthEvent = :DocOpen

        if revision == 4
            cryptfilter.CFM = :AESV2
        else
            cryptfilter.CFM = :AESV3
        end

        # The crypt filter length is expressed in bytes.
        cryptfilter.Length = params[:key_size] >> 3

        handler.CF[:StdCF] = cryptfilter
        handler.StmF = handler.StrF = :StdCF
    end

    handler.set_passwords(ownerpasswd, userpasswd, doc_id)
    encryption_key = handler.compute_user_encryption_key(userpasswd, doc_id)

    file_info = get_trailer_info
    file_info[:Encrypt] = self << handler

    self.extend(Encryption::EncryptedDocument)
    self.encryption_handler = handler
    self.encryption_key = encryption_key
    self.crypt_filters = crypt_filters
    self.stm_filter = self.str_filter = :StdCF

    self
end

#encrypted?Boolean

Returns whether the PDF file is encrypted.

Returns:



47
48
49
# File 'lib/origami/encryption.rb', line 47

# Returns whether the PDF file is encrypted, i.e. the result of
# trailer_key? for :Encrypt.
def encrypted?
    trailer_key?(:Encrypt)
end

#export_to_graph(path) ⇒ Object

Exports the document to a Graphviz DOT file.

filename

The path where to save the file.



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/origami/export.rb', line 29

#
# Exports the document to a Graphviz dot file.
# _path_:: The path where to save the file.
#
# Every object of the document that is not a Reference becomes a node;
# the entries of arrays, object streams, dictionaries and stream
# dictionaries become edges. The graph is always named "PDF".
#
def export_to_graph(path)

    # Picks the label, colors and shape of the node drawn for an object.
    node_style = lambda do |object|
        label = object.type.to_s

        fontcolor, color, shape =
            case object
            when Catalog
                [ "red", "mistyrose", "ellipse" ]
            when Name, Number
                label = object.value
                [ "brown", "lightgoldenrodyellow", "polygon" ]
            when String
                label = object.value if (object.ascii_only? and object.length <= 50)
                [ "red", "white", "polygon" ]
            when Array
                [ "darkgreen", "lightcyan", "ellipse" ]
            else
                [ "blue", "aliceblue", "ellipse" ]
            end

        { label: label, fontcolor: fontcolor, color: color, shape: shape }
    end

    # Writes the node declaration of an object.
    declare_node = lambda do |fd, object|
        attr = node_style[object]

        fd << "\t#{object.object_id} "
        fd << "[label=\"#{attr[:label]}\",shape=#{attr[:shape]},color=#{attr[:color]},style=filled,fontcolor=#{attr[:fontcolor]},fontsize=16];\n"
    end

    # Writes a labelled edge from a container to one of its entries.
    labelled_edge = lambda do |fd, container, key, target|
        fd << "\t#{container.object_id} -> #{target.solve.object_id} "
        fd << "[label=\"#{key.value}\",fontsize=9];\n"
    end

    # Writes the edges going out of an object.
    add_edges = lambda do |fd, object|
        if object.is_a?(Array) or object.is_a?(ObjectStream)
            object.each do |subobj|
                fd << "\t#{object.object_id} -> #{subobj.solve.object_id}\n"
            end

        elsif object.is_a?(Dictionary)
            object.each_pair do |name, subobj|
                labelled_edge.call(fd, object, name, subobj)
            end
        end

        if object.is_a?(Stream)
            object.dictionary.each_pair do |key, value|
                labelled_edge.call(fd, object, key, value)
            end
        end
    end

    graph_name = "PDF"

    File.open(path, "w") do |fd|
        fd << "digraph #{graph_name} {\n\n"

        nodes = self.objects(include_keys: false).reject { |obj| obj.is_a?(Reference) }

        nodes.each do |object|
            declare_node.call(fd, object)

            if object.is_a?(Stream)
                object.dictionary.each do |value|
                    declare_node.call(fd, value) unless value.is_a?(Reference)
                end
            end

            add_edges.call(fd, object)
        end

        fd << "\n}"
    end
end

#export_to_graphml(path) ⇒ Object

Exports the document to a GraphML file.

path

The path where to save the file.



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/origami/export.rb', line 119

#
# Exports the document to a GraphML file (yEd flavoured).
# _path_:: The path where to save the file.
#
# Every non-reference object becomes a node and every containment or
# reference relation becomes a directed edge. Labels are XML-escaped so that
# PDF names and strings containing markup characters cannot break the output.
#
def export_to_graphml(path)
    require 'rexml/document'

    # Labels come from PDF names/strings and may contain XML metacharacters.
    xml_entities = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;' }
    escape = -> (text) { text.to_s.gsub(/[&<>"]/, xml_entities) }

    declare_node = -> (id, attr) do
        <<-XML
        <node id="#{id}">
            <data key="d0">
                <y:ShapeNode>
                    <y:NodeLabel>#{escape[attr[:label]]}</y:NodeLabel>
                </y:ShapeNode>
            </data>
        </node>
        XML
    end

    declare_edge = -> (id, src, dest, label = nil) do
        <<-XML
        <edge id="#{id}" source="#{src}" target="#{dest}">
            <data key="d1">
                <y:PolyLineEdge>
                    <y:LineStyle type="line" width="1.0" color="#000000"/>
                    <y:Arrows source="none" target="standard"/>
                    <y:EdgeLabel>#{escape[label]}</y:EdgeLabel>
                </y:PolyLineEdge>
            </data>
        </edge>
        XML
    end

    # Only the label is emitted in GraphML; the other attributes are kept so
    # the result has the same shape as the one built by the graphviz exporter.
    appearance = -> (object) do
        label = object.type.to_s
        case object
        when Catalog
            fontcolor = "red"
            color = "mistyrose"
            shape = "doublecircle"
        when Name, Number
            label = object.value
            fontcolor = "orange"
            color = "lightgoldenrodyellow"
            shape = "polygon"
        when String
            label = object.value if (object.ascii_only? and object.length <= 50)
            fontcolor = "red"
            color = "white"
            shape = "polygon"
        when Array
            fontcolor = "green"
            color = "lightcyan"
            shape = "ellipse"
        else
            fontcolor = "blue"
            color = "aliceblue"
            shape = "ellipse"
        end

        { label: label, fontcolor: fontcolor, color: color, shape: shape }
    end

    # Appends the outgoing edges of _object_ and returns the next free edge id.
    add_edges = -> (xml, object, id) do
        if object.is_a?(Array) or object.is_a?(ObjectStream)
            object.each do |subobj|
                xml << declare_edge["e#{id}", "n#{object.object_id}", "n#{subobj.solve.object_id}"]
                id = id + 1
            end

        elsif object.is_a?(Dictionary)
            object.each_pair do |name, subobj|
                xml << declare_edge["e#{id}", "n#{object.object_id}", "n#{subobj.solve.object_id}",
                                   name.value]
                id = id + 1
            end
        end

        if object.is_a?(Stream)
            object.dictionary.each_pair do |key, value|
                # Resolve references so the edge targets a declared node,
                # as the Dictionary branch above already does.
                xml << declare_edge["e#{id}", "n#{object.object_id}", "n#{value.solve.object_id}", key.value]
                id = id + 1
            end
        end

        id
    end

    graph_name = "PDF"

    edge_nb = 1

    # lstrip: the XML declaration must be the very first thing in the document.
    xml = <<-XML.lstrip
        <?xml version="1.0" encoding="UTF-8"?>
        <graphml xmlns="http://graphml.graphdrawing.org/xmlns/graphml"
                 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                 xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns/graphml
                                     http://www.yworks.com/xml/schema/graphml/1.0/ygraphml.xsd"
                 xmlns:y="http://www.yworks.com/xml/graphml">
            <key id="d0" for="node" yfiles.type="nodegraphics"/>
            <key id="d1" for="edge" yfiles.type="edgegraphics"/>
            <graph id="#{graph_name}" edgedefault="directed">
    XML

    objects = self.objects(include_keys: false).find_all{ |obj| not obj.is_a?(Reference) }

    objects.each do |object|
        xml << declare_node["n#{object.object_id}", appearance[object]]

        if object.is_a?(Stream)
            object.dictionary.each do |value|
                unless value.is_a?(Reference)
                    # Same "n" prefix as the ids used by the edges.
                    xml << declare_node["n#{value.object_id}", appearance[value]]
                end
            end
        end

        edge_nb = add_edges[xml, object, edge_nb]
    end

    xml << '</graph>' << "\n"
    xml << '</graphml>'

    doc = REXML::Document.new(xml)
    formatter = REXML::Formatters::Pretty.new(4)
    formatter.compact = true

    File.open(path, "w") do |fd|
        formatter.write(doc, fd)
    end
end

#fieldsObject

Returns an Array of Acroform fields.



61
62
63
# File 'lib/origami/acroform.rb', line 61

#
# Collects every field yielded by #each_field into an Array.
#
def fields
    each_field.to_a
end

#form?Boolean

Returns true if the document contains an acrobat form.

Returns:



28
29
30
# File 'lib/origami/acroform.rb', line 28

#
# True when the document has a Catalog and that Catalog carries an
# /AcroForm entry.
#
def form?
    catalog = self.Catalog
    !catalog.nil? && catalog.has_key?(:AcroForm)
end

#get_destination_by_name(name) ⇒ Object

Lookup destination in the destination name directory.



27
28
29
# File 'lib/origami/destinations.rb', line 27

#
# Resolves _name_ through the Names::DESTINATIONS name directory.
#
def get_destination_by_name(name)
    resolve_name(Names::DESTINATIONS, name)
end

#get_embedded_file_by_name(name) ⇒ Object

Lookup embedded file in the embedded files name directory.



67
68
69
# File 'lib/origami/filespec.rb', line 67

#
# Resolves _name_ through the Names::EMBEDDED_FILES name directory.
#
def get_embedded_file_by_name(name)
    resolve_name(Names::EMBEDDED_FILES, name)
end

#get_field(name) ⇒ Object

Returns the corresponding named Field.



87
88
89
90
91
# File 'lib/origami/acroform.rb', line 87

#
# Returns the first field whose resolved /T entry equals _name_,
# or nil when no field matches.
# _name_:: The field name to look for.
#
def get_field(name)
    self.each_field do |field|
        title = field[:T]
        next if title.nil? # a field without a /T entry cannot match

        return field if title.solve == name
    end

    # Explicit nil: otherwise the return value of #each_field leaks to the caller.
    nil
end

#get_object(no, generation = 0, use_xrefstm: true) ⇒ Object Also known as: []

Search for an indirect object in the document.

no

Reference or number of the object.

generation

Object generation.



495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
# File 'lib/origami/pdf.rb', line 495

#
# Looks up an indirect object, trying progressively more expensive sources:
# revision bodies, classic xref tables, xref streams, and finally a scan of
# the object streams. Revisions are always visited from newest to oldest.
#
# _no_:: A Reference, an object number, or an Origami::Object (returned as is).
# _generation_:: Generation number, only used when _no_ is an integer.
# _use_xrefstm_:: When false, the lookup stops after the xref tables and
#                 returns nil without consulting xref or object streams.
#
# Returns the object, or nil if it cannot be found.
# Raises TypeError if _no_ is of any other type.
#
def get_object(no, generation = 0, use_xrefstm: true) #:nodoc:
    case no
    when Reference
        target = no
    when ::Integer
        target = Reference.new(no, generation)
    when Origami::Object
        return no
    else
        raise TypeError, "Invalid parameter type : #{no.class}"
    end

    #
    # Search through accessible indirect objects.
    #
    @revisions.reverse_each do |rev|
        return rev.body[target] if rev.body.include?(target)
    end

    #
    # Search through xref sections.
    #
    @revisions.reverse_each do |rev|
        next unless rev.has_xreftable?

        xref = rev.xreftable.find(target.refno)
        next if xref.nil? or xref.free?

        # Try loading the object if it is not present.
        object = load_object_at_offset(rev, xref.offset)
        return object unless object.nil?
    end

    # Callers can opt out of the xref stream and object stream lookups below.
    return nil unless use_xrefstm

    # Search through xref streams.
    @revisions.reverse_each do |rev|
        next unless rev.has_xrefstm?

        xrefstm = rev.xrefstm

        xref = xrefstm.find(target.refno)
        next if xref.nil?

        #
        # We found a matching XRef.
        #
        if xref.is_a?(XRefToCompressedObj)
            # The entry points into an object stream: fetch that stream first.
            objstm = get_object(xref.objstmno, 0, use_xrefstm: use_xrefstm)

            # Try the recorded index first; if the object found there does not
            # carry the expected number, fall back to a lookup by number.
            object = objstm.extract_by_index(xref.index)
            if object.is_a?(Origami::Object) and object.no == target.refno
                return object
            else
                return objstm.extract(target.refno)
            end
        elsif xref.is_a?(XRef)
            object = load_object_at_offset(rev, xref.offset)
            return object unless object.nil?
        end
    end

    #
    # Lastly search directly into Object streams (might be very slow).
    #
    @revisions.reverse_each do |rev|
        stream = rev.objects.find{|obj| obj.is_a?(ObjectStream) and obj.include?(target.refno)}
        return stream.extract(target.refno) unless stream.nil?
    end

    nil
end

#get_object_by_offset(offset) ⇒ Object

Looking for an object present at a specified file offset.



468
469
470
# File 'lib/origami/pdf.rb', line 468

#
# Returns the first indirect object whose file_offset equals _offset_,
# or nil if there is none.
#
def get_object_by_offset(offset) #:nodoc:
    indirect_objects.detect do |candidate|
        candidate.file_offset == offset
    end
end

#get_page(n) ⇒ Object

Get the n-th Page object.



97
98
99
100
101
102
103
# File 'lib/origami/page.rb', line 97

#
# Returns the n-th page, delegating to the root page tree node.
# Raises InvalidPageTreeError when the Catalog or its /Pages root is missing
# or is not a PageTreeNode.
#
def get_page(n)
    tree = self.Catalog && self.Catalog.Pages
    raise InvalidPageTreeError, "Invalid page tree" unless tree.is_a?(PageTreeNode)

    tree.get_page(n)
end

#get_page_by_name(name) ⇒ Object

Lookup page in the page name directory.



108
109
110
# File 'lib/origami/page.rb', line 108

#
# Resolves _name_ through the Names::PAGES name directory.
#
def get_page_by_name(name)
    resolve_name(Names::PAGES, name)
end

#get_script_by_name(name) ⇒ Object

Lookup script in the scripts name directory.



27
28
29
# File 'lib/origami/actions.rb', line 27

#
# Resolves _name_ through the Names::JAVASCRIPT name directory.
#
def get_script_by_name(name)
    resolve_name(Names::JAVASCRIPT, name)
end

#grep(pattern, streams: true, object_streams: true) ⇒ Object

Returns an array of strings, names and streams matching the given pattern. streams: Search into decoded stream data. object_streams: Search into objects inside object streams.

Raises:

  • (TypeError)


273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/origami/pdf.rb', line 273

#
# Returns an array of strings, names and streams matching the given pattern.
# _pattern_:: A Regexp, or a String which is escaped and matched case-insensitively.
# _streams_:: Search into decoded stream data.
# _object_streams_:: Search into objects inside object streams.
#
# Raises TypeError if _pattern_ is neither a String nor a Regexp.
#
def grep(pattern, streams: true, object_streams: true) #:nodoc:

    # A plain String is turned into a literal, case-insensitive Regexp.
    pattern = /#{Regexp.escape(pattern)}/i if pattern.is_a?(::String)
    raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)

    result = []

    # Appends the matches found in one object to result.
    # Inside this lambda, `next` ends the current invocation.
    search_object = -> (object) do
        case object
        when Stream
            result.concat object.dictionary.strings_cache.select{|str| pattern === str}
            result.concat object.dictionary.names_cache.select{|name| pattern === name.value}

            begin
                result.push object if streams and object.data.match(pattern)
            rescue Filter::Error
                next # Skip object if a decoding error occurred.
            end

            # Do not descend into object streams unless asked to.
            next if object.is_a?(ObjectStream) and not object_streams

            object.each do |subobject|
                search_object.call(subobject)
            end

        when Name, String
            result.push object if object.value.match(pattern)

        when Dictionary, Array then
            result.concat object.strings_cache.select{|str| pattern === str}
            result.concat object.names_cache.select{|name| pattern === name.value}
        end
    end

    self.indirect_objects.each do |object|
        search_object.call(object)
    end

    result
end

#import(object) ⇒ Object

Similar to PDF#insert or PDF#<<, but for an object belonging to another document. Object will be recursively copied and new version numbers will be assigned. Returns the new reference to the imported object.

object

The object to import.



411
412
413
# File 'lib/origami/pdf.rb', line 411

#
# Inserts the exported form of _object_ (an object belonging to another
# document) into this document and returns what #insert returns.
#
def import(object)
    exported = object.export
    insert(exported)
end

#indirect_objectsObject Also known as: root_objects

Return an array of indirect objects.



378
379
380
# File 'lib/origami/pdf.rb', line 378

#
# Returns a new Array holding the objects of every revision, in revision order.
#
def indirect_objects
    @revisions.each_with_object([]) do |revision, collected|
        collected.concat(revision.objects)
    end
end

#insert_page(index, page = Page.new) {|page| ... } ⇒ Object

Inserts a page at position index into the document.

index

Page index (starting from zero).

page

The page to insert into the document. Creates a new one if none given.

Pass the Page object if a block is present.

Yields:

  • (page)


57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/origami/page.rb', line 57

#
# Inserts _page_ at position _index_ of the page tree and returns the document.
# A page owned by another document is exported first. The (possibly exported)
# page is yielded when a block is given.
# Raises InvalidPageTreeError when the Catalog or its /Pages root is missing
# or is not a PageTreeNode.
#
def insert_page(index, page = Page.new)
    tree = self.Catalog && self.Catalog.Pages
    raise InvalidPageTreeError, "Invalid page tree" unless tree.is_a?(PageTreeNode)

    # Page from another document must be exported.
    owner = page.document
    page = page.export if owner and owner != self

    tree.insert_page(index, page)
    yield(page) if block_given?

    self
end

#keywordsObject



43
# File 'lib/origami/metadata.rb', line 43

# Returns the :Keywords field of the document information.
def keywords
    get_document_info_field(:Keywords)
end

#linearized?Boolean

Returns whether the current document is linearized.

Returns:



31
32
33
34
35
36
37
38
39
# File 'lib/origami/linearization.rb', line 31

#
# Returns whether the current document is linearized: it must have more than
# one revision, and the object with the lowest file offset in the first
# revision must be a Dictionary holding a :Linearized key.
# Any error raised while looking for that object yields false.
#
def linearized?
    lowest =
        begin
            @revisions.first.objects.min_by(&:file_offset)
        rescue
            return false
        end

    @revisions.size > 1 && lowest.is_a?(Dictionary) && lowest.has_key?(:Linearized)
end

#loaded!Object

Mark the document as complete. No more objects need to be fetched by the parser.



609
610
611
# File 'lib/origami/pdf.rb', line 609

#
# Sets the @loaded flag. #initialize clears this flag when the document is
# created by a parser, and #save only calls load_all_objects while it is unset.
#
def loaded!
    @loaded = true
end

#ls(pattern, follow_references: true) ⇒ Object

Returns an array of Objects whose name (in a Dictionary) is matching pattern.

Raises:

  • (TypeError)


317
318
319
320
321
322
323
324
325
# File 'lib/origami/pdf.rb', line 317

#
# Returns the values stored under the dictionary keys whose name matches
# _pattern_.
# _pattern_:: A Regexp, or a String which is escaped and matched case-insensitively.
# _follow_references_:: When true (default), each value is resolved with #solve.
#
# Raises TypeError if _pattern_ is neither a String nor a Regexp.
#
def ls(pattern, follow_references: true)
    pattern = Regexp.new(Regexp.escape(pattern), Regexp::IGNORECASE) if pattern.is_a?(::String)
    raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)

    matches = self.grep(pattern, streams: false, object_streams: true)

    # Keep only the names that are actually used as a key of their parent dictionary.
    keys = matches.select do |candidate|
        candidate.is_a?(Name) and candidate.parent.is_a?(Dictionary) and candidate.parent.key?(candidate)
    end

    keys.map do |key|
        entry = key.parent[key]
        follow_references ? entry.solve : entry
    end
end

#metadataObject

Returns a Hash of the information found in the metadata stream



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/origami/metadata.rb', line 59

#
# Returns a Hash of the information found in the Catalog metadata stream,
# or nil when the Catalog has no metadata stream.
#
# For every rdf:Description element, attributes in the 'pdf' and 'xap'
# namespaces and all child elements are collected, keyed by their local name.
#
def metadata
    metadata_stm = self.Catalog.Metadata

    if metadata_stm.is_a?(Stream)
        doc = REXML::Document.new(metadata_stm.data)
        info = {}

        doc.elements.each('*/*/rdf:Description') do |description|

            description.attributes.each_attribute do |attr|
                case attr.prefix
                when 'pdf','xap'
                    info[attr.name] = attr.value
                end
            end

            description.elements.each('*') do |element|
                # Prefer the first rdf:li item when the value is a container.
                value = (element.elements['.//rdf:li'] || element).text
                info[element.name] = value.to_s
            end
        end

        info
    end
end

#metadata?Boolean

Returns true if the document has a catalog metadata stream.

Returns:



52
53
54
# File 'lib/origami/metadata.rb', line 52

#
# True when the Catalog /Metadata entry is a Stream.
#
def metadata?
    candidate = self.Catalog.Metadata
    candidate.is_a?(Stream)
end

#mod_dateObject



47
# File 'lib/origami/metadata.rb', line 47

# Returns the :ModDate field of the document information.
def mod_date
    get_document_info_field(:ModDate)
end

#names(root) ⇒ Object

Returns a Hash of all names under the specified root name directory.



156
157
158
# File 'lib/origami/catalog.rb', line 156

#
# Builds a Hash from the pairs enumerated by #each_name for the given
# _root_ name directory.
#
def names(root)
    pairs = each_name(root)
    pairs.to_h
end

#onDocumentClose(action) ⇒ Object

Sets an action to run on document closing.

action

A JavaScript Action Object.



86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/origami/catalog.rb', line 86

#
# Stores _action_ as the WC entry of the Catalog additional actions (/AA),
# creating the /AA dictionary if needed. Returns the document.
# _action_:: An Action::JavaScript object or a Reference.
#
# Raises TypeError for any other kind of object and InvalidPDFError when the
# document has no Catalog.
#
def onDocumentClose(action)
    acceptable = action.is_a?(Action::JavaScript) || action.is_a?(Reference)
    raise TypeError, "An Action::JavaScript object must be passed." unless acceptable

    catalog = self.Catalog
    raise InvalidPDFError, "A catalog object must exist to add this action." unless catalog

    catalog.AA ||= CatalogAdditionalActions.new
    catalog.AA.WC = action

    self
end

#onDocumentOpen(action) ⇒ Object

Sets an action to run on document opening.

action

An Action Object.



68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/origami/catalog.rb', line 68

#
# Stores _action_ as the Catalog /OpenAction entry and returns the document.
# _action_:: An Action, a Destination or a Reference.
#
# Raises TypeError for any other kind of object and InvalidPDFError when the
# document has no Catalog.
#
def onDocumentOpen(action)
    acceptable = action.is_a?(Action) || action.is_a?(Destination) || action.is_a?(Reference)
    raise TypeError, "An Action object must be passed." unless acceptable

    catalog = self.Catalog
    raise InvalidPDFError, "A catalog object must exist to add this action." unless catalog

    catalog.OpenAction = action

    self
end

#onDocumentPrint(action) ⇒ Object

Sets an action to run on document printing.

action

A JavaScript Action Object.



105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/origami/catalog.rb', line 105

#
# Stores _action_ as the WP entry of the Catalog additional actions (/AA),
# creating the /AA dictionary if needed. Returns the document.
# _action_:: An Action::JavaScript object or a Reference.
#
# Raises TypeError for any other kind of object and InvalidPDFError when the
# document has no Catalog.
#
def onDocumentPrint(action)
    acceptable = action.is_a?(Action::JavaScript) || action.is_a?(Reference)
    raise TypeError, "An Action::JavaScript object must be passed." unless acceptable

    catalog = self.Catalog
    raise InvalidPDFError, "A catalog object must exist to add this action." unless catalog

    catalog.AA ||= CatalogAdditionalActions.new
    catalog.AA.WP = action

    self
end

#original_dataObject

Original data parsed to create this document, nil if created from scratch.



198
199
200
# File 'lib/origami/pdf.rb', line 198

#
# Returns the parser's target_data, or nil when the document has no parser.
#
def original_data
    return nil unless @parser

    @parser.target_data
end

#original_filenameObject

Original file name if parsed from disk, nil otherwise.



184
185
186
# File 'lib/origami/pdf.rb', line 184

#
# Returns the parser's target_filename, or nil when the document has no parser.
#
def original_filename
    return nil unless @parser

    @parser.target_filename
end

#original_filesizeObject

Original file size if parsed from a data stream, nil otherwise.



191
192
193
# File 'lib/origami/pdf.rb', line 191

#
# Returns the parser's target_filesize, or nil when the document has no parser.
#
def original_filesize
    return nil unless @parser

    @parser.target_filesize
end

#pagesObject

Returns an Enumerator of Page



75
76
77
78
79
80
81
# File 'lib/origami/page.rb', line 75

#
# Returns the pages of the root page tree node.
# Raises InvalidPageTreeError when the Catalog or its /Pages root is missing
# or is not a PageTreeNode.
#
def pages
    tree = self.Catalog && self.Catalog.Pages
    raise InvalidPageTreeError, "Invalid page tree" unless tree.is_a?(PageTreeNode)

    tree.pages
end

#pdfa1?Boolean

Returns:



42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/origami/outputintents.rb', line 42

#
# Returns true when all of the following hold:
# * the Catalog /OutputIntents entry is an Array with at least one intent
#   whose S entry equals OutputIntent::Intent::PDFA1;
# * the document has a metadata stream;
# * that stream has an rdf:Description declaring the pdfaid namespace with
#   conformance "A" and part "1".
#
def pdfa1?
    intents = self.Catalog.OutputIntents
    return false unless intents.is_a?(Array)

    declared = intents.any? { |intent| intent.solve.S == OutputIntent::Intent::PDFA1 }
    return false unless declared
    return false unless self.metadata?

    xmp = REXML::Document.new(self.Catalog.Metadata.data)
    descriptions = REXML::XPath.match(xmp, "*/*/rdf:Description[@xmlns:pdfaid]")

    descriptions.any? { |desc|
        desc.elements["pdfaid:conformance"].text == "A" &&
            desc.elements["pdfaid:part"].text == "1"
    }
end

#portfolio?Boolean

Returns true if the document behaves as a portfolio for embedded files.

Returns:



27
28
29
# File 'lib/origami/collections.rb', line 27

#
# True when the Catalog /Collection entry is a Dictionary.
#
def portfolio?
    collection = self.Catalog.Collection
    collection.is_a?(Dictionary)
end

#producerObject



45
# File 'lib/origami/metadata.rb', line 45

# Returns the :Producer field of the document information.
def producer
    get_document_info_field(:Producer)
end

#register(root, name, value) ⇒ Object

Registers an object into a specific Names root dictionary.

root

The root dictionary (see Names::Root)

name

The value name.

value

The value to associate with this name.



126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/origami/catalog.rb', line 126

#
# Registers _value_ under _name_ in the _root_ entry of the Catalog /Names
# dictionary, creating the /Names dictionary and the root name tree node
# when they do not exist yet.
# _root_:: The root dictionary (see Names::Root).
# _name_:: The value name.
# _value_:: The value to associate with this name. It is made indirect
#           unless it is already a Reference.
#
def register(root, name, value)
    self.Catalog.Names ||= Names.new

    value.set_indirect(true) unless value.is_a?(Reference)

    existing = self.Catalog.Names[root]
    unless existing.nil?
        return existing.solve[:Names] << name << value
    end

    # First registration under this root: create its name tree node.
    tree = NameTreeNode.new(:Names => []).set_indirect(true)
    self.Catalog.Names[root] = tree
    tree.Names << name << value
end

#remove_revision(index) ⇒ Object

Removes a whole document revision.

index

Revision index, first is 0.



452
453
454
455
456
457
458
459
460
461
462
463
# File 'lib/origami/pdf.rb', line 452

#
# Removes a whole document revision and returns the document.
# _index_:: Revision index, first is 0.
#
# Raises IndexError when _index_ does not designate an existing revision and
# InvalidPDFError when the document only has one revision left.
#
def remove_revision(index)
    # Valid indices are 0...size; index == size used to slip through and
    # made delete_at a silent no-op.
    if index < 0 or index >= @revisions.size
        raise IndexError, "Not a valid revision index"
    end

    if @revisions.size == 1
        raise InvalidPDFError, "Cannot remove last revision"
    end

    @revisions.delete_at(index)
    self
end

#remove_xrefsObject

Tries to strip any xrefs information off the document.



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/origami/xreftable.rb', line 27

#
# Tries to strip any xrefs information off the document: xref streams (and
# the older ones they point to through /Prev) are deleted, then the xref
# stream and xref table of every revision are cleared.
#
def remove_xrefs

    # Deletes an xref stream, then follows its /Prev offset to older ones.
    drop_chain = -> (stm) do
        previous_offset = stm.Prev
        delete_object(stm.reference)

        next unless previous_offset.is_a?(Integer)

        older = get_object_by_offset(previous_offset)
        drop_chain.call(older) if older.is_a?(XRefStream)
    end

    @revisions.reverse_each do |revision|
        drop_chain.call(revision.xrefstm) if revision.has_xrefstm?

        # A trailer dictionary may also point to an xref stream by offset.
        trailer = revision.trailer
        if trailer.has_dictionary? and trailer.XRefStm.is_a?(Integer)
            candidate = get_object_by_offset(trailer.XRefStm)
            drop_chain.call(candidate) if candidate.is_a?(XRefStream)
        end

        revision.xrefstm = revision.xreftable = nil
    end
end

#resolve_name(root, name) ⇒ Object

Retrieve the corresponding value associated with name in the specified root name directory, or nil if the value does not exist.



146
147
148
149
150
151
# File 'lib/origami/catalog.rb', line 146

#
# Looks _name_ up in the _root_ name directory.
# Returns nil when that directory has no root node; otherwise returns
# whatever resolve_name_from_node finds.
#
def resolve_name(root, name)
    node = get_names_root(root)
    node.nil? ? nil : resolve_name_from_node(node, name)
end

#save(path, params = {}) ⇒ Object Also known as: write

Saves the current document.

path

The path where to save this PDF.



221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/origami/pdf.rb', line 221

#
# Saves the current document and returns it.
# _path_:: The path where to save this PDF, or any object responding to
#          #write (which is left open for the caller to close).
# _params_:: Options merged over the defaults (delinearize: true,
#            recompile: true, decrypt: false).
#
def save(path, params = {})
    options =
    {
        delinearize: true,
        recompile: true,
        decrypt: false
    }
    options.update(params)

    if self.frozen? # incompatible flags with frozen doc (signed)
        options[:recompile] =
        options[:rebuild_xrefs] =
        options[:noindent] =
        options[:obfuscate] = false
    end

    if path.respond_to?(:write)
        fd = path
        close = false
    else
        path = File.expand_path(path)
        fd = File.open(path, 'w').binmode
        close = true
    end

    begin
        load_all_objects unless @loaded

        intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i
        self.delinearize! if options[:delinearize] and self.linearized?
        compile(options) if options[:recompile]

        fd.write output(options)
    ensure
        # Only close what was opened here, and do so even if building the
        # output raised, so the descriptor is not leaked.
        fd.close if close
    end

    self
end

#save_upto(revision, filename) ⇒ Object

Saves the file up to given revision number. This can be useful to visualize the modifications over different incremental updates.

revision

The revision number to save.

filename

The path where to save this PDF.



264
265
266
# File 'lib/origami/pdf.rb', line 264

#
# Saves the document to _filename_ through #save, passing _revision_ as the
# up_to_revision option.
# _revision_:: The revision number to save.
# _filename_:: The path where to save this PDF.
#
def save_upto(revision, filename)
    self.save(filename, up_to_revision: revision)
end

#serialize(filename) ⇒ Object

Serializes the current PDF.



205
206
207
208
209
210
211
212
213
214
215
# File 'lib/origami/pdf.rb', line 205

#
# Serializes the current PDF: the document is marshalled and written
# gzip-compressed to _filename_. Returns the document.
#
def serialize(filename)
    parser = @parser
    @parser = nil # do not serialize the parser

    begin
        Zlib::GzipWriter.open(filename) { |gz|
            gz.write Marshal.dump(self)
        }
    ensure
        # Restore the parser even when dumping or writing fails.
        @parser = parser
    end

    self
end

#set_extension_level(version, level) ⇒ Object

Sets PDF extension level and version. Only supported values are “1.7” and 3.



27
28
29
30
31
32
33
34
35
# File 'lib/origami/catalog.rb', line 27

#
# Installs an ADBE developer extension in the Catalog /Extensions dictionary
# (created if missing), with the given base version and extension level.
# Returns the document.
#
def set_extension_level(version, level)
    extensions = (self.Catalog.Extensions ||= Extensions.new)

    extensions[:ADBE] = DeveloperExtension.new

    adbe = extensions[:ADBE]
    adbe.BaseVersion = Name.new(version)
    adbe.ExtensionLevel = level

    self
end

#sign(certificate, key, method: "adbe.pkcs7.detached", ca: [], annotation: nil, issuer: nil, location: nil, contact: nil, reason: nil) ⇒ Object

Sign the document with the given key and x509 certificate.

certificate

The X509 certificate containing the public key.

key

The private key associated with the certificate.

method

The PDF signature identifier.

ca

Optional CA certificates used to sign the user certificate.

annotation

Annotation associated with the signature.

issuer

Issuer name.

location

Signature location.

contact

Signer contact.

reason

Signing reason.



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'lib/origami/signature.rb', line 101

#
# Sign the document with the given key and x509 certificate.
# _certificate_:: The X509 certificate containing the public key.
# _key_:: The private key associated with the certificate.
# _method_:: The PDF signature identifier.
# _ca_:: Optional CA certificates used to sign the user certificate.
# _annotation_:: Annotation associated with the signature.
# _issuer_:: Issuer name.
# _location_:: Signature location.
# _contact_:: Signer contact.
# _reason_:: Signing reason.
#
# Raises TypeError when an argument has the wrong type and
# NotImplementedError for an unsupported _method_.
# The document is frozen once signed.
#
def sign(certificate, key,
         method: "adbe.pkcs7.detached",
         ca: [],
         annotation: nil,
         issuer: nil,
         location: nil,
         contact: nil,
         reason: nil)

    unless Origami::OPTIONS[:use_openssl]
        fail "OpenSSL is not present or has been disabled."
    end

    unless certificate.is_a?(OpenSSL::X509::Certificate)
        raise TypeError, "A OpenSSL::X509::Certificate object must be passed."
    end

    unless key.is_a?(OpenSSL::PKey::RSA)
        raise TypeError, "A OpenSSL::PKey::RSA object must be passed."
    end

    unless ca.is_a?(::Array)
        raise TypeError, "Expected an Array of CA certificate."
    end

    unless annotation.nil? or annotation.is_a?(Annotation::Widget::Signature)
        raise TypeError, "Expected a Annotation::Widget::Signature object."
    end

    # signfield_size computes how many bytes the final signature will take,
    # by signing an empty payload with the same parameters.
    case method
    when 'adbe.pkcs7.detached'
        signfield_size = -> (crt, pkey, certs) do
            OpenSSL::PKCS7.sign(
                crt,
                pkey,
                "",
                certs,
                OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY
            ).to_der.size
        end

    when 'adbe.pkcs7.sha1'
      signfield_size = -> (crt, pkey, certs) do
            OpenSSL::PKCS7.sign(
                crt,
                pkey,
                Digest::SHA1.digest(''),
                certs,
                OpenSSL::PKCS7::BINARY
            ).to_der.size
        end

    when 'adbe.x509.rsa_sha1'
        signfield_size = -> (crt, pkey, certs) do
            pkey.private_encrypt(
              Digest::SHA1.digest('')
            ).size
        end
        # NOTE(review): this branch always raises, so the 'adbe.x509.rsa_sha1'
        # handling further down is currently unreachable -- confirm this is intended.
        raise NotImplementedError, "Unsupported method #{method.inspect}"

    else
        raise NotImplementedError, "Unsupported method #{method.inspect}"
    end

    digsig = Signature::DigitalSignature.new.set_indirect(true)

    # Without a caller-supplied widget, use one with an empty (all-zero) rectangle.
    if annotation.nil?
        annotation = Annotation::Widget::Signature.new
        annotation.Rect = Rectangle[:llx => 0.0, :lly => 0.0, :urx => 0.0, :ury => 0.0]
    end

    annotation.V = digsig
    add_fields(annotation)
    self.Catalog.AcroForm.SigFlags =
        InteractiveForm::SigFlags::SIGNATURESEXIST | InteractiveForm::SigFlags::APPENDONLY

    digsig.Type = :Sig #:nodoc:
    # Zero-filled placeholder of the final signature size, overwritten below.
    digsig.Contents = HexaString.new("\x00" * signfield_size[certificate, key, ca]) #:nodoc:
    digsig.Filter = :"Adobe.PPKLite" #:nodoc:
    digsig.SubFilter = Name.new(method) #:nodoc:
    digsig.ByteRange = [0, 0, 0, 0] #:nodoc:
    digsig.Name = issuer

    digsig.Location = HexaString.new(location) if location
    digsig.ContactInfo = HexaString.new(contact) if contact
    digsig.Reason = HexaString.new(reason) if reason

    if method == 'adbe.x509.rsa_sha1'
        digsig.Cert =
            if ca.empty?
                HexaString.new(certificate.to_der)
            else
                [ HexaString.new(certificate.to_der) ] + ca.map{ |crt| HexaString.new(crt.to_der) }
            end
    end

    #
    #  Flattening the PDF to get file view.
    #
    compile

    #
    # Creating an empty Xref table to compute signature byte range.
    #
    rebuild_dummy_xrefs

    sig_offset = get_object_offset(digsig.no, digsig.generation) + digsig.signature_offset

    # ByteRange is [start1, length1, start2, length2]: the data before and
    # after the signature contents.
    digsig.ByteRange[0] = 0
    digsig.ByteRange[1] = sig_offset
    digsig.ByteRange[2] = sig_offset + digsig.Contents.to_s.bytesize

    # Repeat until the last length stops changing between two passes.
    until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]
        digsig.ByteRange[3] = filesize - digsig.ByteRange[2]
    end

    # From that point on, the file size remains constant

    #
    # Correct Xrefs variations caused by ByteRange modifications.
    #
    rebuild_xrefs

    # The signed data is the whole output except the signature contents.
    file_data = output()
    signable_data = file_data[digsig.ByteRange[0],digsig.ByteRange[1]] +
        file_data[digsig.ByteRange[2],digsig.ByteRange[3]]

    signature =
        case method
        when 'adbe.pkcs7.detached'
            OpenSSL::PKCS7.sign(
                certificate,
                key,
                signable_data,
                ca,
                OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY
            ).to_der

        when 'adbe.pkcs7.sha1'
            OpenSSL::PKCS7.sign(
                certificate,
                key,
                Digest::SHA1.digest(signable_data),
                ca,
                OpenSSL::PKCS7::BINARY
            ).to_der

        when 'adbe.x509.rsa_sha1'
            key.private_encrypt(Digest::SHA1.digest(signable_data))
        end

    # Overwrite the start of the placeholder with the actual signature.
    digsig.Contents[0, signature.size] = signature

    #
    # No more modifications are allowed after signing.
    #
    self.freeze
end

#signatureObject

Raises:



384
385
386
387
388
389
390
391
392
# File 'lib/origami/signature.rb', line 384

#
# Returns the value (V) of the first signature field (FT == :Sig) whose
# value is a Dictionary.
# Raises SignatureError when the document is not signed or when no such
# field exists.
#
def signature
    raise SignatureError, "Not a signed document" unless self.signed?

    self.each_field do |field|
        next unless field.FT == :Sig

        value = field.V
        return value if value.is_a?(Dictionary)
    end

    raise SignatureError, "Cannot find digital signature"
end

#signed?Boolean

Returns whether the document contains a digital signature.

Returns:



263
264
265
266
267
268
269
270
271
# File 'lib/origami/signature.rb', line 263

#
# Returns whether the document contains a digital signature: the Catalog
# /AcroForm must be a Dictionary whose SigFlags has the SIGNATURESEXIST bit set.
# An InvalidReferenceError raised during the check yields false.
#
def signed?
    form = self.Catalog.AcroForm
    return false unless form.is_a?(Dictionary) && form.has_key?(:SigFlags)

    (form.SigFlags & InteractiveForm::SigFlags::SIGNATURESEXIST) != 0
rescue InvalidReferenceError
    false
end

#subjectObject



42
# File 'lib/origami/metadata.rb', line 42

# Returns the :Subject field of the document information.
def subject
    get_document_info_field(:Subject)
end

#titleObject



40
# File 'lib/origami/metadata.rb', line 40

# Returns the :Title field of the document information.
def title
    get_document_info_field(:Title)
end

#usage_rights?Boolean

Returns:



379
380
381
382
# File 'lib/origami/signature.rb', line 379

#
# Returns true when the Catalog /Perms dictionary exists and carries a
# usage rights entry (:UR3 or :UR).
#
def usage_rights?
    perms = self.Catalog.Perms
    return false if perms.nil?

    # The previous test was inverted: it was true when either key was missing.
    perms.has_key?(:UR3) || perms.has_key?(:UR)
end

#verify(trusted_certs: []) ⇒ Object

Verify a document signature.

_:trusted_certs_: an array of trusted X509 certificates.
If no argument is passed, embedded certificates are treated as trusted.


40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/origami/signature.rb', line 40

#
# Verify a document signature.
# _trusted_certs_:: An array of trusted X509 certificates. When it is empty,
#                   the OpenSSL::PKCS7::NOVERIFY flag is set.
#
# Returns the result of the PKCS7 verification.
# Raises SignatureError when the signature contents or byte range are invalid
# and NotImplementedError for an unsupported signature SubFilter.
#
def verify(trusted_certs: [])
    unless Origami::OPTIONS[:use_openssl]
        fail "OpenSSL is not present or has been disabled."
    end

    digsig = self.signature

    unless digsig[:Contents].is_a?(String)
        raise SignatureError, "Invalid digital signature contents"
    end

    store = OpenSSL::X509::Store.new
    trusted_certs.each do |ca| store.add_cert(ca) end
    flags = 0
    flags |= OpenSSL::PKCS7::NOVERIFY if trusted_certs.empty?

    # Re-parse the signature contents from the original data to find where
    # they end in the file.
    stream = StringScanner.new(self.original_data)
    stream.pos = digsig[:Contents].file_offset
    Object.typeof(stream).parse(stream)
    endofsig_offset = stream.pos
    stream.terminate

    # The byte range must start at offset 0, end at the end of the file and
    # leave out exactly the signature contents.
    s1,l1,s2,l2 = digsig.ByteRange
    if s1.value != 0 or
        (s2.value + l2.value) != self.original_data.size or
        (s1.value + l1.value) != digsig[:Contents].file_offset or
        s2.value != endofsig_offset

        raise SignatureError, "Invalid signature byte range"
    end

    # Signed data: the two ranges on each side of the signature contents.
    data = self.original_data[s1,l1] + self.original_data[s2,l2]

    case digsig.SubFilter.value.to_s
    when 'adbe.pkcs7.detached'
        flags |= OpenSSL::PKCS7::DETACHED
        p7 = OpenSSL::PKCS7.new(digsig[:Contents].value)
        raise SignatureError, "Not a PKCS7 detached signature" unless p7.detached?
        p7.verify([], store, data, flags)

    when 'adbe.pkcs7.sha1'
        # Here the PKCS7 payload must be the SHA1 digest of the signed data.
        p7 = OpenSSL::PKCS7.new(digsig[:Contents].value)
        p7.verify([], store, nil, flags) and p7.data == Digest::SHA1.digest(data)

    else
        raise NotImplementedError, "Unsupported method #{digsig.SubFilter}"
    end
end

#xfa_form?Boolean

Returns:



37
38
39
# File 'lib/origami/xfa.rb', line 37

#
# True when the document has a form and its AcroForm dictionary has an
# :XFA key.
#
def xfa_form?
    form? && self.Catalog.AcroForm.key?(:XFA)
end