Module: Origami::Object

Included in:
Array, Boolean, Dictionary, Name, Null, Number, Reference, Stream, String
Defined in:
lib/origami/object.rb,
lib/origami/obfuscation.rb

Overview

Parent module representing a PDF Object. The PDF specification declares a set of primitive object types:

  • Null

  • Boolean

  • Integer

  • Real

  • Name

  • String

  • Array

  • Dictionary

  • Stream

Constant Summary collapse

TOKENS =

:nodoc:

%w{ obj endobj }
@@regexp_obj =
Regexp.new(WHITESPACES + "(?<no>\\d+)" + WHITESPACES + "(?<gen>\\d+)" +
WHITESPACES + TOKENS.first + WHITESPACES)
@@regexp_endobj =
Regexp.new(WHITESPACES + TOKENS.last + WHITESPACES)

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#file_offsetObject

Returns the value of attribute file_offset.



296
297
298
# File 'lib/origami/object.rb', line 296

# Reader for the @file_offset instance variable (nil when never assigned).
def file_offset; @file_offset end

#generationObject

Returns the value of attribute generation.



296
297
298
# File 'lib/origami/object.rb', line 296

# Reader for the @generation instance variable (nil when never assigned).
def generation; @generation end

#noObject

Returns the value of attribute no.



296
297
298
# File 'lib/origami/object.rb', line 296

# Reader for the @no instance variable (nil when never assigned).
def no; @no end

#objstm_offsetObject

Returns the value of attribute objstm_offset.



296
297
298
# File 'lib/origami/object.rb', line 296

# Reader for the @objstm_offset instance variable (nil when never assigned).
def objstm_offset; @objstm_offset end

#parentObject

Returns the value of attribute parent.



297
298
299
# File 'lib/origami/object.rb', line 297

# Reader for the @parent instance variable (nil when never assigned).
def parent; @parent end

Class Method Details

.native_typeObject

:nodoc:



632
# File 'lib/origami/object.rb', line 632

# :nodoc:
# Native type reported at this level: the Origami::Object module itself.
def self.native_type
    Origami::Object
end

.parse(stream, parser = nil) ⇒ Object

:nodoc:



566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
# File 'lib/origami/object.rb', line 566

# Parses one indirect object ("<no> <gen> obj ... endobj") from +stream+.
#
# stream:: scanner positioned where the object is expected; pos, match?,
#          scan, skip and [] (named captures) are called on it.
# parser:: passed through unchanged to the concrete type's parse method.
#
# Returns nil when the stream is positioned on an xref, trailer or
# startxref keyword; otherwise returns the parsed object, flagged indirect
# and carrying its number, generation and starting stream position.
#
# Raises InvalidObjectError when the "obj" header is missing, when the
# object type cannot be determined, or when the concrete parser fails.
# Raises UnterminatedObjectError when the closing "endobj" is missing.
def parse(stream, parser = nil) #:nodoc:
    start_pos = stream.pos

    # End of body: one of these keywords sits at the current position.
    [ /xref/, /trailer/, /startxref/ ].each do |marker|
        return nil if stream.match?(marker)
    end

    unless stream.scan(@@regexp_obj)
      raise InvalidObjectError,
        "Object shall begin with '%d %d obj' statement"
    end

    # Named captures of the header regexp.
    no, gen = stream['no'].to_i, stream['gen'].to_i

    type = typeof(stream)
    raise InvalidObjectError, "Cannot determine object (no:#{no},gen:#{gen}) type" if type.nil?

    new_obj =
        begin
            type.parse(stream, parser)
        rescue => error
            # Re-raise as InvalidObjectError, keeping the original class and message in the text.
            raise InvalidObjectError,
                    "Failed to parse object (no:#{no},gen:#{gen})\n\t -> [#{error.class}] #{error.message}"
        end

    new_obj.set_indirect(true)
    new_obj.no = no
    new_obj.generation = gen
    new_obj.file_offset = start_pos

    # The parsed object is handed to the error so the caller can still get at it.
    unless stream.skip(@@regexp_endobj)
        raise UnterminatedObjectError.new("Object shall end with 'endobj' statement", new_obj)
    end

    new_obj
end

.skip_until_next_obj(stream) ⇒ Object

:nodoc:



607
608
609
610
611
612
613
614
615
616
# File 'lib/origami/object.rb', line 607

# Moves +stream+ to the start of the next recognisable landmark: an object
# header, or one of the xref / trailer / startxref keywords.
#
# The patterns are tried in that order, each over the whole remaining
# input, and the first one that matches anywhere wins. The stream is then
# rewound by the matched length so it sits at the beginning of the match.
# NOTE(review): an object header located after an earlier "xref" is
# therefore preferred over that xref -- confirm this is intended.
#
# Returns true when a landmark was found, false otherwise (the position is
# left where the failed scans left it).
def skip_until_next_obj(stream) #:nodoc:
    landmarks = [ @@regexp_obj, /xref/, /trailer/, /startxref/ ]

    # any? stops at the first successful scan_until, exactly like an early return.
    found = landmarks.any? { |pattern| stream.scan_until(pattern) }
    return false unless found

    stream.pos -= stream.matched_size
    true
end

.typeof(stream, noref = false) ⇒ Object

:nodoc:



539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
# File 'lib/origami/object.rb', line 539

# Guesses the class of the next object in +stream+ from its first
# characters, after skipping leading whitespace. Nothing but that
# whitespace is consumed.
#
# stream:: scanner to inspect (skip, peek and check are called on it).
# noref::  when true, the Reference pattern is not tried.
#
# Returns the matching class, or nil when nothing matches. A token starting
# with 'n', 't' or 'f' that is not exactly null / true / false yields nil
# without trying the numeric patterns.
def typeof(stream, noref = false) #:nodoc:
    stream.skip(REGEXP_WHITESPACES)

    lead = stream.peek(1)

    if lead == '/'
        Name
    elsif lead == '<'
        # '<<' is reported as Stream; presumably Stream's parser also copes
        # with plain dictionaries -- confirm against Stream.parse.
        stream.peek(2) == '<<' ? Stream : HexaString
    elsif lead == '('
        LiteralString
    elsif lead == '['
        Origami::Array
    elsif lead == 'n'
        stream.peek(4) == 'null' ? Null : nil
    elsif lead == 't'
        stream.peek(4) == 'true' ? Boolean : nil
    elsif lead == 'f'
        stream.peek(5) == 'false' ? Boolean : nil
    elsif !noref && stream.check(Reference::REGEXP_TOKEN)
        Reference
    elsif stream.check(Real::REGEXP_TOKEN)
        Real
    elsif stream.check(Integer::REGEXP_TOKEN)
        Integer
    end
end

Instance Method Details

#<=>(obj) ⇒ Object

Compare two objects from their respective numbers.



348
349
350
# File 'lib/origami/object.rb', line 348

# Orders two objects by object number first, then by generation.
#
# obj:: the other object; must respond to no and generation.
#
# Returns whatever comparing the two [number, generation] pairs yields
# (-1, 0, 1, or nil when the values are not comparable).
def <=>(obj)
    mine = [@no, @generation]
    theirs = [obj.no, obj.generation]

    mine <=> theirs
end

#cast_to(type, _parser = nil) ⇒ Object

:nodoc:



641
642
643
644
645
646
647
# File 'lib/origami/object.rb', line 641

# Checks that this object may be viewed as +type+ and returns self.
#
# type::    target type; must respond to native_type.
# _parser:: accepted but unused here.
#
# Raises TypeError when the native type of +type+ differs from the native
# type of this object.
def cast_to(type, _parser = nil) #:nodoc:
    mismatch = (type.native_type != self.native_type)
    raise TypeError, "Incompatible cast from #{self.class} to #{type}" if mismatch

    self
end

#copyObject

Deep copy of an object.



362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
# File 'lib/origami/object.rb', line 362

# Deep copy of this object, made by a Marshal round-trip.
#
# @document and @parent are detached while marshalling so that neither the
# owning document nor the parent object is dragged into the copy. They are
# put back in an ensure clause, so a marshalling failure (for example a
# value that cannot be dumped) no longer leaves this object detached.
#
# Returns the copy. It gets the same parent as this object and, when it is
# indirect, the same document.
# Raises whatever Marshal.dump / Marshal.load raise.
def copy
    saved_doc = @document
    saved_parent = @parent

    begin
        @document = @parent = nil # do not process parent object and document in the copy

        # Perform the recursive copy (quite dirty).
        copyobj = Marshal.load(Marshal.dump(self))
    ensure
        # Always restore the saved values, even when marshalling failed.
        @document = saved_doc
        @parent = saved_parent
    end

    copyobj.set_document(saved_doc) if copyobj.indirect?
    copyobj.parent = parent

    copyobj
end

#documentObject

Returns the PDF which the object belongs to.



524
525
526
527
528
529
# File 'lib/origami/object.rb', line 524

# Returns the document this object belongs to.
#
# An indirect object answers with its own @document. A direct object asks
# its parent instead, and answers nil when it has no parent.
def document
    return @document if self.indirect?
    return nil if @parent.nil?

    @parent.document
end

#exportObject

Creates an exportable version of current object. The exportable version is a copy of self with solved references, no owning PDF and no parent. References to Catalog or PageTreeNode objects have been destroyed.

When exported, an object can be moved into another document without hassle.



432
433
434
435
436
437
438
439
440
# File 'lib/origami/object.rb', line 432

# Builds an exportable version of this object.
#
# Works on the object returned by logicalize: its number and generation are
# reset to 0, its document is cleared when it is indirect, its parent is
# cleared and its xref cache is emptied.
#
# Returns that object.
def export
    self.logicalize.tap do |exported|
        exported.no = exported.generation = 0
        exported.set_document(nil) if exported.indirect?
        exported.parent = nil
        exported.xref_cache.clear
    end
end

#indirect?Boolean

Returns whether the object is indirect, which means that it is not embedded into another object.

Returns:



355
356
357
# File 'lib/origami/object.rb', line 355

# Returns the @indirect flag (see set_indirect).
def indirect?; @indirect end

#indirect_parentObject

Returns the indirect object which contains this object. If the current object is already indirect, returns self.



500
501
502
503
504
505
# File 'lib/origami/object.rb', line 500

# Returns the closest indirect object containing this one, walking up the
# parent chain. An object that is itself indirect returns self.
#
# Returns nil when the chain ends before any indirect object is reached
# (for example a direct object that has no parent yet). Previously that
# case crashed with NoMethodError on nil.
def indirect_parent
    obj = self
    # Stop on the first indirect ancestor, or when the chain runs out.
    obj = obj.parent until obj.nil? or obj.indirect?

    obj
end

#initialize(*cons) ⇒ Object

Creates a new PDF Object.



302
303
304
305
306
307
308
309
# File 'lib/origami/object.rb', line 302

# Creates a new PDF object in its default state: direct (not indirect),
# number and generation 0, no document and no parent.
#
# cons:: constructor arguments for the next initialize in the ancestor
#        chain. They are forwarded only when at least one is given; with no
#        arguments super is not called at all.
def initialize(*cons)
    @indirect = false
    @no = 0
    @generation = 0
    @document = nil
    @parent = nil

    return if cons.empty?

    super(*cons)
end

#logicalizeObject

Returns a logicalized copy of self. See logicalize!



446
447
448
# File 'lib/origami/object.rb', line 446

# Non-destructive variant of logicalize!: runs logicalize! on a copy of
# this object and returns what that call returns.
def logicalize #:nodoc:
    duplicate = self.copy
    duplicate.logicalize!
end

#logicalize!Object

Recursively replaces every reference with a copy of the object it points to. Catalog and PageTreeNode objects are excluded to limit the recursion.



454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
# File 'lib/origami/object.rb', line 454

# Replaces, in place and recursively, every Reference found in this object
# with a copy of the object it resolves to. References that resolve to a
# Catalog or a PageTreeNode are replaced by nil to limit the recursion.
#
# Returns self, so that logicalize and export receive the processed object
# whatever its type. Previously the method returned nil for non-container
# objects and the dictionary for a Stream.
def logicalize! #:nodoc:

    # obj::       object currently being processed.
    # browsed::   objects already visited (membership is tested with ==).
    # ref_cache:: Reference => resolved copy, so a reference seen several
    #             times maps to a single copy.
    resolve_all_references = -> (obj, browsed = [], ref_cache = {}) do
        return if browsed.include?(obj)
        browsed.push(obj)

        if obj.is_a?(ObjectStream)
            obj.each do |subobj|
                # Recurse into the embedded object itself. Recursing on obj
                # here was a no-op, since obj is already in browsed.
                resolve_all_references[subobj, browsed, ref_cache]
            end
        end

        if obj.is_a?(Dictionary) or obj.is_a?(Array)
            obj.map! do |subobj|
                if subobj.is_a?(Reference)
                    new_obj =
                        if ref_cache.has_key?(subobj)
                            ref_cache[subobj]
                        else
                            ref_cache[subobj] = subobj.solve.copy
                        end
                    new_obj.no = new_obj.generation = 0
                    new_obj.parent = obj

                    # Evaluates to nil for Catalog / PageTreeNode targets.
                    new_obj unless new_obj.is_a?(Catalog) or new_obj.is_a?(PageTreeNode)
                else
                    subobj
                end
            end

            obj.each do |subobj|
                resolve_all_references[subobj, browsed, ref_cache]
            end

        elsif obj.is_a?(Stream)
            resolve_all_references[obj.dictionary, browsed, ref_cache]
        end
    end

    resolve_all_references[self]

    self
end

#native_typeObject

Returns the native PDF type of this Object.



637
638
639
# File 'lib/origami/object.rb', line 637

# Returns the native PDF type of this object, as reported by its class.
def native_type
  klass = self.class
  klass.native_type
end

#post_buildObject

Generic method called just after the object is finalized. At this time, any indirect object has its own number and generation identifier.



341
342
343
# File 'lib/origami/object.rb', line 341

# Hook called just after the object is finalized. This generic version does
# nothing and returns self.
def post_build; self end

#pre_buildObject

Generic method called just before the object is finalized. At this time, no number or generation has been allocated yet.



333
334
335
# File 'lib/origami/object.rb', line 333

# Hook called just before the object is finalized. This generic version
# does nothing and returns self.
def pre_build; self end

#referenceObject

Returns an indirect reference to this object. Raises InvalidObjectError if this object is not indirect.

Raises:



384
385
386
387
388
389
390
391
# File 'lib/origami/object.rb', line 384

# Builds a Reference carrying this object's number and generation, with
# this object set as the reference's parent.
#
# Raises InvalidObjectError when this object is not indirect.
def reference
    unless self.indirect?
        raise InvalidObjectError, "Cannot reference a direct object"
    end

    Reference.new(@no, @generation).tap { |ref| ref.parent = self }
end

#set_document(doc) ⇒ Object

Raises:



531
532
533
534
535
# File 'lib/origami/object.rb', line 531

# Attaches this object to +doc+ by storing it in @document.
#
# Returns +doc+.
# Raises InvalidObjectError when this object is not indirect.
def set_document(doc)
    if self.indirect?
        @document = doc
    else
        raise InvalidObjectError, "You cannot set the document of a direct object"
    end
end

#set_indirect(bool) ⇒ Object

Sets whether the object is indirect or not. Indirect objects are allocated numbers at build time.



315
316
317
318
319
320
321
322
323
324
325
326
327
# File 'lib/origami/object.rb', line 315

# Marks this object as indirect (true) or direct (false).
#
# Making the object direct also resets its number and generation to 0 and
# detaches it from its document.
#
# Returns self.
# Raises TypeError when +bool+ is neither true nor false.
def set_indirect(bool)
    is_boolean = (bool == true || bool == false)
    raise TypeError, "The argument must be boolean" unless is_boolean

    if bool == false
        @generation = 0
        @no = 0
        @document = nil
    end

    @indirect = bool
    self
end

#solveObject

Returns self.



517
518
519
# File 'lib/origami/object.rb', line 517

# Returns self.
def solve; self end

#to_oObject

Returns self.



510
511
512
# File 'lib/origami/object.rb', line 510

# Returns self.
def to_o; self end

#to_s(data) ⇒ Object Also known as: output, to_obfuscated_str

Outputs this object into PDF code.

data

The object data.



653
654
655
656
657
658
659
660
# File 'lib/origami/object.rb', line 653

# Outputs this object into PDF code.
#
# data:: the already-serialized object data.
#
# For an indirect object the data is wrapped between a
# "<no> <generation> obj" header and an "endobj" trailer, each followed by
# EOL. For a direct object the data is emitted alone.
#
# Returns a new string tagged with the binary encoding.
def to_s(data)
    output = ""

    if self.indirect?
        output << "#{no} #{generation} #{TOKENS.first}" << EOL
        output << data
        output << EOL << TOKENS.last << EOL
    else
        output << data
    end

    output.force_encoding('binary')
end

#typeObject

Returns the symbol type of this Object.



626
627
628
629
630
# File 'lib/origami/object.rb', line 626

# Returns the symbol type of this object: the last "::"-separated segment
# of a class name, as a Symbol.
#
# The name comes from the object's class, or from its superclass when the
# class has no name, or finally from the native type.
def type
    klass = self.class
    qualified = klass.name || klass.superclass.name || self.native_type.name

    qualified.split("::").last.to_sym
end

#version_requiredObject

:nodoc:



619
620
621
# File 'lib/origami/object.rb', line 619

# Generic requirement: the pair [ 1.0, 0 ].
# NOTE(review): presumably [PDF version, extension level] -- confirm against callers.
def version_required; [ 1.0, 0 ] end #:nodoc:

#xrefsObject

Returns an array of references pointing to the current object.

Raises:



396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
# File 'lib/origami/object.rb', line 396

# Returns an array of the references pointing to this object.
#
# Walks every root object of the owning document and collects the entries
# stored under this object's reference in the xref_cache of:
# * each Dictionary or Array embedded in an ObjectStream,
# * each root Dictionary or Array,
# * the dictionary of each root Stream (ObjectStream included).
#
# Raises InvalidObjectError when this object is direct or is not attached
# to any document.
def xrefs
    raise InvalidObjectError, "Cannot find xrefs to a direct object" unless self.indirect?

    if self.document.nil?
        raise InvalidObjectError, "Not attached to any document"
    end

    # The lookup key does not change during the walk: build it once instead
    # of allocating a new Reference for every cache probe.
    target = self.reference

    refs = []
    @document.root_objects.each do |obj|
        if obj.is_a?(ObjectStream)
            obj.each do |child|
                case child
                when Dictionary, Array
                    refs.concat child.xref_cache[target] if child.xref_cache.key?(target)
                end
            end
        end

        # For a stream, the references live in its dictionary.
        obj = obj.dictionary if obj.is_a?(Stream)

        case obj
        when Dictionary, Array
            refs.concat obj.xref_cache[target] if obj.xref_cache.key?(target)
        end
    end

    refs
end