Class: Origami::PDF::LazyParser

Inherits:
Parser show all
Defined in:
lib/origami/parsers/pdf/lazy.rb

Overview

Create a new PDF lazy Parser.

Constant Summary

Constants inherited from Origami::Parser

Origami::Parser::VERBOSE_DEBUG, Origami::Parser::VERBOSE_INFO, Origami::Parser::VERBOSE_QUIET, Origami::Parser::VERBOSE_TRACE

Instance Attribute Summary

Attributes inherited from Origami::Parser

#options

Instance Method Summary collapse

Methods inherited from Parser

#initialize

Methods inherited from Origami::Parser

#defer_type_cast, #initialize, #parse_object, #parse_trailer, #parse_xreftable, #pos, #pos=, #target_data, #target_filename, #target_filesize

Constructor Details

This class inherits a constructor from Origami::PDF::Parser

Instance Method Details

#parse(stream) ⇒ Object

Raises:

(ParsingError)

32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/origami/parsers/pdf/lazy.rb', line 32

#
# Builds a PDF by walking its cross-reference sections backwards: it starts
# from the section referenced by the last startxref token found in the data
# and follows each section's /Prev entry until none is left.
#
# @param stream the input forwarded to the inherited #parse
#   (presumably what populates @data -- confirm against Origami::Parser).
# @return the object returned by parse_initialize, holding one revision per
#   xref section visited, ordered from the oldest to the most recent.
# @raise [ParsingError] if no xref token is found, if the startxref offset
#   is zero, or if the /Prev chain revisits an already parsed xref offset.
#
def parse(stream)
    super

    pdf = parse_initialize
    revisions = []

    # Set the scanner position at the end.
    @data.terminate

    # Step backwards one position at a time until the startxref token matches.
    until @data.match?(/#{Trailer::XREF_TOKEN}/)
        raise ParsingError, "No xref token found" if @data.pos == 0
        @data.pos -= 1
    end

    # Extract the offset of the last xref section.
    trailer = Trailer.parse(@data, self)
    raise ParsingError, "Cannot locate xref section" if trailer.startxref.zero?

    xref_offset = trailer.startxref

    # Offsets already turned into a revision. The /Prev values come from the
    # file itself, so a malformed or crafted document can make the chain point
    # back to a section that was already visited; without this guard the loop
    # below would never terminate.
    visited_offsets = {}

    while xref_offset && xref_offset != 0
        offset_key = xref_offset.to_i
        if visited_offsets.key?(offset_key)
            raise ParsingError, "Cyclic xref section chain at offset #{offset_key}"
        end
        visited_offsets[offset_key] = true

        # Create a new revision based on the xref section offset.
        revision = parse_revision(pdf, xref_offset)

        # Locate the previous xref section; the xref stream takes precedence
        # over the trailer when the revision has one. A missing /Prev yields
        # nil, which to_i turns into 0 and ends the loop.
        prev_holder = revision.xrefstm || revision.trailer
        xref_offset = prev_holder[:Prev].to_i

        # Prepend the revision so that the oldest one ends up first.
        revisions.unshift(revision)
    end

    # Replace whatever revisions the PDF had with the ones just collected.
    pdf.revisions.clear
    revisions.each do |rev|
        pdf.revisions.push(rev)
        pdf.insert(rev.xrefstm) if rev.has_xrefstm?
    end

    parse_finalize(pdf)

    pdf
end