Class: PDF::Reader
- Inherits:
-
Object
- Object
- PDF::Reader
- Defined in:
- lib/pdf/reader.rb,
lib/pdf/reader/lzw.rb,
lib/pdf/reader/cmap.rb,
lib/pdf/reader/font.rb,
lib/pdf/reader/page.rb,
lib/pdf/reader/xref.rb,
lib/pdf/reader/error.rb,
lib/pdf/reader/token.rb,
lib/pdf/reader/buffer.rb,
lib/pdf/reader/filter.rb,
lib/pdf/reader/parser.rb,
lib/pdf/reader/stream.rb,
lib/pdf/reader/encoding.rb,
lib/pdf/reader/text_run.rb,
lib/pdf/reader/reference.rb,
lib/pdf/reader/cid_widths.rb,
lib/pdf/reader/filter/lzw.rb,
lib/pdf/reader/glyph_hash.rb,
lib/pdf/reader/page_state.rb,
lib/pdf/reader/filter/null.rb,
lib/pdf/reader/object_hash.rb,
lib/pdf/reader/page_layout.rb,
lib/pdf/reader/filter/flate.rb,
lib/pdf/reader/form_xobject.rb,
lib/pdf/reader/object_cache.rb,
lib/pdf/reader/object_stream.rb,
lib/pdf/reader/filter/ascii85.rb,
lib/pdf/reader/pages_strategy.rb,
lib/pdf/reader/print_receiver.rb,
lib/pdf/reader/font_descriptor.rb,
lib/pdf/reader/filter/ascii_hex.rb,
lib/pdf/reader/filter/depredict.rb,
lib/pdf/reader/resource_methods.rb,
lib/pdf/reader/filter/run_length.rb,
lib/pdf/reader/register_receiver.rb,
lib/pdf/reader/page_text_receiver.rb,
lib/pdf/reader/synchronized_cache.rb,
lib/pdf/reader/orientation_detector.rb,
lib/pdf/reader/null_security_handler.rb,
lib/pdf/reader/transformation_matrix.rb,
lib/pdf/reader/overlapping_runs_filter.rb,
lib/pdf/reader/standard_security_handler.rb,
lib/pdf/reader/width_calculator/built_in.rb,
lib/pdf/reader/width_calculator/composite.rb,
lib/pdf/reader/width_calculator/true_type.rb,
lib/pdf/reader/width_calculator/type_zero.rb,
lib/pdf/reader/standard_security_handler_v5.rb,
lib/pdf/reader/unimplemented_security_handler.rb,
lib/pdf/reader/width_calculator/type_one_or_three.rb
Overview
Copyright © 2010 James Healy ([email protected])
Defined Under Namespace
Modules: Filter, ResourceMethods, WidthCalculator Classes: Buffer, CMap, CidWidths, Encoding, EncryptedPDFError, Error, EventPoint, Font, FontDescriptor, FormXObject, GlyphHash, InvalidObjectError, InvalidPageError, LZW, MalformedPDFError, NullSecurityHandler, ObjectCache, ObjectHash, ObjectStream, OrientationDetector, OverlappingRunsFilter, Page, PageLayout, PageState, PageTextReceiver, PagesStrategy, Parser, PrintReceiver, Reference, RegisterReceiver, StandardSecurityHandler, StandardSecurityHandlerV5, Stream, SynchronizedCache, TextRun, Token, TransformationMatrix, UnimplementedSecurityHandler, UnsupportedFeatureError, XRef
Instance Attribute Summary collapse
-
#objects ⇒ Object
readonly
Low-level, hash-like access to all objects in the underlying PDF.
Class Method Summary collapse
-
.open(input, opts = {}) {|PDF::Reader.new(input, opts)| ... } ⇒ Object
syntactic sugar for opening a PDF file.
Instance Method Summary collapse
- #info ⇒ Object
-
#initialize(input, opts = {}) ⇒ Reader
constructor
creates a new document reader for the provided PDF.
- #metadata ⇒ Object
-
#page(num) ⇒ Object
returns a single PDF::Reader::Page for the specified page.
- #page_count ⇒ Object
-
#pages ⇒ Object
returns an array of PDF::Reader::Page objects, one for each page in the source PDF.
- #pdf_version ⇒ Object
Constructor Details
#initialize(input, opts = {}) ⇒ Reader
creates a new document reader for the provided PDF.
input can be either a filename or an IO-ish object (StringIO, File, etc.) containing a PDF:
reader = PDF::Reader.new("somefile.pdf")
File.open("somefile.pdf","rb") do |file|
reader = PDF::Reader.new(file)
end
If the source file is encrypted you can provide a password for decrypting
reader = PDF::Reader.new("somefile.pdf", :password => "apples")
114 115 116 117 118 |
# File 'lib/pdf/reader.rb', line 114
#
# Creates a new document reader for the provided PDF.
#
# input - a filename, or an IO-ish object (StringIO, File, etc) containing a PDF.
# opts  - options hash; it is passed on to PDF::Reader::ObjectHash together
#         with a :cache entry pointing at this reader's object cache
#         (e.g. :password => "apples" for an encrypted file).
def initialize(input, opts = {})
  @cache = PDF::Reader::ObjectCache.new
  # Use a non-destructive merge: merge! would leak the :cache entry into the
  # caller's hash and would raise if the caller passed a frozen hash.
  @objects = PDF::Reader::ObjectHash.new(input, opts.merge(:cache => @cache))
end
Instance Attribute Details
#objects ⇒ Object (readonly)
Low-level, hash-like access to all objects in the underlying PDF.
97 98 99 |
# File 'lib/pdf/reader.rb', line 97 def objects @objects end |
Class Method Details
Instance Method Details
#info ⇒ Object
120 121 122 123 |
# File 'lib/pdf/reader.rb', line 120
#
# Looks up the :Info entry of the PDF trailer, dereferences it through
# @objects and returns the result of passing it to doc_strings_to_utf8.
# NOTE(review): doc_strings_to_utf8 is defined outside this listing;
# presumably it converts the dictionary's strings to UTF-8 - confirm.
def info
  info_ref = @objects.trailer[:Info]
  info_dict = @objects.deref(info_ref)
  doc_strings_to_utf8(info_dict)
end
#metadata ⇒ Object
125 126 127 128 129 130 131 132 133 134 |
# File 'lib/pdf/reader.rb', line 125
#
# Returns the document's :Metadata stream (looked up on the root object) as
# a string tagged as UTF-8, or nil when the dereferenced entry is nil.
def metadata
  stream = @objects.deref(root[:Metadata])
  return nil if stream.nil?

  xml = stream.unfiltered_data
  # Only re-labels the bytes as UTF-8; no transcoding is performed.
  xml.force_encoding("utf-8")
  xml
end
#page(num) ⇒ Object
Returns a single PDF::Reader::Page for the specified page. Use this instead of the pages method when you only need to access a single page.
reader = PDF::Reader.new("somefile.pdf")
page = reader.page(10)
puts page.text
See the docs for PDF::Reader::Page to read more about the methods available on each page
201 202 203 204 205 206 207 |
# File 'lib/pdf/reader.rb', line 201
#
# Returns a single PDF::Reader::Page for the requested page number.
#
# num - the page number; it is coerced with to_i and must lie between 1 and
#       page_count inclusive.
#
# Raises InvalidPageError when the coerced number is outside that range.
def page(num)
  page_number = num.to_i
  # Same check as "too small or too large", expressed positively; page_count
  # is still only consulted when the number is at least 1.
  within_bounds = page_number >= 1 && page_number <= self.page_count
  raise InvalidPageError, "Valid pages are 1 .. #{self.page_count}" unless within_bounds

  PDF::Reader::Page.new(@objects, page_number, :cache => @cache)
end
#page_count ⇒ Object
136 137 138 139 140 141 142 |
# File 'lib/pdf/reader.rb', line 136
#
# Returns the dereferenced :Count entry of the document's :Pages dictionary.
# The value is memoised in @page_count once it is truthy.
#
# Raises MalformedPDFError when the dereferenced :Pages entry is not a Hash.
def page_count
  # Consult the memo first so repeated calls do not dereference and
  # re-validate the page tree every time.
  return @page_count if @page_count

  pages = @objects.deref(root[:Pages])
  unless pages.kind_of?(::Hash)
    raise MalformedPDFError, 'Pages structure is missing'
  end
  @page_count = @objects.deref(pages[:Count])
end
#pages ⇒ Object
179 180 181 182 183 184 185 186 187 |
# File 'lib/pdf/reader.rb', line 179
#
# Returns an array of PDF::Reader::Page objects, one for each page number
# from 1 to page_count.
#
# Raises MalformedPDFError when building any page raises InvalidPageError.
def pages
  (1..self.page_count).map do |num|
    begin
      PDF::Reader::Page.new(@objects, num, :cache => @cache)
    rescue InvalidPageError
      # Ruby attaches the rescued error as #cause, so no local binding is needed.
      raise MalformedPDFError, "Missing data for page: #{num}"
    end
  end
end
#pdf_version ⇒ Object
144 145 146 |
# File 'lib/pdf/reader.rb', line 144 def pdf_version @objects.pdf_version end |