Class: LoaderRuby::Loaders::Pdf

Inherits:
Base
  • Object
show all
Defined in:
lib/loader_ruby/loaders/pdf.rb

Constant Summary collapse

EXTENSIONS =
%w[.pdf].freeze

Instance Method Summary collapse

Instance Method Details

#load(path, password: nil, **opts) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/loader_ruby/loaders/pdf.rb', line 8

# Loads the PDF at +path+ and wraps its extracted text in a Document.
#
# The text of every page is extracted with the pdf-reader gem and the pages
# are joined with a blank line between them. The gem is required lazily
# inside this method, so it is only needed when a PDF is actually loaded.
#
# @param path the PDF file to read; handed to the file checks and to
#   PDF::Reader.new (presumably a String path — confirm against callers)
# @param password [String, nil] forwarded to PDF::Reader as +:password+
#   only when truthy; omitted from the reader options otherwise
# @param opts [Hash] extra keyword arguments; accepted but not referenced
#   anywhere in this method body
# @return [Document] document whose +content+ is the joined page text
# @raise [DependencyMissingError] if the pdf-reader gem cannot be required
def load(path, password: nil, **opts)
  # Both checks are defined outside this listing; presumably they raise when
  # the file is missing or too large — confirm in the base class.
  check_file_exists!(path)
  check_file_size!(path)

  # Lazy require: turn a missing optional gem into a library-specific error
  # with installation instructions instead of a bare LoadError.
  begin
    require "pdf-reader"
  rescue LoadError
    raise DependencyMissingError,
      "pdf-reader gem is required for PDF loading. Add `gem 'pdf-reader'` to your Gemfile."
  end

  # Only pass :password when one was supplied, so the reader is otherwise
  # constructed with no options at all.
  reader_opts = {}
  reader_opts[:password] = password if password
  reader = PDF::Reader.new(path, **reader_opts)
  # One text string per page, then a blank line between consecutive pages.
  pages = reader.pages.map(&:text)
  content = pages.join("\n\n")

  # NOTE(review): `metadata: (path, ...)` below is not valid Ruby as written.
  # A helper method name appears to be missing before the opening parenthesis
  # (possibly dropped when this listing was extracted) — confirm against
  # lib/loader_ruby/loaders/pdf.rb before relying on this text.
  Document.new(
    content: content,
    metadata: (path,
      format: :pdf,
      pages: reader.page_count,
      info: reader.info
    )
  )
end