Class: Docsplit::PageExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/docsplit/page_extractor.rb

Overview

Delegates to pdftailor (or to pdftk, when pdftailor is not available) in order to burst a PDF document into single pages.

Instance Method Summary collapse

Instance Method Details

#extract(pdfs, opts) ⇒ Object

Burst a list of pdfs into single pages, named `pdfname_pagenumber.pdf`.



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/docsplit/page_extractor.rb', line 6

# Burst each given PDF into single-page files named
# `<pdfname>_<pagenumber>.pdf` inside the output directory.
#
# @param pdfs [String, Array<String>] one PDF path, or a (possibly nested)
#   list of PDF paths
# @param opts [Hash] forwarded unchanged to +extract_options+
#   (presumably this is what sets @output -- confirm against that helper)
# @return [Array] the flattened list of PDFs that were processed
# @raise [ExtractionFailed] when the external command does not finish
#   successfully (non-zero exit status, or terminated by a signal); the
#   message is the command's combined stdout/stderr output
def extract(pdfs, opts)
  extract_options opts
  [pdfs].flatten.each do |pdf|
    pdf_name = File.basename(pdf, File.extname(pdf))
    # The literal '%d' is handed to the external tool, which presumably
    # replaces it with the page number.
    page_path = ESCAPE[File.join(@output, pdf_name)] + '_%d.pdf'
    FileUtils.mkdir_p @output unless File.exist?(@output)

    # Prefer pdftailor, but keep pdftk for backwards compatibility.
    cmd = if DEPENDENCIES[:pdftailor]
            "pdftailor unstitch --output #{page_path} #{ESCAPE[pdf]} 2>&1"
          else
            "pdftk #{ESCAPE[pdf]} burst output #{page_path} 2>&1"
          end
    result = `#{cmd}`.chomp
    # Capture the status right away so nothing run later can replace it.
    status = $?
    # Remove the doc_data.txt report that may have been left in the current
    # working directory (pdftk's burst operation writes one).
    FileUtils.rm('doc_data.txt') if File.exist?('doc_data.txt')
    # success? is nil when the child was killed by a signal, so that case
    # is reported as ExtractionFailed too (exitstatus would be nil there).
    raise ExtractionFailed, result unless status.success?
  end
end