Method: Docsplit::ImageExtractor#convert

Defined in:
lib/docsplit/image_extractor.rb

#convert(pdf, size, format, previous = nil) ⇒ Object

Convert a single PDF into page images at the specified size and format. If `--rolling` is set and we have a previous image at a larger size to work with, we simply downsample that image instead of re-rendering the entire PDF. We now generate one page at a time, a counterintuitive optimization suggested by the GraphicsMagick list that seems to work quite well.



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/docsplit/image_extractor.rb', line 30

# Convert a single PDF into page images at the given size and format by
# shelling out to GraphicsMagick (`gm`).
#
# pdf      - path to the source PDF.
# size     - passed to `directory_for` and `resize_arg` to pick the output
#            directory and the resize option.
# format   - image file extension; also passed to `quality_arg`.
# previous - when given, the images already present in
#            `directory_for(previous)` are copied into this size's directory
#            and resampled in place with `gm mogrify`, instead of rendering
#            the PDF again.
#
# Without `previous`, each page from `page_list` is rendered by its own
# `gm convert` invocation and written to "<basename>_<page>.<format>" inside
# the size's directory.
#
# Raises ExtractionFailed, carrying gm's combined stdout/stderr output, when
# a gm command exits unsuccessfully. The temporary directory handed to gm via
# MAGICK_TMPDIR is removed on every exit path.
def convert(pdf, size, format, previous=nil)
  tempdir     = Dir.mktmpdir
  basename    = File.basename(pdf, File.extname(pdf))
  directory   = directory_for(size)
  pages       = @pages || "1-#{Docsplit.extract_length(pdf)}"
  escaped_pdf = ESCAPE[pdf]
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  FileUtils.mkdir_p(directory) unless File.exist?(directory)
  common      = "#{MEMORY_ARGS} -density #{@density} #{resize_arg(size)} #{quality_arg(format)}"
  if previous
    FileUtils.cp(Dir[directory_for(previous) + '/*'], directory)
    result = `MAGICK_TMPDIR=#{tempdir} OMP_NUM_THREADS=2 gm mogrify #{common} -unsharp 0x0.5+0.75 \"#{directory}/*.#{format}\" 2>&1`.chomp
    raise ExtractionFailed, result unless $?.success?
  else
    page_list(pages).each do |page|
      out_file = ESCAPE[File.join(directory, "#{basename}_#{page}.#{format}")]
      # The "[n]" page selector passed to gm is zero-based, hence page - 1.
      cmd = "MAGICK_TMPDIR=#{tempdir} OMP_NUM_THREADS=2 gm convert +adjoin -define pdf:use-cropbox=true #{common} #{escaped_pdf}[#{page - 1}] #{out_file} 2>&1"
      result = `#{cmd}`.chomp
      raise ExtractionFailed, result unless $?.success?
    end
  end
ensure
  # tempdir is nil if Dir.mktmpdir itself raised; guard so cleanup does not
  # mask the original error with a TypeError.
  FileUtils.remove_entry_secure tempdir if tempdir && File.exist?(tempdir)
end