Module: PDF::Extractor

Defined in:
lib/pdf/extractor.rb

Defined Under Namespace

Classes: ConversionError, Document, Element, Font, MalformedPDFError, Page, Reader

Class Method Summary

Class Method Details

.open(path) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/pdf/extractor.rb', line 9

# Converts the PDF at +path+ to XML by shelling out to the +pdftohtml+
# command-line tool and wraps the captured output in a Document.
#
# Both stdout and stderr of the tool are captured (via +2>&1+), so the tool's
# error messages can be recognised in the output below.
#
# @param path [#to_s] filesystem path of the PDF file to convert
# @return [PDF::Extractor::Document] document built from the tool's output
# @raise [MalformedPDFError] if the output contains "PDF file is damaged"
# @raise [RuntimeError] if the output contains "Couldn't open file"
def self.open(path)
  # Required locally so this method is self-contained; repeated calls are no-ops.
  require 'shellwords'

  # Escape the path before interpolating it into the shell command: without
  # this, whitespace splits it into several arguments and metacharacters
  # (`;`, `$()`, `|`, ...) would be executed by the shell.
  input = `pdftohtml -enc UTF-8 -xml -stdout #{Shellwords.escape(path)} 2>&1`

  # NOTE(review): a missing pdftohtml binary is not detected here; whatever the
  # shell prints in that case falls through to Document.new. Raising
  # ConversionError for that case was sketched previously but never enabled.
  case input
  when /PDF file is damaged/
    raise MalformedPDFError, "the PDF with filename '#{path}' is malformed"
  when /Couldn't open file/
    raise RuntimeError, "Couldn't open file: '#{path}'"
  else
    PDF::Extractor::Document.new(input)
  end
end