Module: PDF::Extractor

Defined in:
lib/pdf/extractor.rb

Defined Under Namespace

Classes: ConversionError, Document, Element, Font, MalformedPDFError, Page, Reader

Class Method Summary collapse

Class Method Details

.open(path) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/pdf/extractor.rb', line 9

# Converts the PDF at +path+ to XML with the external `pdftohtml` tool and
# wraps the output in a Document.
#
# The tool is started without a shell (argument-array form of IO.popen), so
# spaces and shell metacharacters in +path+ reach pdftohtml verbatim instead
# of being interpreted as shell syntax.
#
# @param path [#to_s] location of the PDF file to convert
# @return [PDF::Extractor::Document] document built from pdftohtml's output
# @raise [ConversionError] if the pdftohtml executable cannot be found
# @raise [MalformedPDFError] if pdftohtml reports the PDF as damaged
# @raise [RuntimeError] if pdftohtml reports that it could not open the file
def self.open(path)
	command = ['pdftohtml', '-enc', 'UTF-8', '-xml', '-stdout', path.to_s]
	input = begin
		# err: [:child, :out] merges stderr into stdout (the former `2>&1`)
		# so the diagnostics matched below end up in the captured text.
		IO.popen(command, err: [:child, :out], &:read)
	rescue Errno::ENOENT
		# With no shell in between, a missing executable surfaces as ENOENT
		# rather than as "command not found" text in the output.
		raise ConversionError, 'pdftohtml command not found'
	end
	case input
	when /PDF file is damaged/
		raise MalformedPDFError, "the PDF with filename '#{path}' is malformed"
	when /Couldn't open file/
		raise RuntimeError, "Couldn't open file: '#{path}'"
	else
		PDF::Extractor::Document.new(input)
	end
end