Module: Pdf::Textstream

Defined in:
lib/pdf/textstream.rb,
lib/pdf/textstream/version.rb

Constant Summary collapse

PDFLayoutTextStripper =
JavaUtilities.get_proxy_class("PDFLayoutTextStripper")
PDFParser =

change namespace

Java::OrgApachePdfboxPdfparser::PDFParser
RandomAccessFile =
Java::OrgApachePdfboxIo::RandomAccessFile
PDDocument =
Java::OrgApachePdfboxPdmodel::PDDocument
PDFTextStripper =
Java::OrgApachePdfboxText::PDFTextStripper
VERSION =
"0.1.1"

Class Method Summary collapse

Class Method Details

.file_path_to_text(path) ⇒ Object



20
21
22
23
24
25
26
27
28
# File 'lib/pdf/textstream.rb', line 20

# Extracts the text of the PDF file at +path+.
#
# The file is opened read-only, parsed with PDFParser, wrapped in a
# PDDocument and handed to PDFLayoutTextStripper#getText. Both the parsed
# document and the underlying file handle are closed before returning,
# whether extraction succeeds or raises.
#
# @param path [String] filesystem path of the PDF; it is passed straight to
#   Java::JavaIo::File.new
# @return [Object] whatever PDFLayoutTextStripper#getText returns
#   (presumably the extracted text as a String -- confirm against the
#   stripper implementation)
# @raise any exception raised while opening, parsing or stripping the file
#   is propagated to the caller unchanged; nothing is rescued here
def self.file_path_to_text(path)
  source = RandomAccessFile.new(Java::JavaIo::File.new(path), "r")
  document = nil
  begin
    parser = PDFParser.new(source)
    parser.parse
    document = PDDocument.new(parser.getDocument)
    PDFLayoutTextStripper.new.getText(document)
  ensure
    # Nested ensure: the file handle is released even if closing the
    # document itself raises. +document+ is nil when parsing failed before
    # the PDDocument could be built.
    begin
      document.close if document
    ensure
      source.close
    end
  end
end