Module: Asposepdfjava::ExtractTextFromAllPages
- Defined in:
- lib/asposepdfjava/Text/extracttextfromallpages.rb
Instance Method Summary collapse
Instance Method Details
#initialize ⇒ Object
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/asposepdfjava/Text/extracttextfromallpages.rb', line 3

# Extracts the text of every page of `input1.pdf` and saves it to
# `extracted_text.out.txt`. Both files live in the `data/` directory found
# four directory levels above this source file.
#
# All PDF and file work is delegated to Java classes loaded through Rjb
# (`com.aspose.pdf.Document`, `com.aspose.pdf.TextAbsorber`,
# `java.io.FileWriter`, `java.io.File`).
def initialize()
  # The path to the documents directory.
  data_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) + '/data/'

  # Open the target document.
  pdf = Rjb::import('com.aspose.pdf.Document').new(data_dir + 'input1.pdf')

  # Create a TextAbsorber object to collect the text.
  text_absorber = Rjb::import('com.aspose.pdf.TextAbsorber').new

  # Accept the absorber for all the pages.
  # To extract text from one specific page only, accept the absorber on that
  # page instead, selecting it by index:
  #   pdf.getPages().get_Item(1).accept(text_absorber)
  pdf.getPages().accept(text_absorber)

  # Get the extracted text.
  extracted_text = text_absorber.getText()

  # Create a writer and open the output file.
  writer = Rjb::import('java.io.FileWriter').new(Rjb::import('java.io.File').new(data_dir + "extracted_text.out.txt"))
  begin
    writer.write(extracted_text)
  ensure
    # Close the stream even when the write fails, so the file handle held on
    # the Java side is not leaked.
    writer.close()
  end

  puts "Text extracted successfully. Check output file."
end