Module: OcrFile::ImageEngines::PdfEngine
Constant Summary collapse
- PAGE_BREAK =
"\n\r\n"
- DEFAULT_PAGE_OPTIONS =
{
  font: 'Helvetica',
  font_size: 5, #8 # 12
  text_x: 20,
  text_y: 800,
  minimum_word: 5,
}
Instance Method Summary collapse
- #add_page(document, text, options) ⇒ Object
- #extract_images(document, save_path, verbose: false) ⇒ Object
- #merge(documents) ⇒ Object
- #open_pdf(file, password: '') ⇒ Object
- #pdf_from_text(text, options = DEFAULT_PAGE_OPTIONS) ⇒ Object
- #save_pdf(document, save_file_path, optimise: true) ⇒ Object
Instance Method Details
#add_page(document, text, options) ⇒ Object
27 28 29 30 31 32 33 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 27
#
# Adds one page to +document+ and draws +text+ on that page's canvas.
#
# @param document [Object] PDF document; must respond to +pages.add+, whose
#   result exposes +canvas+ (presumably a HexaPDF::Document — confirm with callers)
# @param text [String] text drawn on the new page
# @param options [Hash] page settings; the keys read here are :font,
#   :font_size, :text_x and :text_y
# @return [Object] the same +document+ that was passed in
def add_page(document, text, options)
  canvas = document.pages.add.canvas
  canvas.font(options[:font], size: options[:font_size])
  # The text is anchored at the (x, y) position taken from the options.
  canvas.text(text, at: [options[:text_x], options[:text_y]])
  document
end
#extract_images(document, save_path, verbose: false) ⇒ Object
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 43
#
# Writes every writable image found in +document+ to +save_path+ and returns
# the paths of the files that were written.
#
# Images are enumerated with HexaPDF's CLI +Images#each_image+, called through
# +send+ (presumably because it is not public — confirm against HexaPDF).
#
# @param document [Object] PDF document handed to +each_image+
# @param save_path [String] directory prefix; files are named "<index>.<extension>"
# @param verbose [Boolean] when true, print a warning for each image whose
#   format cannot be written
# @return [Array<String>] paths of the image files written, in iteration order
def extract_images(document, save_path, verbose: false)
  image_paths = []

  ::HexaPDF::CLI::Images.new.send(:each_image, document) do |image, index, pindex, (_x_ppi, _y_ppi)|
    puts "Processing page: #{pindex} ..."
    info = image.info

    if info.writable
      image_path = "#{save_path}/#{index}.#{info.extension}"
      image.write(image_path)
      image_paths << image_path
    elsif verbose
      # The listing called command_parser/style/RED here, which are not visible
      # in this module; the method's own verbose flag controls the warning instead.
      puts "Warning (image #{index}, page #{pindex}): PDF image format not supported for writing"
    end
  end

  image_paths
end
#merge(documents) ⇒ Object
64 65 66 67 68 69 70 71 72 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 64
#
# Combines the pages of several documents into one newly created document.
#
# Each page is imported into the new document before being appended to its
# page list, so pages end up in the order the source documents are given.
#
# @param documents [#each] source documents, each responding to +pages+
# @return [HexaPDF::Document] the new document holding all imported pages
def merge(documents)
  combined = ::HexaPDF::Document.new

  documents.each do |source|
    source.pages.each do |page|
      page_list = combined.pages
      page_list << combined.import(page)
    end
  end

  combined
end
#open_pdf(file, password: '') ⇒ Object
39 40 41 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 39
#
# Opens the PDF at +file+ with HexaPDF, passing +password+ as the decryption password.
#
# @param file [Object] value passed straight to +HexaPDF::Document.open+
#   (presumably a file path — confirm with callers)
# @param password [String] decryption password; defaults to the empty string
# @return [Object] whatever +HexaPDF::Document.open+ returns
def open_pdf(file, password: '')
  decryption_opts = { password: password }
  ::HexaPDF::Document.open(file, decryption_opts: decryption_opts)
end
#pdf_from_text(text, options = DEFAULT_PAGE_OPTIONS) ⇒ Object
16 17 18 19 20 21 22 23 24 25 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 16
#
# Builds a new PDF document from +text+, one page per PAGE_BREAK-separated chunk.
# Chunks shorter than options[:minimum_word] characters are skipped.
#
# @param text [String] full text; pages are separated by PAGE_BREAK
# @param options [Hash] page settings; any key left out falls back to the
#   value in DEFAULT_PAGE_OPTIONS
# @return [Object] the document returned by the last #add_page call, or the
#   empty new document when no chunk qualifies
def pdf_from_text(text, options = DEFAULT_PAGE_OPTIONS)
  # A partial hash (e.g. only :font_size) would otherwise leave :minimum_word
  # nil and make the size comparison below raise.
  options = DEFAULT_PAGE_OPTIONS.merge(options.to_h)
  document = ::HexaPDF::Document.new

  text
    .split(PAGE_BREAK)
    .reject { |page_text| page_text.size < options[:minimum_word] }
    .reduce(document) { |doc, page_text| add_page(doc, page_text, options) }
end
#save_pdf(document, save_file_path, optimise: true) ⇒ Object
35 36 37 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 35
#
# Writes +document+ to +save_file_path+.
#
# @param document [Object] PDF document responding to +write(path, optimize:)+
# @param save_file_path [String] destination passed to +document.write+
# @param optimise [Boolean] forwarded as the +optimize:+ option of
#   +document.write+; defaults to true
# @return [Object] whatever +document.write+ returns
def save_pdf(document, save_file_path, optimise: true)
  # Forward the caller's choice; the flag was previously ignored and
  # optimisation was always enabled.
  document.write(save_file_path, optimize: optimise)
end