Module: OcrFile::ImageEngines::PdfEngine
Constant Summary collapse
- PAGE_BREAK =
"\n\r\n"
- DEFAULT_PAGE_OPTIONS =
{
  font: 'Helvetica',
  font_size: 5, # 8 # 12
  text_x: 20,
  text_y: 800,
  minimum_word: 5,
}
Instance Method Summary collapse
- #add_page(document, text, options) ⇒ Object
- #combine(text, pdf_of_images) ⇒ Object
- #extract_images(document, save_path, verbose: false) ⇒ Object
- #insert_image(document, image_path, dimensions: nil) ⇒ Object
- #merge(documents) ⇒ Object
- #open_pdf(file, password: '') ⇒ Object
- #pdf_from_text(text, options = DEFAULT_PAGE_OPTIONS) ⇒ Object
- #save_pdf(document, save_file_path, optimise: true) ⇒ Object
Instance Method Details
#add_page(document, text, options) ⇒ Object
27 28 29 30 31 32 33 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 27

# Adds one page to +document+ and draws +text+ on that page's canvas.
#
# The page is added with no explicit size argument, so its dimensions are
# whatever +document.pages.add+ produces by default.
#
# @param document [Object] anything responding to +pages.add+ (a
#   HexaPDF::Document elsewhere in this module)
# @param text [String] the text to draw on the new page
# @param options [Hash] layout settings; the keys read here are +:font+,
#   +:font_size+, +:text_x+ and +:text_y+
# @return [Object] the same +document+ that was passed in
def add_page(document, text, options)
  canvas = document.pages.add.canvas
  canvas.font(options[:font], size: options[:font_size])
  # The text anchor is taken straight from the options as an [x, y] pair.
  canvas.text(text, at: [options[:text_x], options[:text_y]])
  document
end
#combine(text, pdf_of_images) ⇒ Object
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 86

# Placeholder for overlaying OCR text onto a PDF made of page images.
#
# Only the argument checks exist so far: the method returns nil on every
# path, including when the page counts match.
#
# @param text [HexaPDF::Document, String] either a PDF whose pages hold the
#   text, or raw text with pages separated by PAGE_BREAK
# @param pdf_of_images [HexaPDF::Document] the image-only PDF
# @return [nil]
def combine(text, pdf_of_images)
  return unless pdf_of_images.is_a?(::HexaPDF::Document)

  text_is_pdf = text.is_a?(::HexaPDF::Document)
  # Anything that is not a PDF is assumed to be raw text with PAGE_BREAK.
  pages_of_text = text_is_pdf ? text.pages : text.split(PAGE_BREAK)
  return if pages_of_text.size != pdf_of_images.pages.size

  # TODO: not implemented yet. For a PDF +text+ the page structure should be
  # kept; for raw text the text just needs to be embedded.
  nil
end
#extract_images(document, save_path, verbose: false) ⇒ Object
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 43

# Writes every writable image found in +document+ into +save_path+.
#
# Images are enumerated through the private +each_image+ helper of
# HexaPDF::CLI::Images (hence the +send+). Each writable image is saved as
# "<save_path>/<index>.<extension>".
#
# @param document [HexaPDF::Document] the PDF to pull images from
# @param save_path [String] directory prefix for the written image files
# @param verbose [Boolean] when true, images that cannot be written are
#   reported on stderr; when false they are skipped silently
# @return [Array<String>] paths of the images that were written, in the
#   order they were yielded
def extract_images(document, save_path, verbose: false)
  image_paths = []

  ::HexaPDF::CLI::Images.new.send(:each_image, document) do |image, index, pindex, (_x_ppi, _y_ppi)|
    # Progress output is unconditional, exactly as before.
    puts "Processing page: #{pindex} ..."
    info = image.info

    if info.writable
      image_path = "#{save_path}/#{index}.#{info.extension}"
      image.write(image_path)
      image_paths << image_path
    elsif verbose
      # The previous code called command_parser/style/RED here, none of which
      # are defined in this module, so this branch raised NameError.
      warn "Warning (image #{index}, page #{pindex}): PDF image format not supported for writing"
    end
  end

  image_paths
end
#insert_image(document, image_path, dimensions: nil) ⇒ Object
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 64

# Adds a page to +document+ and draws an image covering the whole page.
#
# An ImageMagick processor is always built from +image_path+, +@temp_folder_path+
# and +@config+; its width/height are only consulted when +dimensions+ is
# not given. The drawn image is +@image+ when that is set, otherwise
# +image_path+.
#
# @param document [HexaPDF::Document] the PDF receiving the new page
# @param image_path [String] path of the image file
# @param dimensions [Array, nil] optional [width, height] for the page
# @return [Object] the same +document+ that was passed in
def insert_image(document, image_path, dimensions: nil)
  image_processor = OcrFile::ImageEngines::ImageMagick.new(
    image_path: image_path,
    temp_path: @temp_folder_path,
    save_file_path: '',
    config: @config
  )

  # Explicit dimensions win; otherwise fall back to the processor's values.
  width = dimensions ? dimensions[0] : image_processor.width
  height = dimensions ? dimensions[1] : image_processor.height

  new_page = document.pages.add([0, 0, width, height])
  source = @image || image_path
  new_page.canvas.image(source, at: [0, 0], width: width, height: height)

  document
end
#merge(documents) ⇒ Object
104 105 106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 104

# Builds one new PDF out of a list of PDFs and/or images.
#
# Entries that are HexaPDF documents have each of their pages imported into
# the new document; every other entry is assumed to be an image and is
# handed to +insert_image+.
#
# @param documents [Enumerable] HexaPDF::Document instances and/or image
#   arguments accepted by +insert_image+
# @return [HexaPDF::Document] the newly created, merged document
def merge(documents)
  merged = ::HexaPDF::Document.new

  documents.each do |entry|
    if entry.is_a?(::HexaPDF::Document)
      entry.pages.each do |page|
        merged.pages << merged.import(page)
      end
      next
    end

    # Not a PDF: assume an image.
    insert_image(merged, entry)
  end

  merged
end
#open_pdf(file, password: '') ⇒ Object
39 40 41 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 39

# Opens a PDF through HexaPDF, passing +password+ as the decryption password.
#
# @param file [Object] forwarded unchanged to HexaPDF::Document.open
# @param password [String] decryption password; defaults to the empty string
# @return [Object] whatever HexaPDF::Document.open returns
def open_pdf(file, password: '')
  decryption = { password: password }
  ::HexaPDF::Document.open(file, decryption_opts: decryption)
end
#pdf_from_text(text, options = DEFAULT_PAGE_OPTIONS) ⇒ Object
16 17 18 19 20 21 22 23 24 25 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 16

# Builds a new PDF with one page per PAGE_BREAK-separated chunk of +text+.
#
# Chunks shorter than +:minimum_word+ characters are dropped. Caller options
# are merged over DEFAULT_PAGE_OPTIONS, so a partial hash such as
# +{ font_size: 8 }+ no longer leaves +:minimum_word+ (and the other keys)
# nil.
#
# @param text [String] text whose pages are separated by PAGE_BREAK
# @param options [Hash, nil] overrides for DEFAULT_PAGE_OPTIONS
# @return [HexaPDF::Document] the new document (whatever the last
#   +add_page+ call returned, or the empty document if no chunk qualified)
def pdf_from_text(text, options = DEFAULT_PAGE_OPTIONS)
  page_options = DEFAULT_PAGE_OPTIONS.merge(options || {})
  document = ::HexaPDF::Document.new

  pages = text
    .split(PAGE_BREAK)
    .reject { |page_text| page_text.size < page_options[:minimum_word] }

  # add_page returns the document, so thread it through the fold.
  pages.reduce(document) { |doc, page_text| add_page(doc, page_text, page_options) }
end
#save_pdf(document, save_file_path, optimise: true) ⇒ Object
35 36 37 |
# File 'lib/ocr-file/image_engines/pdf_engine.rb', line 35

# Writes +document+ to +save_file_path+.
#
# The +optimise+ keyword is forwarded as the +optimize:+ option of
# +document.write+. It was previously ignored and +true+ was always passed.
#
# @param document [Object] anything responding to +write(path, optimize:)+
#   (a HexaPDF::Document elsewhere in this module)
# @param save_file_path [String] destination path
# @param optimise [Boolean] forwarded as +optimize:+; defaults to true
# @return [Object] whatever +document.write+ returns
def save_pdf(document, save_file_path, optimise: true)
  document.write(save_file_path, optimize: optimise)
end