Class: Mindee::Input::Source::LocalInputSource
- Inherits:
-
Object
- Object
- Mindee::Input::Source::LocalInputSource
- Defined in:
- lib/mindee/input/sources/local_input_source.rb
Overview
Base class for loading documents.
Direct Known Subclasses
Base64InputSource, BytesInputSource, FileInputSource, PathInputSource
Instance Attribute Summary collapse
- #file_mimetype ⇒ String readonly
- #filename ⇒ String readonly
- #io_stream ⇒ StringIO | File readonly
Class Method Summary collapse
-
.fix_pdf(stream, maximum_offset: 500) ⇒ StringIO
Attempt to fix the PDF data in the given stream.
Instance Method Summary collapse
-
#apply_page_options(options) ⇒ Object
Cuts a PDF file according to provided options.
-
#compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true) ⇒ Object
Compresses the file, according to the provided info.
-
#count_pages ⇒ Integer
deprecated
Deprecated.
Use #page_count instead.
-
#fix_pdf!(maximum_offset: 500) ⇒ void
Attempts to fix the PDF data in the file.
-
#initialize(io_stream, filename, repair_pdf: false) ⇒ LocalInputSource
constructor
A new instance of LocalInputSource.
-
#page_count ⇒ Integer
Returns the page count for a document.
-
#pdf? ⇒ Boolean
Shorthand for PDF mimetype validation.
-
#process_pdf(options) ⇒ Object
deprecated
Deprecated.
Use #apply_page_options instead.
-
#read_contents(close: true) ⇒ Array<(String, Hash)>
Reads a document.
-
#rescue_broken_pdf(_) ⇒ Object
deprecated
Deprecated.
See #fix_pdf! or .fix_pdf instead.
-
#source_text? ⇒ bool
Checks whether the file has source text if it is a pdf.
-
#write_to_file(path) ⇒ Object
Write the file to a given path.
Constructor Details
#initialize(io_stream, filename, repair_pdf: false) ⇒ LocalInputSource
Returns a new instance of LocalInputSource.
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 36 def initialize(io_stream, filename, repair_pdf: false) @io_stream = io_stream @filename = filename @file_mimetype = if repair_pdf Marcel::MimeType.for @io_stream else Marcel::MimeType.for @io_stream, name: @filename end if ALLOWED_MIME_TYPES.include? @file_mimetype logger.debug("Loaded new input #{@filename} from #{self.class}") return end if filename.end_with?('.pdf') && repair_pdf fix_pdf! logger.debug("Loaded new input #{@filename} from #{self.class}") return if ALLOWED_MIME_TYPES.include? @file_mimetype end raise Errors::MindeeMimeTypeError, @file_mimetype.to_s end |
Instance Attribute Details
#file_mimetype ⇒ String (readonly)
29 30 31 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 29 def file_mimetype @file_mimetype end |
#filename ⇒ String (readonly)
27 28 29 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 27 def filename @filename end |
#io_stream ⇒ StringIO | File (readonly)
31 32 33 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 31 def io_stream @io_stream end |
Class Method Details
.fix_pdf(stream, maximum_offset: 500) ⇒ StringIO
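Attempts to fix the PDF data in the given stream by dropping any bytes that precede the `%PDF-` header. Raises Errors::MindeePDFError if the stream is exhausted while searching for the header, or if the header ends beyond maximum_offset bytes.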
84 85 86 87 88 89 90 91 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 84 def self.fix_pdf(stream, maximum_offset: 500) out_stream = StringIO.new stream.gets('%PDF-') raise Errors::MindeePDFError if stream.eof? || stream.pos > maximum_offset stream.pos = stream.pos - 5 out_stream << stream.read end |
Instance Method Details
#apply_page_options(options) ⇒ Object
Cuts a PDF file according to provided options.
101 102 103 104 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 101 def apply_page_options(options) @io_stream.seek(0) @io_stream = PDF::PDFProcessor.parse(@io_stream, options) end |
#compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true) ⇒ Object
Compresses the file, according to the provided info.
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 169 def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true) buffer = if pdf? Mindee::PDF::PDFCompressor.compress_pdf( @io_stream, quality: quality, force_source_text_compression: force_source_text, disable_source_text: disable_source_text ) else Mindee::Image::ImageCompressor.compress_image( @io_stream, quality: quality, max_width: max_width, max_height: max_height ) end @io_stream = buffer @io_stream.rewind end |
#count_pages ⇒ Integer
Deprecated. Use #page_count instead.
Returns the page count for a document. Defaults to one for images.
156 157 158 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 156 def count_pages page_count end |
#fix_pdf!(maximum_offset: 500) ⇒ void
This method returns an undefined value.
Attempts to fix the PDF data in the file.
73 74 75 76 77 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 73 def fix_pdf!(maximum_offset: 500) @io_stream = LocalInputSource.fix_pdf(@io_stream, maximum_offset: maximum_offset) @io_stream.rewind @file_mimetype = Marcel::MimeType.for @io_stream end |
#page_count ⇒ Integer
Returns the page count for a document. Defaults to one for images.
144 145 146 147 148 149 150 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 144 def page_count return 1 unless pdf? @io_stream.seek(0) pdf_processor = Mindee::PDF::PDFProcessor.open_pdf(@io_stream) pdf_processor.pages.size end |
#pdf? ⇒ Boolean
Shorthand for PDF mimetype validation.
65 66 67 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 65 def pdf? @file_mimetype.to_s == 'application/pdf' end |
#process_pdf(options) ⇒ Object
Deprecated. Use #apply_page_options instead.
108 109 110 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 108 def process_pdf(options) apply_page_options(options) end |
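#read_contents(close: true) ⇒ Array<(String, Hash)>
Reads the document from the start of the stream and returns its contents together with a `{ filename: ... }` hash. The stream is closed afterwards unless `close: false` is given.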
115 116 117 118 119 120 121 122 123 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 115 def read_contents(close: true) logger.debug("Reading data from: #{@filename}") @io_stream.seek(0) # Avoids needlessly re-packing some files data = @io_stream.read @io_stream.rewind @io_stream.close if close [data, { filename: Mindee::Input::Source.convert_to_unicode_escape(@filename) }] end |
#rescue_broken_pdf(_) ⇒ Object
Deprecated. See #fix_pdf! or Mindee::Input::Source::LocalInputSource.fix_pdf instead.
60 61 62 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 60 def rescue_broken_pdf(_) fix_pdf! end |
#source_text? ⇒ bool
Checks whether the file has source text if it is a PDF; returns false otherwise.
191 192 193 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 191 def source_text? Mindee::PDF::PDFTools.source_text?(@io_stream) end |
#write_to_file(path) ⇒ Object
Writes the file to a given path. If the path is an existing directory or ends with '/', the file is written inside it under its initial file name.
127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/mindee/input/sources/local_input_source.rb', line 127 def write_to_file(path) t_path = if File.directory?(path || '') || path.to_s.end_with?('/') File.join(path || '', @filename) else path end full_path = File.expand_path(t_path || '') FileUtils.mkdir_p(File.dirname(full_path)) @io_stream.rewind File.binwrite(full_path, @io_stream.read || '') logger.debug("Wrote file successfully to #{full_path}") @io_stream.rewind end |