Class: Plaintext::Resolver

Inherits:
Object
  • Object
show all
Defined in:
lib/plaintext/resolver.rb

Constant Summary collapse

# Handler classes known to the resolver, listed in their original order.
# .file_handlers instantiates each of them once via HANDLERS.map(&:new).
# The array is frozen so the list itself cannot be modified at runtime.
HANDLERS = [
  Plaintext::PdfHandler,
  Plaintext::OpendocumentHandler,
  Plaintext::DocxHandler,
  Plaintext::XlsxHandler,
  Plaintext::PptxHandler,
  Plaintext::DocHandler,
  Plaintext::XlsHandler,
  Plaintext::PptHandler,
  Plaintext::ImageHandler,
  Plaintext::RtfHandler,
  Plaintext::PlaintextHandler
].freeze

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file, content_type = nil) ⇒ Resolver

Returns a new instance of Resolver.



28
29
30
31
32
# File 'lib/plaintext/resolver.rb', line 28

# Sets up a resolver for the given file.
#
# @param file the file handed to the matching handler when #text is called
# @param content_type optional content type of the file; nil when omitted
def initialize(file, content_type = nil)
  @content_type = content_type
  @file = file
  # Default cap on the returned plain text: 4 megabytes, expressed in bytes.
  @max_plaintext_bytes = 4 * 1024 * 1024
end

Class Attribute Details

.cached_file_handlers ⇒ Object

Returns the value of attribute cached_file_handlers.



10
11
12
# File 'lib/plaintext/resolver.rb', line 10

# Class-level reader for the cached handler list.
# Returns the current value of @cached_file_handlers, which is nil until
# something assigns it (.file_handlers does so on its first call).
def cached_file_handlers
  @cached_file_handlers
end

Instance Attribute Details

#max_plaintext_bytes ⇒ Object

Maximum length of the returned plain text, in bytes. Default: 4 MB.



7
8
9
# File 'lib/plaintext/resolver.rb', line 7

# Returns the maximum size, in bytes, of the plain text produced by #text.
# #initialize sets this to 4_194_304 (4 megabytes).
def max_plaintext_bytes
  @max_plaintext_bytes
end

Class Method Details

.file_handlers ⇒ Object



22
23
24
25
# File 'lib/plaintext/resolver.rb', line 22

# Returns one instance of every class in HANDLERS.
#
# The instances are built on the first call and stored through
# cached_file_handlers=; later calls return the stored list. A blank cache
# (nil or empty) is treated as "not built yet" and triggers a rebuild.
def file_handlers
  cached = cached_file_handlers
  return cached if cached.present?

  self.cached_file_handlers = HANDLERS.map(&:new)
end

Instance Method Details

#text ⇒ Object

Returns the extracted full text, or nil if no matching handler was found for the file type or the handler returned no text.



37
38
39
40
41
42
43
44
45
# File 'lib/plaintext/resolver.rb', line 37

# Extracts the plain text of @file using the handler returned by find_handler.
#
# Every run of whitespace is collapsed to a single space, leading and
# trailing whitespace is removed, and the result is composed and limited to
# max_plaintext_bytes via mb_chars.
#
# The string returned by the handler is never modified in place, so frozen
# strings are accepted and handler-owned data is left untouched.
#
# @return [String, nil] the cleaned-up text, or nil when no handler matches
#   or the handler returns nil/false
def text
  handler = find_handler
  return unless handler

  extracted = handler.text(@file, max_size: max_plaintext_bytes)
  return unless extracted

  # Non-destructive gsub/strip: the bang variants would mutate the handler's
  # string and raise FrozenError if it is frozen.
  cleaned = extracted.gsub(/\s+/m, ' ').strip
  cleaned.mb_chars.compose.limit(max_plaintext_bytes).to_s
end
end