Class: Plaintext::PptxHandler

Inherits:
OfficeDocumentHandler show all
Defined in:
lib/plaintext/file_handler/zipped_xml_handler/office_document_handler/pptx_handler.rb

Constant Summary collapse

# MIME content types assigned to @content_types by PptxHandler#initialize.
CONTENT_TYPES = %w[
  application/vnd.openxmlformats-officedocument.presentationml.presentation
  application/vnd.openxmlformats-officedocument.presentationml.slideshow
  application/vnd.ms-powerpoint.template.macroEnabled.12
]

Instance Method Summary collapse

Methods inherited from FileHandler

#accept?, #set

Constructor Details

#initialize ⇒ PptxHandler

Returns a new instance of PptxHandler.



11
12
13
14
15
# File 'lib/plaintext/file_handler/zipped_xml_handler/office_document_handler/pptx_handler.rb', line 11

# Builds a handler configured for PowerPoint (OOXML) presentations.
#
# After running the parent initializer, records the DrawingML namespace URI
# and the list of content types declared in CONTENT_TYPES.
def initialize
  super
  # DrawingML main namespace.
  @namespace_uri = 'http://schemas.openxmlformats.org/drawingml/2006/main'
  @content_types = CONTENT_TYPES
end

Instance Method Details

#text(file, options = {}) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/plaintext/file_handler/zipped_xml_handler/office_document_handler/pptx_handler.rb', line 17

# Extracts the text of a presentation by concatenating the text of its slides.
#
# Slide parts are found by scanning the zip archive for entries whose name
# ends in +slide<N>.xml+, and are processed in ascending *numeric* order of
# N (so slide 2 comes before slide 10).
#
# @param file the archive to read; passed straight to Zip::File.open
# @param options [Hash] optional settings
# @option options [Integer, nil] :max_size passed to xml_to_text for each
#   slide; once the accumulated text reaches this length no further slides
#   are read. The result is not truncated here, so it may exceed max_size.
# @return [String] the concatenated slide text (empty if no slides matched)
def text(file, options = {})
  max_size = options[:max_size]
  result = ''.dup
  Zip::File.open(file) do |zip_file|
    slides = []
    zip_file.each do |entry|
      # Anchor at the end of the name so relationship parts such as
      # "slide1.xml.rels" are not mistaken for slides.
      match = /slide(\d+)\.xml\z/.match(entry.name)
      # Store the slide number as an Integer: sorting the raw capture would
      # order slides lexicographically ("10" before "2").
      slides << [match[1].to_i, entry] if match
    end

    slides.sort_by!(&:first)
    slides.each do |_number, entry|
      result << xml_to_text(entry.get_input_stream, max_size)
      break if max_size && result.length >= max_size
    end
  end
  result
end