Class: Langchain::Processors::Pptx

Inherits:
Base
  • Object
show all
Defined in:
lib/langchain/processors/pptx.rb

Constant Summary collapse

EXTENSIONS =
[".pptx"]
CONTENT_TYPES =
["application/vnd.openxmlformats-officedocument.presentationml.presentation"]

Instance Method Summary collapse

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(*) ⇒ Pptx

Returns a new instance of Pptx.



9
10
11
# File 'lib/langchain/processors/pptx.rb', line 9

# Builds a Pptx processor.
#
# Any positional arguments are accepted and ignored. The only work done
# here is declaring the "power_point_pptx" dependency through
# DependencyHelper#depends_on.
#
# @return [Pptx] a new instance of Pptx
def initialize(*_ignored)
  depends_on("power_point_pptx")
end

Instance Method Details

#parse(data) ⇒ String

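Parses the presentation and returns its text: the text sections of each slide are stripped and joined with single spaces, and slides are separated by a blank line.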

Parameters:

  • data (File)

Returns:

  • (String)


16
17
18
19
20
21
22
23
24
25
26
# File 'lib/langchain/processors/pptx.rb', line 16

# Parse the presentation and return its text.
#
# The presentation is opened with PowerPointPptx::Document.open. For every
# slide, each content section is stripped of surrounding whitespace and the
# sections are joined with a single space; the per-slide strings are then
# joined with a blank line ("\n\n") between them.
#
# @param data [File] the presentation handed to PowerPointPptx::Document.open
# @return [String] the text of all slides
def parse(data)
  deck = PowerPointPptx::Document.open(data)

  per_slide = deck.slides.map do |slide|
    slide.content.map { |section| section.strip }.join(" ")
  end

  per_slide.join("\n\n")
end