Class: SharedTools::Tools::Doc::PdfReaderTool

Inherits:
RubyLLM::Tool
  • Object
show all
Defined in:
lib/shared_tools/tools/doc/pdf_reader_tool.rb

Overview

Examples:

tool = SharedTools::Tools::Doc::PdfReaderTool.new
tool.execute(doc_path: "./document.pdf", page_numbers: "1, 5, 10")

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(logger: nil) ⇒ PdfReaderTool

Returns a new instance of PdfReaderTool.

Parameters:

  • logger (Logger) (defaults to: nil)

    optional logger



27
28
29
# File 'lib/shared_tools/tools/doc/pdf_reader_tool.rb', line 27

# Creates the tool and stores the logger it writes to.
#
# @param logger [Logger, nil] optional logger; when none is given the
#   value of RubyLLM.logger is used instead
def initialize(logger: nil)
  @logger = logger
  @logger ||= RubyLLM.logger
end

Class Method Details

.name ⇒ Object



17
# File 'lib/shared_tools/tools/doc/pdf_reader_tool.rb', line 17

# Overrides the class-level name with a fixed identifier.
#
# @return [String] always 'doc_pdf_read'
def self.name
  'doc_pdf_read'
end

Instance Method Details

#execute(page_numbers:, doc_path:) ⇒ Hash

Returns extraction result.

Parameters:

  • page_numbers (String)

    comma-separated page numbers

  • doc_path (String)

    path to PDF file

Returns:

  • (Hash)

    extraction result

Raises:

  • (LoadError)


35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/shared_tools/tools/doc/pdf_reader_tool.rb', line 35

# Extracts the text of selected pages from a PDF document.
#
# The parsed document is cached on the instance, but only while the same
# +doc_path+ keeps being requested; a different path loads that file rather
# than silently reusing the previously opened document.
#
# @param page_numbers [String] comma-separated, 1-based page numbers
#   (e.g. "1, 5, 10"); entries that do not parse as a number become 0 and
#   are reported under +:invalid_pages+
# @param doc_path [String] path to the PDF file
# @return [Hash] on success +:total_pages+, +:requested_pages+,
#   +:invalid_pages+ and +:pages+ (an array of +{ page:, text: }+ hashes);
#   on any StandardError +{ error: message }+
# @raise [LoadError] if PDF::Reader is not defined (pdf-reader gem not loaded)
def execute(page_numbers:, doc_path:)
  raise LoadError, "PdfReaderTool requires the 'pdf-reader' gem. Install it with: gem install pdf-reader" unless defined?(PDF::Reader)

  @logger.info("Reading PDF: #{doc_path}, pages: #{page_numbers}")

  begin
    # Reuse the cached reader only when it was built for this exact path;
    # an unconditional `||=` would keep serving the first document forever.
    if @doc.nil? || @doc_path != doc_path
      @doc = PDF::Reader.new(doc_path)
      @doc_path = doc_path
    end

    # Fetch the page list once and reuse it for the count and the lookups.
    all_pages = @doc.pages
    total_pages = all_pages.size
    @logger.debug("PDF loaded successfully, total pages: #{total_pages}")

    requested = page_numbers.split(",").map { |num| num.strip.to_i }
    @logger.debug("Processing pages: #{requested.join(", ")}")

    # Split the request into pages that exist and pages that are out of range.
    valid_pages, invalid_pages = requested.partition { |num| num.between?(1, total_pages) }

    if invalid_pages.any?
      @logger.warn("Invalid page numbers requested: #{invalid_pages.join(", ")}. Document has #{total_pages} pages.")
    end

    extracted = valid_pages.map do |num|
      # Extract the text a single time; it feeds both the log line and the result.
      text = all_pages[num - 1]&.text
      @logger.debug("Extracted text from page #{num} (#{text&.bytesize || 0} bytes)")
      { page: num, text: text }
    end

    @logger.info("Successfully extracted #{extracted.size} pages from PDF")

    {
      total_pages: total_pages,
      requested_pages: requested,
      invalid_pages: invalid_pages,
      pages: extracted
    }
  rescue => e
    @logger.error("Failed to read PDF '#{doc_path}': #{e.message}")
    { error: e.message }
  end
end