Class: DSPy::Tools::TextProcessingToolset

Inherits:

Toolset

Object
Toolset
DSPy::Tools::TextProcessingToolset

show all

Extended by: T::Sig

Defined in: lib/dspy/tools/text_processing_toolset.rb

Overview

Text processing toolset that provides text analysis and manipulation tools. Includes grep, word count, ripgrep, and other text processing utilities.

Instance Method Summary collapse

Methods inherited from Toolset

schema_for_method, to_tools, tool, toolset_name

Constructor Details

#initialize ⇒ `TextProcessingToolset`

Returns a new instance of TextProcessingToolset.



28
29
30

# File 'lib/dspy/tools/text_processing_toolset.rb', line 28

# Builds the toolset. The constructor takes no arguments and assigns no
# instance variables.
def initialize
  # No persistent state needed for text processing
end

Instance Method Details

#extract_lines(text:, start_line:, end_line: nil) ⇒ `Object`

# File 'lib/dspy/tools/text_processing_toolset.rb', line 100

# Returns one line, or an inclusive range of lines, from +text+.
#
# Line numbers are 1-based. Line terminators are preserved in the result.
#
# @param text [String] the text to slice
# @param start_line [Integer] first line to return; values below 1 are
#   treated as 1
# @param end_line [Integer, nil] last line to return (inclusive); values past
#   the end of the text are clamped to the final line. When omitted, only
#   +start_line+ is returned.
# @return [String] the selected lines concatenated, or "" when the selection
#   is empty (start past the end of the text, or end before start)
def extract_lines(text:, start_line:, end_line: nil)
  lines = text.lines
  first = [start_line - 1, 0].max # 0-based, never negative

  # No end line means "exactly this one line".
  return lines[first] || "" unless end_line

  last = [end_line - 1, lines.length - 1].min # 0-based, never past the final line

  # A negative upper bound would be read by Array#[] as "count from the end"
  # and hand back lines that were never requested (end_line: 0 used to return
  # the whole text), so an empty or inverted range yields an empty result.
  return "" if last < first

  lines[first..last].join
end

#filter_lines(text:, pattern:, invert: false) ⇒ `Object`

# File 'lib/dspy/tools/text_processing_toolset.rb', line 115

# Keeps the lines of +text+ that match +pattern+, or with +invert+ the lines
# that do not. Matching is case-insensitive and line terminators are kept.
#
# @param text [String] the text to filter
# @param pattern [String] regular expression source passed to Regexp.new
# @param invert [Boolean] when truthy, return the non-matching lines instead
# @return [String] the surviving lines concatenated in their original order
def filter_lines(text:, pattern:, invert: false)
  rows = text.lines
  matcher = Regexp.new(pattern, Regexp::IGNORECASE)

  # One pass splits the lines into both groups; the flag picks which to emit.
  hits, misses = rows.partition { |row| row.match?(matcher) }
  (invert ? misses : hits).join
end

#grep(text:, pattern:, ignore_case: true, count_only: false) ⇒ `Object`

# File 'lib/dspy/tools/text_processing_toolset.rb', line 33

# Searches +text+ with the system `grep` binary.
#
# The text is written to a temporary file which is always removed afterwards,
# including when an error occurs. grep is executed directly from an argument
# vector rather than through a shell, so quotes and other shell metacharacters
# in +pattern+ are treated as literal pattern text and cannot alter the
# command.
#
# @param text [String] the text to search
# @param pattern [String] pattern handed to grep
# @param ignore_case [Boolean] pass -i to grep when truthy
# @param count_only [Boolean] pass -c to grep and report the count instead of
#   the matching lines
# @return [String] the matching lines, a "Found N matches ..." message when
#   +count_only+ is set, a "No matches found ..." message when grep printed
#   nothing, or an "Error running grep: ..." message if anything raised
def grep(text:, pattern:, ignore_case: true, count_only: false)
  temp_file = Tempfile.new('text_processing')
  temp_file.write(text)
  temp_file.close

  argv = ['grep']
  argv << '-i' if ignore_case
  argv << '-c' if count_only
  # -e marks the next argument as the pattern even when it begins with "-".
  argv.push('-e', pattern.to_s, temp_file.path)

  # Array form spawns grep without a shell; stderr is discarded as before.
  result = IO.popen(argv, err: File::NULL, &:read)

  if count_only
    "Found #{result.strip} matches for pattern '#{pattern}'"
  elsif result.empty?
    "No matches found for pattern '#{pattern}'"
  else
    result
  end
rescue => e
  "Error running grep: #{e.message}"
ensure
  # close! closes (if still open) and unlinks, so the file never lingers.
  temp_file&.close!
end

#ripgrep(text:, pattern:, context: 0) ⇒ `Object`

# File 'lib/dspy/tools/text_processing_toolset.rb', line 77

# Searches +text+ with the `rg` (ripgrep) binary.
#
# The text is written to a temporary file which is always removed afterwards,
# including when an error occurs. rg is executed directly from an argument
# vector rather than through a shell, so quotes and other shell metacharacters
# in +pattern+ are treated as literal pattern text and cannot alter the
# command.
#
# @param text [String] the text to search
# @param pattern [String] pattern handed to rg
# @param context [Integer] number of context lines (-C) to show around each
#   match; omitted from the command when not greater than zero
# @return [String] rg's output, a "No matches found ..." message when rg
#   printed nothing, or an "Error running ripgrep: ..." message if anything
#   raised (for example when the rg executable cannot be started)
def ripgrep(text:, pattern:, context: 0)
  temp_file = Tempfile.new('text_processing')
  temp_file.write(text)
  temp_file.close

  argv = ['rg']
  argv.push('-C', context.to_s) if context > 0
  # -e marks the next argument as the pattern even when it begins with "-".
  argv.push('-e', pattern.to_s, temp_file.path)

  # Array form spawns rg without a shell; stderr is discarded as before.
  result = IO.popen(argv, err: File::NULL, &:read)

  if result.empty?
    "No matches found for pattern '#{pattern}'"
  else
    result
  end
rescue => e
  "Error running ripgrep: #{e.message}"
ensure
  # close! closes (if still open) and unlinks, so the file never lingers.
  temp_file&.close!
end

#sort_lines(text:, reverse: false, numeric: false) ⇒ `Object`

# File 'lib/dspy/tools/text_processing_toolset.rb', line 142

# Sorts the lines of +text+ and returns them newline-terminated.
#
# @param text [String] the text whose lines are sorted
# @param reverse [Boolean] when truthy, emit the sorted lines in reverse order
# @param numeric [Boolean] when truthy, order by each line's String#to_f value
#   instead of by string comparison
# @return [String] the sorted lines, each followed by "\n"
def sort_lines(text:, reverse: false, numeric: false)
  rows = text.lines.map(&:chomp)
  ordered = numeric ? rows.sort_by(&:to_f) : rows.sort
  ordered = ordered.reverse if reverse

  # Every line gets exactly one trailing newline, including the last.
  ordered.map { |row| "#{row}\n" }.join
end

#summarize_text(text:) ⇒ `Object`

# File 'lib/dspy/tools/text_processing_toolset.rb', line 156

# Produces a plain-text statistics report for +text+: line, word and character
# counts, average line and word lengths, and up to five of the most frequent
# words.
#
# Words are whitespace-separated tokens. For the frequency table each token is
# lower-cased and stripped of non-word characters, and tokens shorter than
# three characters after that normalisation are left out.
#
# @param text [String] the text to analyse
# @return [String] a multi-line report joined with "\n" (no trailing newline)
def summarize_text(text:)
  line_list = text.lines
  tokens = text.split(/\s+/).reject(&:empty?)
  char_total = text.length

  # Tally normalised tokens, then keep the five highest counts.
  frequencies = Hash.new(0)
  tokens.each { |token| frequencies[token.downcase.gsub(/[^\w]/, '')] += 1 }
  leaders = frequencies
            .reject { |token, _| token.length < 3 }
            .sort_by { |_, hits| -hits }
            .first(5)

  # Averages fall back to integer 0 when there is nothing to divide by.
  mean_line = line_list.empty? ? 0 : (char_total.to_f / line_list.count).round(2)
  mean_word = tokens.empty? ? 0 : (tokens.sum(&:length).to_f / tokens.count).round(2)

  report = [
    "Text Summary:",
    "  Lines: #{line_list.count}",
    "  Words: #{tokens.count}",
    "  Characters: #{char_total}",
    "  Average line length: #{mean_line}",
    "  Average word length: #{mean_word}"
  ]

  if leaders.any?
    report << "  Most frequent words:"
    report.concat(leaders.map { |token, hits| "    #{token}: #{hits}" })
  end

  report.join("\n")
end

#unique_lines(text:, preserve_order: true) ⇒ `Object`

# File 'lib/dspy/tools/text_processing_toolset.rb', line 129

# Removes duplicate lines from +text+.
#
# Lines are compared without their terminator, and every line in the result
# is terminated with "\n".
#
# @param text [String] the text to de-duplicate
# @param preserve_order [Boolean] when truthy, keep the first occurrence of
#   each line in its original position; otherwise return the distinct lines
#   sorted
# @return [String] the distinct lines, each followed by "\n"
def unique_lines(text:, preserve_order: true)
  # Array#uniq already de-duplicates by hash/eql?, so no Set round-trip (and
  # no reliance on the set library being loaded) is needed.
  distinct = text.lines.map(&:chomp).uniq
  distinct.sort! unless preserve_order

  distinct.map { |line| "#{line}\n" }.join
end

#word_count(text:, lines_only: false, words_only: false, chars_only: false) ⇒ `Object`

# File 'lib/dspy/tools/text_processing_toolset.rb', line 60

# Counts the lines, whitespace-separated words and characters in +text+.
#
# At most one figure is reported when a flag is set; if several flags are
# given, the first of lines_only, words_only, chars_only wins.
#
# @param text [String] the text to measure
# @param lines_only [Boolean] report only the line count
# @param words_only [Boolean] report only the word count
# @param chars_only [Boolean] report only the character count
# @return [String] a human-readable count, or all three when no flag is set
def word_count(text:, lines_only: false, words_only: false, chars_only: false)
  line_total = text.lines.count
  word_total = text.split(/\s+/).reject(&:empty?).count
  char_total = text.length

  return "Lines: #{line_total}" if lines_only
  return "Words: #{word_total}" if words_only
  return "Characters: #{char_total}" if chars_only

  "Lines: #{line_total}, Words: #{word_total}, Characters: #{char_total}"
end

Class: DSPy::Tools::TextProcessingToolset

Overview

Instance Method Summary collapse

Methods inherited from Toolset

Constructor Details

#initialize ⇒ TextProcessingToolset

Instance Method Details

#extract_lines(text:, start_line:, end_line: nil) ⇒ Object

#filter_lines(text:, pattern:, invert: false) ⇒ Object

#grep(text:, pattern:, ignore_case: true, count_only: false) ⇒ Object

#ripgrep(text:, pattern:, context: 0) ⇒ Object

#sort_lines(text:, reverse: false, numeric: false) ⇒ Object

#summarize_text(text:) ⇒ Object

#unique_lines(text:, preserve_order: true) ⇒ Object

#word_count(text:, lines_only: false, words_only: false, chars_only: false) ⇒ Object