Class: DSPy::Tools::TextProcessingToolset

Inherits:
Toolset
  • Object
show all
Extended by:
T::Sig
Defined in:
lib/dspy/tools/text_processing_toolset.rb

Overview

Text processing toolset that provides text analysis and manipulation tools. Includes grep, word count, ripgrep, and other text processing utilities.

Instance Method Summary collapse

Methods inherited from Toolset

schema_for_method, to_tools, tool, toolset_name

Constructor Details

#initialize ⇒ TextProcessingToolset

Returns a new instance of TextProcessingToolset.



28
29
30
# File 'lib/dspy/tools/text_processing_toolset.rb', line 28

# Constructor. Deliberately empty: text processing needs no persistent
# state, so no instance variables are set up here.
def initialize; end

Instance Method Details

#extract_lines(text:, start_line:, end_line: nil) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/dspy/tools/text_processing_toolset.rb', line 100

# Returns a line or an inclusive range of lines from +text+.
#
# Line numbers are 1-based. Line terminators are preserved in the result.
#
# @param text [String] the text to slice
# @param start_line [Integer] first line to return; values below 1 are
#   treated as 1
# @param end_line [Integer, nil] last line to return (inclusive); values past
#   the end of the text are clamped to the last line. When nil, only
#   +start_line+ is returned.
# @return [String] the selected lines joined together, or "" when the
#   requested range is empty, inverted, or entirely outside the text
def extract_lines(text:, start_line:, end_line: nil)
  lines = text.lines
  first_idx = [start_line - 1, 0].max # convert to 0-based, never negative

  # Without an explicit end, the range covers exactly one line.
  last_idx = end_line ? [end_line - 1, lines.length - 1].min : first_idx

  # Guard against an empty or inverted range. Without it, a negative
  # last_idx (end_line <= 0) would be read by Array#[] as "count from the
  # end" and return lines the caller never asked for.
  return "" if last_idx < first_idx

  # Array#[] yields nil when first_idx lies beyond the end of the array.
  (lines[first_idx..last_idx] || []).join
end

#filter_lines(text:, pattern:, invert: false) ⇒ Object



115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/dspy/tools/text_processing_toolset.rb', line 115

# Keeps (or, with +invert+, drops) the lines of +text+ that match +pattern+.
#
# The pattern is compiled as a case-insensitive regular expression. Line
# terminators are preserved, and the surviving lines are concatenated in
# their original order.
#
# @param text [String] the text to filter
# @param pattern [String] regular expression source
# @param invert [Boolean] when true, return the lines that do NOT match
# @return [String] the selected lines joined together
def filter_lines(text:, pattern:, invert: false)
  matcher = Regexp.new(pattern, Regexp::IGNORECASE)
  candidates = text.lines

  kept = invert ? candidates.grep_v(matcher) : candidates.grep(matcher)
  kept.join
end

#grep(text:, pattern:, ignore_case: true, count_only: false) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/dspy/tools/text_processing_toolset.rb', line 33

# Runs the system +grep+ over +text+ and returns its output.
#
# The text is written to a temporary file, which is always removed
# afterwards, even when an error occurs. grep is invoked with an argument
# vector rather than a shell command string, so the pattern is passed
# through verbatim: quotes and other shell metacharacters cannot break or
# inject into the command, and a pattern beginning with "-" is not mistaken
# for an option.
#
# @param text [String] the text to search
# @param pattern [String] the pattern handed to grep
# @param ignore_case [Boolean] pass -i to grep
# @param count_only [Boolean] pass -c to grep and report only the count
# @return [String] the matching lines, a "Found N matches" message when
#   +count_only+ is set, a "No matches found" message when grep printed
#   nothing, or an "Error running grep" message if anything raised
def grep(text:, pattern:, ignore_case: true, count_only: false)
  # grep reads from a file, so stage the text on disk first.
  temp_file = Tempfile.new('text_processing')
  temp_file.write(text)
  temp_file.close

  argv = ['grep']
  argv << '-i' if ignore_case
  argv << '-c' if count_only
  # -e marks the next argument as the pattern; -- ends option parsing.
  argv.push('-e', pattern, '--', temp_file.path)

  # Array form bypasses the shell entirely; stderr is discarded as before.
  result = IO.popen(argv, err: File::NULL, &:read)

  if count_only
    "Found #{result.strip} matches for pattern '#{pattern}'"
  elsif result.empty?
    "No matches found for pattern '#{pattern}'"
  else
    result
  end
rescue => e
  "Error running grep: #{e.message}"
ensure
  # Close and delete the temp file on every path, including failures.
  temp_file&.close!
end

#ripgrep(text:, pattern:, context: 0) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/dspy/tools/text_processing_toolset.rb', line 77

# Runs ripgrep (+rg+) over +text+ and returns its output.
#
# The text is written to a temporary file, which is always removed
# afterwards, even when an error occurs. rg is invoked with an argument
# vector rather than a shell command string, so the pattern is passed
# through verbatim and cannot break or inject into the command. If the rg
# executable cannot be started, the failure is reported through the
# "Error running ripgrep" message instead of being presented as "no matches".
#
# @param text [String] the text to search
# @param pattern [String] the pattern handed to rg
# @param context [Integer] number of context lines (-C) around each match;
#   only passed when greater than zero
# @return [String] rg's output, a "No matches found" message when rg printed
#   nothing, or an "Error running ripgrep" message if anything raised
def ripgrep(text:, pattern:, context: 0)
  # rg reads from a file, so stage the text on disk first.
  temp_file = Tempfile.new('text_processing')
  temp_file.write(text)
  temp_file.close

  argv = ['rg']
  argv.push('-C', context.to_s) if context > 0
  # -e marks the next argument as the pattern; -- ends option parsing.
  argv.push('-e', pattern, '--', temp_file.path)

  # Array form bypasses the shell entirely; stderr is discarded as before.
  result = IO.popen(argv, err: File::NULL, &:read)

  if result.empty?
    "No matches found for pattern '#{pattern}'"
  else
    result
  end
rescue => e
  "Error running ripgrep: #{e.message}"
ensure
  # Close and delete the temp file on every path, including failures.
  temp_file&.close!
end

#sort_lines(text:, reverse: false, numeric: false) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/dspy/tools/text_processing_toolset.rb', line 142

# Sorts the lines of +text+.
#
# Each line is stripped of its terminator before sorting and written back
# with a single "\n", so the result always ends with a newline when the
# input has at least one line.
#
# @param text [String] the text whose lines are sorted
# @param reverse [Boolean] when true, return the lines in descending order
# @param numeric [Boolean] when true, order by each line's String#to_f value
#   instead of comparing the strings themselves
# @return [String] the sorted lines, each terminated by "\n"
def sort_lines(text:, reverse: false, numeric: false)
  entries = text.lines.map(&:chomp)

  ordered = numeric ? entries.sort_by(&:to_f) : entries.sort
  ordered = ordered.reverse if reverse

  ordered.map { |entry| entry + "\n" }.join
end

#summarize_text(text:) ⇒ Object



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/dspy/tools/text_processing_toolset.rb', line 156

# Produces a short plain-text report of basic statistics for +text+.
#
# The report lists the line, word and character counts, the average line and
# word lengths (rounded to two decimals), and up to five of the most frequent
# words. For the frequency table, words are lower-cased and stripped of
# non-word characters, and anything shorter than three characters is ignored.
#
# @param text [String] the text to analyse
# @return [String] the multi-line report (lines joined with "\n")
def summarize_text(text:)
  line_list = text.lines
  tokens = text.split(/\s+/).reject(&:empty?)
  char_total = text.length

  # Tally normalised words; keys keep first-seen order.
  frequencies = Hash.new(0)
  tokens.each { |token| frequencies[token.downcase.gsub(/[^\w]/, '')] += 1 }

  leaders = frequencies
    .reject { |term, _| term.length < 3 }
    .sort_by { |_, occurrences| -occurrences }
    .first(5)

  # Avoid dividing by zero on empty input.
  mean_line = line_list.empty? ? 0 : (char_total.to_f / line_list.count).round(2)
  mean_word = tokens.empty? ? 0 : (tokens.sum(&:length).to_f / tokens.count).round(2)

  report = [
    "Text Summary:",
    "  Lines: #{line_list.count}",
    "  Words: #{tokens.count}",
    "  Characters: #{char_total}",
    "  Average line length: #{mean_line}",
    "  Average word length: #{mean_word}"
  ]

  unless leaders.empty?
    report << "  Most frequent words:"
    report.concat(leaders.map { |term, occurrences| "    #{term}: #{occurrences}" })
  end

  report.join("\n")
end

#unique_lines(text:, preserve_order: true) ⇒ Object



129
130
131
132
133
134
135
136
137
138
139
# File 'lib/dspy/tools/text_processing_toolset.rb', line 129

# Removes duplicate lines from +text+.
#
# Lines are compared without their terminators, and each surviving line is
# written back with a single "\n".
#
# @param text [String] the text to de-duplicate
# @param preserve_order [Boolean] when true, keep the first occurrence of
#   each line in its original position; when false, return the distinct
#   lines sorted
# @return [String] the distinct lines, each terminated by "\n"
def unique_lines(text:, preserve_order: true)
  # Array#uniq keeps first occurrences in order and needs no Set, so this
  # works without `require 'set'` on Rubies that do not autoload it.
  distinct = text.lines.map(&:chomp).uniq
  distinct = distinct.sort unless preserve_order

  distinct.map { |line| "#{line}\n" }.join
end

#word_count(text:, lines_only: false, words_only: false, chars_only: false) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/dspy/tools/text_processing_toolset.rb', line 60

# Counts the lines, words and characters in +text+.
#
# Words are runs of non-whitespace characters. The *_only flags select a
# single figure; when several are set, the first of lines_only, words_only,
# chars_only (in that order) wins.
#
# @param text [String] the text to measure
# @param lines_only [Boolean] report only the line count
# @param words_only [Boolean] report only the word count
# @param chars_only [Boolean] report only the character count
# @return [String] a one-line, human-readable report
def word_count(text:, lines_only: false, words_only: false, chars_only: false)
  line_total = text.lines.count
  word_total = text.split(/\s+/).count { |token| !token.empty? }
  char_total = text.length

  return "Lines: #{line_total}" if lines_only
  return "Words: #{word_total}" if words_only
  return "Characters: #{char_total}" if chars_only

  "Lines: #{line_total}, Words: #{word_total}, Characters: #{char_total}"
end