Class: DSPy::Tools::TextProcessingToolset
- Inherits:
-
Toolset
- Object
- Toolset
- DSPy::Tools::TextProcessingToolset
show all
- Extended by:
- T::Sig
- Defined in:
- lib/dspy/tools/text_processing_toolset.rb
Overview
Text processing toolset that provides text analysis and manipulation tools. Includes grep, word count, ripgrep, and other text processing utilities.
Instance Method Summary
collapse
-
#extract_lines(text:, start_line:, end_line: nil) ⇒ Object
-
#filter_lines(text:, pattern:, invert: false) ⇒ Object
-
#grep(text:, pattern:, ignore_case: true, count_only: false) ⇒ Object
-
#initialize ⇒ TextProcessingToolset
constructor
A new instance of TextProcessingToolset.
-
#ripgrep(text:, pattern:, context: 0) ⇒ Object
-
#sort_lines(text:, reverse: false, numeric: false) ⇒ Object
-
#summarize_text(text:) ⇒ Object
-
#unique_lines(text:, preserve_order: true) ⇒ Object
-
#word_count(text:, lines_only: false, words_only: false, chars_only: false) ⇒ Object
Methods inherited from Toolset
schema_for_method, to_tools, tool, toolset_name
Constructor Details
Returns a new instance of TextProcessingToolset.
28
29
30
|
# File 'lib/dspy/tools/text_processing_toolset.rb', line 28
# Creates a new toolset instance. The body is intentionally empty: no
# instance state is set up here.
def initialize
end
|
Instance Method Details
#extract_lines(text:, start_line:, end_line: nil) ⇒ Object
100
101
102
103
104
105
106
107
108
109
110
111
112
|
# File 'lib/dspy/tools/text_processing_toolset.rb', line 100
# Returns a 1-based, inclusive range of lines from +text+.
#
# @param text [String] the text to slice
# @param start_line [Integer] 1-based first line; values below 1 are treated as 1
# @param end_line [Integer, nil] 1-based last line (clamped to the final line);
#   when nil, only +start_line+ is returned
# @return [String] the selected lines joined together (line endings kept),
#   or "" when the requested range selects nothing
def extract_lines(text:, start_line:, end_line: nil)
  lines = text.lines
  start_idx = [start_line - 1, 0].max
  end_idx = end_line ? [end_line - 1, lines.length - 1].min : start_idx

  # An end before the start selects nothing. Without this guard a negative
  # end index would wrap around and return everything up to the last line.
  return "" if end_idx < start_idx

  # Array#[] returns nil when start_idx lies past the end of the array.
  lines[start_idx..end_idx]&.join || ""
end
|
#filter_lines(text:, pattern:, invert: false) ⇒ Object
115
116
117
118
119
120
121
122
123
124
125
126
|
# File 'lib/dspy/tools/text_processing_toolset.rb', line 115
# Keeps (or, with +invert+, drops) the lines of +text+ that match +pattern+.
# Matching is always case-insensitive.
#
# @param text [String] the text to filter
# @param pattern [String] regular expression source
# @param invert [Boolean] when true, return the lines that do NOT match
# @return [String] the surviving lines, joined with their original line endings
def filter_lines(text:, pattern:, invert: false)
  rows = text.lines
  matcher = Regexp.new(pattern, Regexp::IGNORECASE)
  hits, misses = rows.partition { |row| row.match?(matcher) }
  (invert ? misses : hits).join
end
|
#grep(text:, pattern:, ignore_case: true, count_only: false) ⇒ Object
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
# File 'lib/dspy/tools/text_processing_toolset.rb', line 33
# Searches +text+ with the system +grep+ binary.
#
# The text is written to a temporary file and grep is invoked with an argv
# array (no shell), so quotes or shell metacharacters in +pattern+ are passed
# to grep verbatim and cannot run other commands.
#
# @param text [String] the text to search
# @param pattern [String] grep regular expression
# @param ignore_case [Boolean] pass -i to grep
# @param count_only [Boolean] pass -c and report the number of matching lines
# @return [String] the matching lines, a "Found N matches" / "No matches found"
#   message, or an "Error running grep: ..." message on failure
def grep(text:, pattern:, ignore_case: true, count_only: false)
  temp_file = Tempfile.new('text_processing')
  temp_file.write(text)
  temp_file.close

  args = ['grep']
  args << '-i' if ignore_case
  args << '-c' if count_only
  # -e stops a pattern that begins with "-" from being parsed as an option;
  # "--" does the same for the file path.
  args.push('-e', pattern.to_s, '--', temp_file.path)

  result = IO.popen(args, err: File::NULL, &:read)

  # grep exits 0 on a match and 1 on no match; anything higher is a real
  # failure (e.g. an invalid regex) and must not be reported as "no matches".
  exit_code = Process.last_status&.exitstatus
  if exit_code && exit_code > 1
    return "Error running grep: grep exited with status #{exit_code}"
  end

  if count_only
    "Found #{result.strip} matches for pattern '#{pattern}'"
  elsif result.empty?
    "No matches found for pattern '#{pattern}'"
  else
    result
  end
rescue => e
  "Error running grep: #{e.message}"
ensure
  # Always remove the temp file, including when an exception was raised.
  temp_file&.close!
end
|
#ripgrep(text:, pattern:, context: 0) ⇒ Object
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
# File 'lib/dspy/tools/text_processing_toolset.rb', line 77
# Searches +text+ with the +rg+ (ripgrep) binary.
#
# The text is written to a temporary file and rg is invoked with an argv
# array (no shell), so quotes or shell metacharacters in +pattern+ are passed
# to rg verbatim and cannot run other commands.
#
# @param text [String] the text to search
# @param pattern [String] ripgrep regular expression
# @param context [Integer] number of context lines (-C) around each match;
#   only passed to rg when greater than zero
# @return [String] the matching lines, a "No matches found" message, or an
#   "Error running ripgrep: ..." message (including when rg is not installed)
def ripgrep(text:, pattern:, context: 0)
  temp_file = Tempfile.new('text_processing')
  temp_file.write(text)
  temp_file.close

  args = ['rg']
  args.push('-C', context.to_s) if context > 0
  # -e stops a pattern that begins with "-" from being parsed as an option;
  # "--" does the same for the file path.
  args.push('-e', pattern.to_s, '--', temp_file.path)

  result = IO.popen(args, err: File::NULL, &:read)

  # rg exits 0 on a match and 1 on no match; anything higher is a real
  # failure (e.g. an invalid regex) and must not be reported as "no matches".
  exit_code = Process.last_status&.exitstatus
  if exit_code && exit_code > 1
    return "Error running ripgrep: rg exited with status #{exit_code}"
  end

  if result.empty?
    "No matches found for pattern '#{pattern}'"
  else
    result
  end
rescue => e
  "Error running ripgrep: #{e.message}"
ensure
  # Always remove the temp file, including when an exception was raised.
  temp_file&.close!
end
|
#sort_lines(text:, reverse: false, numeric: false) ⇒ Object
142
143
144
145
146
147
148
149
150
151
152
153
|
# File 'lib/dspy/tools/text_processing_toolset.rb', line 142
# Sorts the lines of +text+.
#
# @param text [String] the text whose lines are sorted
# @param reverse [Boolean] when true, the sorted order is reversed
# @param numeric [Boolean] when true, lines are ordered by their +to_f+ value
#   instead of by string comparison
# @return [String] the sorted lines, each terminated by "\n"
def sort_lines(text:, reverse: false, numeric: false)
  entries = text.lines.map(&:chomp)
  ordered = numeric ? entries.sort_by(&:to_f) : entries.sort
  ordered = ordered.reverse if reverse
  ordered.map { |entry| "#{entry}\n" }.join
end
|
#summarize_text(text:) ⇒ Object
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
|
# File 'lib/dspy/tools/text_processing_toolset.rb', line 156
# Builds a plain-text statistical summary of +text+: line, word and character
# counts, average line and word lengths, and up to five of the most frequent
# words (case-folded, non-word characters stripped, at least 3 characters).
#
# @param text [String] the text to summarize
# @return [String] the summary, one statistic per line
def summarize_text(text:)
  line_list = text.lines
  tokens = text.split(/\s+/).reject(&:empty?)
  char_total = text.length

  # Tally normalized words; tokens that normalize to "" are counted too but
  # are dropped by the length filter below.
  frequency = Hash.new(0)
  tokens.each { |token| frequency[token.downcase.gsub(/[^\w]/, '')] += 1 }
  ranked = frequency.select { |word, _| word.length >= 3 }
                    .sort_by { |_, count| -count }
                    .first(5)

  mean_line = line_list.empty? ? 0 : (char_total.to_f / line_list.count).round(2)
  mean_word = tokens.empty? ? 0 : (tokens.sum(&:length).to_f / tokens.count).round(2)

  report = [
    "Text Summary:",
    " Lines: #{line_list.count}",
    " Words: #{tokens.count}",
    " Characters: #{char_total}",
    " Average line length: #{mean_line}",
    " Average word length: #{mean_word}"
  ]
  unless ranked.empty?
    report << " Most frequent words:"
    report.concat(ranked.map { |word, count| " #{word}: #{count}" })
  end
  report.join("\n")
end
|
#unique_lines(text:, preserve_order: true) ⇒ Object
129
130
131
132
133
134
135
136
137
138
139
|
# File 'lib/dspy/tools/text_processing_toolset.rb', line 129
# Removes duplicate lines from +text+.
#
# @param text [String] the text to de-duplicate
# @param preserve_order [Boolean] when true, keep the first occurrence of each
#   line in its original position; when false, return the unique lines sorted
# @return [String] the unique lines, each terminated by "\n"
def unique_lines(text:, preserve_order: true)
  # Array#uniq de-duplicates with the same hash/eql? semantics as Set, so no
  # Set round-trip (and no dependency on Set being loaded) is needed.
  unique = text.lines.map(&:chomp).uniq
  unique = unique.sort unless preserve_order
  unique.map { |line| "#{line}\n" }.join
end
|
#word_count(text:, lines_only: false, words_only: false, chars_only: false) ⇒ Object
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
# File 'lib/dspy/tools/text_processing_toolset.rb', line 60
# Counts lines, whitespace-separated words, and characters in +text+.
#
# The *_only flags are checked in the order lines, words, chars; the first
# one that is set decides the output. With none set, all three are reported.
#
# @param text [String] the text to measure
# @param lines_only [Boolean] report only the line count
# @param words_only [Boolean] report only the word count
# @param chars_only [Boolean] report only the character count
# @return [String] a human-readable count message
def word_count(text:, lines_only: false, words_only: false, chars_only: false)
  line_total = text.lines.count
  word_total = text.split(/\s+/).reject(&:empty?).count
  char_total = text.length

  return "Lines: #{line_total}" if lines_only
  return "Words: #{word_total}" if words_only
  return "Characters: #{char_total}" if chars_only

  "Lines: #{line_total}, Words: #{word_total}, Characters: #{char_total}"
end
|