Class: DSPy::Teleprompt::DataHandler
- Inherits:
-
Object
- Object
- DSPy::Teleprompt::DataHandler
- Extended by:
- T::Sig
- Defined in:
- lib/dspy/teleprompt/data_handler.rb
Overview
Data handling for optimization with efficient operations. Provides operations for large datasets during bootstrap and optimization.
Instance Attribute Summary collapse
-
#examples ⇒ Object
readonly
Returns the value of attribute examples.
Instance Method Summary collapse
- #create_candidate_sets(num_sets, set_size, random_state: nil) ⇒ Object
- #each_batch(batch_size) ⇒ Object
-
#initialize(examples) ⇒ DataHandler
constructor
A new instance of DataHandler.
- #partition_by_success(successful_indices) ⇒ Object
- #sample(n, random_state: nil) ⇒ Object
- #shuffle(random_state: nil) ⇒ Object
- #statistics ⇒ Object
- #stratified_sample(n, stratify_column: nil) ⇒ Object
Constructor Details
#initialize(examples) ⇒ DataHandler
Returns a new instance of DataHandler.
17 18 19 |
# File 'lib/dspy/teleprompt/data_handler.rb', line 17

# Builds a handler around an existing collection of examples.
#
# The collection is stored by reference (no copy is made), so later
# changes to the caller's object are visible through this handler.
#
# @param examples [Array] the examples to operate on; the other methods of
#   this class call Array-style methods on it (size, sample, shuffle, ...)
def initialize(examples)
  @examples = examples
end
Instance Attribute Details
#examples ⇒ Object (readonly)
Returns the value of attribute examples.
14 15 16 |
# File 'lib/dspy/teleprompt/data_handler.rb', line 14

# Read-only accessor for the wrapped examples.
#
# @return [Object] the same object that was handed to the constructor
#   (not a copy)
def examples
  @examples
end
Instance Method Details
#create_candidate_sets(num_sets, set_size, random_state: nil) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/dspy/teleprompt/data_handler.rb', line 82

# Draws several independent random subsets of the examples.
#
# Each subset is sampled without replacement from the full example list,
# so the same example may appear in more than one subset but never twice
# within one subset.
#
# @param num_sets [Integer] how many subsets to build
# @param set_size [Integer] desired size of each subset; capped at the
#   number of available examples and floored at zero
# @param random_state [Integer, nil] optional seed; set +i+ (zero-based)
#   is drawn with seed <tt>random_state + i</tt> so the sets differ from
#   each other while the overall result stays reproducible
# @return [Array<Array>] +num_sets+ arrays of examples (all empty when
#   there are no examples)
def create_candidate_sets(num_sets, set_size, random_state: nil)
  return Array.new(num_sets) { [] } if @examples.empty?

  # Non-positive sizes yield empty sets instead of letting Array#sample raise,
  # mirroring how #sample treats n <= 0.
  actual_set_size = set_size.clamp(0, @examples.size)

  num_sets.times.map do |i|
    if random_state
      # A private generator per set keeps the process-wide RNG untouched;
      # calling Kernel#srand here would reseed it for every other caller.
      @examples.sample(actual_set_size, random: Random.new(random_state + i))
    else
      @examples.sample(actual_set_size)
    end
  end
end
#each_batch(batch_size) ⇒ Object
49 50 51 |
# File 'lib/dspy/teleprompt/data_handler.rb', line 49

# Splits the examples into consecutive batches of at most +batch_size+.
#
# When a block is given, every batch is yielded to it in order. The
# enumerator over the batches is returned in both cases, so callers that
# chain on the return value keep working whether or not they pass a block.
#
# @param batch_size [Integer] maximum number of examples per batch
# @yieldparam batch [Array] one slice of the examples
# @return [Enumerator] enumerator producing each batch
def each_batch(batch_size)
  batches = @examples.each_slice(batch_size)
  # Blocks are not forwarded implicitly in Ruby; without this line a block
  # passed to #each_batch would be silently ignored.
  batches.each { |batch| yield batch } if block_given?
  batches
end
#partition_by_success(successful_indices) ⇒ Object
55 56 57 58 59 60 61 |
# File 'lib/dspy/teleprompt/data_handler.rb', line 55

# Splits the examples into those whose index is listed as successful and
# all the rest.
#
# Indices outside <tt>0...examples.size</tt> are ignored. That includes
# negative indices: Array#[] would otherwise wrap them around to the end
# of the list, placing the same example in both partitions.
#
# @param successful_indices [Array<Integer>] zero-based positions of the
#   examples that succeeded
# @return [Array(Array, Array)] a pair +[successful, failed]+; successful
#   examples follow the order of +successful_indices+, failed examples
#   follow their original order
def partition_by_success(successful_indices)
  valid_positions = 0...@examples.size

  successful_examples = successful_indices
                        .select { |i| valid_positions.cover?(i) }
                        .map { |i| @examples[i] }

  failed_indices = valid_positions.to_a - successful_indices
  failed_examples = failed_indices.map { |i| @examples[i] }

  [successful_examples, failed_examples]
end
#sample(n, random_state: nil) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/dspy/teleprompt/data_handler.rb', line 23

# Picks up to +n+ distinct examples at random.
#
# @param n [Integer] number of examples wanted; values larger than the
#   number of available examples are capped
# @param random_state [Integer, nil] optional seed making the draw
#   reproducible
# @return [Array] the sampled examples, or an empty array when there are no
#   examples or +n+ is not positive
def sample(n, random_state: nil)
  return [] if @examples.empty? || n <= 0

  # Never ask for more than is available.
  actual_n = [n, @examples.size].min

  return @examples.sample(actual_n) unless random_state

  # Seed a private generator rather than calling Kernel#srand, which would
  # reseed the process-wide RNG as a side effect.
  @examples.sample(actual_n, random: Random.new(random_state))
end
#shuffle(random_state: nil) ⇒ Object
39 40 41 42 43 44 45 |
# File 'lib/dspy/teleprompt/data_handler.rb', line 39

# Returns the examples in random order without modifying the stored list.
#
# @param random_state [Integer, nil] optional seed making the order
#   reproducible
# @return [Array] a new array containing every example exactly once
def shuffle(random_state: nil)
  return @examples.shuffle unless random_state

  # Seed a private generator rather than calling Kernel#srand, which would
  # reseed the process-wide RNG as a side effect.
  @examples.shuffle(random: Random.new(random_state))
end
#statistics ⇒ Object
72 73 74 75 76 77 78 |
# File 'lib/dspy/teleprompt/data_handler.rb', line 72

# Summarises the wrapped examples.
#
# @return [Hash{Symbol => Object}] with the keys
#   * +:total_examples+ - number of examples
#   * +:example_types+ - names of the distinct example classes, in order
#     of first appearance
#   * +:memory_usage_estimate+ - rough figure computed as 1000 per example
#     (the code does not state a unit)
def statistics
  count = @examples.size
  # One representative per class, then report that class's name.
  type_names = @examples.uniq(&:class).map { |example| example.class.name }

  {
    total_examples: count,
    example_types: type_names,
    memory_usage_estimate: count * 1000
  }
end
#stratified_sample(n, stratify_column: nil) ⇒ Object
65 66 67 68 |
# File 'lib/dspy/teleprompt/data_handler.rb', line 65

# Placeholder for stratified sampling.
#
# Stratification is not implemented yet: +stratify_column+ is accepted but
# ignored, and the call is answered by a plain, unseeded #sample of +n+.
#
# @param n [Integer] number of examples wanted
# @param stratify_column [Object, nil] currently unused
# @return [Array] whatever <tt>sample(n)</tt> returns
def stratified_sample(n, stratify_column: nil)
  # For now, fall back to regular sampling (can be enhanced later)
  sample(n)
end