Class: DSPy::Teleprompt::DataHandler

Inherits:
Object
  • Object
show all
Extended by:
T::Sig
Defined in:
lib/dspy/teleprompt/data_handler.rb

Overview

Data handling for optimization with efficient operations. Provides operations for large datasets during bootstrap and optimization.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(examples) ⇒ DataHandler

Returns a new instance of DataHandler.



17
18
19
# File 'lib/dspy/teleprompt/data_handler.rb', line 17

# Wraps a collection of examples so the handler's other methods can operate on it.
#
# The collection is stored by reference; no copy is taken.
#
# @param examples [Array] the examples to manage
def initialize(examples)
  @examples = examples
end

Instance Attribute Details

#examples ⇒ Object (readonly)

Returns the value of attribute examples.



14
15
16
# File 'lib/dspy/teleprompt/data_handler.rb', line 14

# Reader for the wrapped example collection.
#
# @return [Object] the object currently held in +@examples+ (not a copy)
def examples
  @examples
end

Instance Method Details

#create_candidate_sets(num_sets, set_size, random_state: nil) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/dspy/teleprompt/data_handler.rb', line 82

# Draws +num_sets+ random subsets of the examples, each containing up to
# +set_size+ distinct examples.
#
# Every set is sampled independently from the full collection, so the same
# example may appear in more than one set. When +random_state+ is given, set
# number +i+ is drawn from its own generator seeded with
# <tt>random_state + i</tt>, which makes the result reproducible while giving
# each set a different draw. The process-wide random generator is never
# reseeded.
#
# @param num_sets [Integer] how many sets to build
# @param set_size [Integer] requested size of each set; capped at the number
#   of available examples, and values <= 0 produce empty sets
# @param random_state [Integer, nil] base seed for reproducible sets, or nil
#   to draw from the default generator
# @return [Array<Array>] +num_sets+ arrays of sampled examples
def create_candidate_sets(num_sets, set_size, random_state: nil)
  return Array.new(num_sets) { [] } if @examples.empty?

  # Never request more than is available, and never a negative amount
  # (Array#sample raises on negative sizes).
  actual_set_size = set_size.clamp(0, @examples.size)

  (0...num_sets).map do |i|
    if random_state
      # A dedicated generator per set keeps draws reproducible without
      # touching the global RNG that the rest of the program relies on.
      @examples.sample(actual_set_size, random: Random.new(random_state + i))
    else
      @examples.sample(actual_set_size)
    end
  end
end

#each_batch(batch_size) ⇒ Object



49
50
51
# File 'lib/dspy/teleprompt/data_handler.rb', line 49

# Iterates over the examples in consecutive slices of at most +batch_size+.
#
# A block passed by the caller is invoked once per batch. Previously the
# block was silently dropped because it was neither yielded to nor forwarded.
# The Enumerator over the batches is returned in both cases, so callers that
# chain on the return value keep working.
#
# @param batch_size [Integer] maximum number of examples per batch
# @yieldparam batch [Array] the examples in the current batch
# @return [Enumerator] enumerator producing one array per batch
# @raise [ArgumentError] if +batch_size+ is not positive (raised by +each_slice+)
def each_batch(batch_size, &block)
  batches = @examples.each_slice(batch_size)
  batches.each(&block) if block
  batches
end

#partition_by_success(successful_indices) ⇒ Object



55
56
57
58
59
60
61
# File 'lib/dspy/teleprompt/data_handler.rb', line 55

# Splits the examples into those whose positions are listed in
# +successful_indices+ and all the rest.
#
# Only indices within <tt>0...@examples.size</tt> are honoured. Out-of-range
# indices are ignored. That includes negative ones: they used to wrap around
# via Array#[] and put the same example in both partitions.
#
# @param successful_indices [Array<Integer>] positions of successful examples
# @return [Array(Array, Array)] +[successful_examples, failed_examples]+;
#   successful examples follow the order of +successful_indices+, failed
#   examples follow their original order
def partition_by_success(successful_indices)
  total = @examples.size

  # Drop anything that does not address a real position before using it, so
  # both partitions are derived from the same set of indices.
  valid_indices = successful_indices.select { |i| i.between?(0, total - 1) }
  successful_examples = valid_indices.map { |i| @examples[i] }

  failed_indices = (0...total).to_a - valid_indices
  failed_examples = failed_indices.map { |i| @examples[i] }

  [successful_examples, failed_examples]
end

#sample(n, random_state: nil) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/dspy/teleprompt/data_handler.rb', line 23

# Picks up to +n+ distinct examples at random.
#
# When +random_state+ is given, the draw uses a private generator seeded with
# that value, so the result is reproducible. The process-wide random
# generator is left untouched; reseeding it would make every later random
# call in the program predictable.
#
# @param n [Integer] number of examples wanted; capped at the number available
# @param random_state [Integer, nil] seed for a reproducible draw, or nil to
#   use the default generator
# @return [Array] the sampled examples; empty when there are no examples or
#   +n+ is not positive
def sample(n, random_state: nil)
  return [] if @examples.empty? || n <= 0

  # Asking for more than exists simply returns everything (in random order).
  actual_n = [n, @examples.size].min
  return @examples.sample(actual_n) unless random_state

  @examples.sample(actual_n, random: Random.new(random_state))
end

#shuffle(random_state: nil) ⇒ Object



39
40
41
42
43
44
45
# File 'lib/dspy/teleprompt/data_handler.rb', line 39

# Returns the examples in random order without modifying the stored
# collection.
#
# When +random_state+ is given, a private generator seeded with that value
# drives the shuffle, so the ordering is reproducible and the process-wide
# random generator is not reseeded as a side effect.
#
# @param random_state [Integer, nil] seed for a reproducible ordering, or nil
#   to use the default generator
# @return [Array] a new array holding every example in shuffled order
def shuffle(random_state: nil)
  return @examples.shuffle unless random_state

  @examples.shuffle(random: Random.new(random_state))
end

#statistics ⇒ Object



72
73
74
75
76
77
78
# File 'lib/dspy/teleprompt/data_handler.rb', line 72

# Summarises the wrapped examples.
#
# @return [Hash] with keys
#   - +:total_examples+ — number of examples
#   - +:example_types+ — names of the distinct classes found among the
#     examples, in order of first appearance
#   - +:memory_usage_estimate+ — example count multiplied by a flat 1000
#     (presumably bytes per example; the unit is not stated in the code)
def statistics
  count = @examples.size
  type_names = @examples.map { |example| example.class }.uniq.map { |klass| klass.name }

  {
    total_examples: count,
    example_types: type_names,
    memory_usage_estimate: count * 1000 # Coarse heuristic, not a measurement
  }
end

#stratified_sample(n, stratify_column: nil) ⇒ Object



65
66
67
68
# File 'lib/dspy/teleprompt/data_handler.rb', line 65

# Placeholder for stratified sampling.
#
# No stratification is performed yet: +stratify_column+ is accepted but
# unused, and the call is equivalent to a plain, unseeded +sample(n)+.
#
# @param n [Integer] number of examples wanted
# @param stratify_column [Object, nil] currently ignored
# @return [Array] whatever +sample(n)+ returns
def stratified_sample(n, stratify_column: nil)
  # TODO: honour stratify_column once stratification is implemented.
  sample(n)
end