Class: GEPA::Core::State

Inherits:
Object
  • Object
show all
Extended by:
T::Sig
Defined in:
lib/gepa/core/state.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(seed_candidate, base_valset_eval_output, track_best_outputs: false) ⇒ State

Returns a new instance of State.

Raises:

  • (ArgumentError)


37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/gepa/core/state.rb', line 37

# Builds the optimizer state around a single seed candidate (index 0).
#
# @param seed_candidate [Hash] seed program; its keys become the list of
#   named predictors and a shallow copy is stored as candidate 0
# @param base_valset_eval_output [Array] two-element pair of
#   (per-task outputs, per-task scores) for the seed on the validation set
# @param track_best_outputs [Boolean] when truthy, remember the seed's
#   output for every task as [[0, output]]; otherwise the tracker is nil
# @raise [ArgumentError] if the scores half of the pair is empty
def initialize(seed_candidate, base_valset_eval_output, track_best_outputs: false)
  seed_outputs, seed_scores = base_valset_eval_output
  raise ArgumentError, 'validation scores must not be empty' if seed_scores.empty?

  # Aggregate score of the seed is the arithmetic mean of its task scores.
  mean_score = seed_scores.sum / seed_scores.length.to_f

  # Per-candidate parallel arrays; slot 0 belongs to the seed.
  @program_candidates = [seed_candidate.dup]
  @program_full_scores_val_set = [mean_score]
  @per_program_tracked_scores = [mean_score]

  # Per-task Pareto data: the seed is the only front member for every task.
  @pareto_front_valset = seed_scores.dup
  @parent_program_for_candidate = [[nil]]
  @program_at_pareto_front_valset = Array.new(seed_scores.length) { Set[0] }

  @list_of_named_predictors = seed_candidate.keys
  @named_predictor_id_to_update_next_for_program_candidate = [0]

  @prog_candidate_val_subscores = [seed_scores.dup]
  @num_metric_calls_by_discovery = [0]

  @best_outputs_valset = track_best_outputs ? seed_outputs.map { |seed_output| [[0, seed_output]] } : nil

  # Counters start from a "nothing has run yet" position.
  @full_program_trace = []
  @i = -1
  @num_full_ds_evals = 0
  @total_num_evals = 0
end

Instance Attribute Details

#best_outputs_valset ⇒ Object (readonly)

Returns the value of attribute best_outputs_valset.



17
18
19
# File 'lib/gepa/core/state.rb', line 17

# Best-output tracker. The constructor leaves this nil unless
# track_best_outputs was truthy; otherwise it holds one entry per validation
# task, each a list of [program_index, output] pairs.
def best_outputs_valset
  @best_outputs_valset
end

#full_program_trace ⇒ Object (readonly)

Returns the value of attribute full_program_trace.



17
18
19
# File 'lib/gepa/core/state.rb', line 17

# Trace array; the constructor initializes it to an empty Array.
# NOTE(review): what gets appended is not visible in this class excerpt.
def full_program_trace
  @full_program_trace
end

#i ⇒ Object

Returns the value of attribute i.



16
17
18
# File 'lib/gepa/core/state.rb', line 16

# Counter that the constructor starts at -1.
# NOTE(review): presumably the current iteration index — confirm with callers.
def i
  @i
end

#list_of_named_predictors ⇒ Object (readonly)

Returns the value of attribute list_of_named_predictors.



17
18
19
# File 'lib/gepa/core/state.rb', line 17

# Keys of the seed candidate, captured once by the constructor.
def list_of_named_predictors
  @list_of_named_predictors
end

#named_predictor_id_to_update_next_for_program_candidate ⇒ Object (readonly)

Returns the value of attribute named_predictor_id_to_update_next_for_program_candidate.



17
18
19
# File 'lib/gepa/core/state.rb', line 17

# One predictor id per candidate. The seed starts at 0; each candidate added
# via #update_state_with_new_program gets the maximum id among its parents,
# or 0 when none is available.
def named_predictor_id_to_update_next_for_program_candidate
  @named_predictor_id_to_update_next_for_program_candidate
end

#num_full_ds_evals ⇒ Object

Returns the value of attribute num_full_ds_evals.



16
17
18
# File 'lib/gepa/core/state.rb', line 16

# Counter that the constructor starts at 0; .initialize_gepa_state sets it
# to 1 after evaluating the seed candidate.
def num_full_ds_evals
  @num_full_ds_evals
end

#num_metric_calls_by_discovery ⇒ Object (readonly)

Returns the value of attribute num_metric_calls_by_discovery.



17
18
19
# File 'lib/gepa/core/state.rb', line 17

# One entry per candidate: 0 for the seed, then the num_metric_calls value
# passed to #update_state_with_new_program for each later candidate.
def num_metric_calls_by_discovery
  @num_metric_calls_by_discovery
end

#parent_program_for_candidate ⇒ Object (readonly)

Returns the value of attribute parent_program_for_candidate.



17
18
19
# File 'lib/gepa/core/state.rb', line 17

# One entry per candidate holding a copy of its parent-index list; the seed's
# entry is [nil].
def parent_program_for_candidate
  @parent_program_for_candidate
end

#pareto_front_valset ⇒ Object (readonly)

Returns the value of attribute pareto_front_valset.



17
18
19
# File 'lib/gepa/core/state.rb', line 17

# Per-task best score seen so far: seeded with a copy of the seed's scores and
# raised whenever a new candidate scores strictly higher on a task.
def pareto_front_valset
  @pareto_front_valset
end

#per_program_tracked_scores ⇒ Object (readonly)

Returns the value of attribute per_program_tracked_scores.



17
18
19
# File 'lib/gepa/core/state.rb', line 17

# Per-candidate scores; replaced with a fresh copy of
# program_full_scores_val_set at the end of every
# #update_state_with_new_program call.
def per_program_tracked_scores
  @per_program_tracked_scores
end

#prog_candidate_val_subscores ⇒ Object (readonly)

Returns the value of attribute prog_candidate_val_subscores.



17
18
19
# File 'lib/gepa/core/state.rb', line 17

# One entry per candidate: a copy of that candidate's per-task validation
# scores.
def prog_candidate_val_subscores
  @prog_candidate_val_subscores
end

#program_at_pareto_front_valset ⇒ Object (readonly)

Returns the value of attribute program_at_pareto_front_valset.



17
18
19
# File 'lib/gepa/core/state.rb', line 17

# One Set per validation task containing the indices of the candidates whose
# score on that task equals the current entry of pareto_front_valset.
def program_at_pareto_front_valset
  @program_at_pareto_front_valset
end

#program_candidates ⇒ Object (readonly)

Returns the value of attribute program_candidates.



17
18
19
# File 'lib/gepa/core/state.rb', line 17

# All candidate programs in insertion order; index 0 is a copy of the seed and
# each later entry is a copy of the program passed to
# #update_state_with_new_program.
def program_candidates
  @program_candidates
end

#program_full_scores_val_set ⇒ Object (readonly)

Returns the value of attribute program_full_scores_val_set.



17
18
19
# File 'lib/gepa/core/state.rb', line 17

# One aggregate validation score per candidate: the mean of the seed's task
# scores for index 0, then valset_score.to_f for every candidate added later.
def program_full_scores_val_set
  @program_full_scores_val_set
end

#total_num_evals ⇒ Object

Returns the value of attribute total_num_evals.



16
17
18
# File 'lib/gepa/core/state.rb', line 16

# Counter that the constructor starts at 0; .initialize_gepa_state sets it to
# the number of validation scores returned for the seed candidate.
def total_num_evals
  @total_num_evals
end

Class Method Details

.initialize_gepa_state(run_dir:, logger:, seed_candidate:, valset_evaluator:, track_best_outputs: false) ⇒ Object



191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/gepa/core/state.rb', line 191

# Returns a state for a run: restored from +run_dir+ when a previous
# checkpoint is present, otherwise freshly built from the seed candidate.
#
# A checkpoint counts as present only when +run_dir+ is non-nil and contains
# both 'gepa_state.bin' and 'prog_candidates'.
#
# @param run_dir [String, nil] run directory; nil disables all disk access
# @param logger [#log] receives one message when a checkpoint is loaded
# @param seed_candidate [Hash] program evaluated to bootstrap a fresh state
# @param valset_evaluator [#call] called with the seed; its result is passed
#   to the constructor as the (outputs, scores) pair
# @param track_best_outputs [Boolean] forwarded to the constructor
# @return [State] the loaded or newly created state
def self.initialize_gepa_state(run_dir:, logger:, seed_candidate:, valset_evaluator:, track_best_outputs: false)
  checkpoint_present = run_dir &&
                       File.exist?(File.join(run_dir, 'gepa_state.bin')) &&
                       File.exist?(File.join(run_dir, 'prog_candidates'))
  if checkpoint_present
    logger.log('Loading gepa state from run dir')
    return load(run_dir)
  end

  seed_eval = valset_evaluator.call(seed_candidate)
  write_eval_output_to_directory(seed_eval, File.join(run_dir, 'generated_best_outputs_valset')) if run_dir

  new(seed_candidate, seed_eval, track_best_outputs: track_best_outputs).tap do |fresh|
    # The seed evaluation counts as one full pass over the validation set.
    fresh.num_full_ds_evals = 1
    fresh.total_num_evals = seed_eval.last.length
  end
end

.load(run_dir) ⇒ Object



100
101
102
103
104
105
106
107
108
# File 'lib/gepa/core/state.rb', line 100

# Rebuilds a state from the 'gepa_state.bin' snapshot inside +run_dir+.
#
# The snapshot is expected to be a collection of (name, value) pairs; each
# pair is restored as the instance variable "@name" on an instance created
# without running the constructor. The restored state is then checked with
# #consistent?, which raises on a mismatch.
#
# NOTE: Marshal.load can instantiate arbitrary objects, so only use this on
# run directories whose contents you trust.
#
# @param run_dir [String] directory containing 'gepa_state.bin'
# @return [State] the restored state
def self.load(run_dir)
  snapshot_path = File.join(run_dir, 'gepa_state.bin')
  File.open(snapshot_path, 'rb') do |io|
    snapshot = Marshal.load(io)
    allocate.tap do |restored|
      snapshot.each do |name, value|
        restored.instance_variable_set("@#{name}", value)
      end
      restored.consistent?
    end
  end
end

.write_eval_output_to_directory(eval_output, output_dir) ⇒ Object



172
173
174
175
176
177
178
179
180
# File 'lib/gepa/core/state.rb', line 172

# Writes each per-task score of an evaluation result to its own JSON file.
#
# For every index in the scores half of +eval_output+ this creates
# "<output_dir>/task_<idx>/" (including missing parents) and stores that
# task's score, pretty-printed as JSON, in 'iter_0_prog_0.json'.
#
# @param eval_output [Array] (outputs, scores) pair; only the scores are used
# @param output_dir [String] root directory for the per-task folders
# @return [Array] the scores collection that was iterated
def self.write_eval_output_to_directory(eval_output, output_dir)
  _outputs, per_task_scores = eval_output
  per_task_scores.each_with_index do |task_score, task_idx|
    task_dir = File.join(output_dir, "task_#{task_idx}")
    FileUtils.mkdir_p(task_dir)
    File.write(File.join(task_dir, 'iter_0_prog_0.json'), JSON.pretty_generate(task_score))
  end
end

Instance Method Details

#consistent? ⇒ Boolean

Returns:

  • (Boolean)


68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/gepa/core/state.rb', line 68

# Verifies that the parallel bookkeeping arrays agree with each other.
#
# Every per-candidate array must have as many entries as program_candidates,
# the two per-task Pareto arrays must have equal length, and every index
# stored in a Pareto front set must be below the candidate count.
#
# @return [true] when every check passes
# @raise [RuntimeError] naming the first check that fails
def consistent?
  candidate_count = @program_candidates.length

  # Checked in this order; the key is the message raised on mismatch.
  per_candidate = {
    'program_full_scores_val_set mismatch' => @program_full_scores_val_set,
    'per_program_tracked_scores mismatch' => @per_program_tracked_scores,
    'parent_program_for_candidate mismatch' => @parent_program_for_candidate,
    'named_predictor_id_to_update mismatch' => @named_predictor_id_to_update_next_for_program_candidate,
    'prog_candidate_val_subscores mismatch' => @prog_candidate_val_subscores,
    'num_metric_calls mismatch' => @num_metric_calls_by_discovery
  }
  per_candidate.each do |message, collection|
    raise message unless collection.length == candidate_count
  end

  raise 'pareto fronts length mismatch' unless @pareto_front_valset.length == @program_at_pareto_front_valset.length

  indices_in_range = @program_at_pareto_front_valset.all? do |front|
    front.all? { |member| member < candidate_count }
  end
  raise 'pareto index out of range' unless indices_in_range

  true
end

#save(run_dir) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
# File 'lib/gepa/core/state.rb', line 87

# Serializes every instance variable of this state to
# "<run_dir>/gepa_state.bin".
#
# The snapshot is a Hash keyed by instance-variable name without the leading
# "@", encoded with Marshal. It is marshalled in memory first and written to
# a temporary sibling file that is then renamed over the target, so a failure
# while dumping or writing never truncates an existing checkpoint.
#
# @param run_dir [String, nil] directory to write into (created if missing);
#   nil turns the call into a no-op
# @return [nil]
# @raise [TypeError] if an instance variable holds something Marshal cannot
#   dump; the previous checkpoint, if any, is left untouched
def save(run_dir)
  return if run_dir.nil?

  snapshot = instance_variables.each_with_object({}) do |ivar, acc|
    acc[ivar.to_s.delete_prefix('@')] = instance_variable_get(ivar)
  end
  # Marshal before touching the filesystem: this is the step most likely to
  # fail, and it must not cost us the last good checkpoint.
  payload = Marshal.dump(snapshot)

  FileUtils.mkdir_p(run_dir)
  target = File.join(run_dir, 'gepa_state.bin')
  staging = "#{target}.tmp"
  begin
    File.binwrite(staging, payload)
    File.rename(staging, target)
  ensure
    # No-op after a successful rename; removes the partial file otherwise.
    FileUtils.rm_f(staging)
  end
  nil
end

#update_state_with_new_program(parent_program_idx, new_program, valset_score, valset_outputs, valset_subscores, run_dir, num_metric_calls) ⇒ Object



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/gepa/core/state.rb', line 121

# Registers a newly evaluated candidate and folds its per-task validation
# scores into the Pareto bookkeeping.
#
# For each task, a strictly higher score replaces the front entry, makes the
# new candidate the sole front member, resets the tracked best outputs (when
# tracking is enabled) and calls write_best_output; an equal score adds the
# candidate to the existing front; a lower score changes nothing.
#
# @param parent_program_idx [Array<Integer>] indices of the candidates this
#   program was derived from; a copy is stored
# @param new_program [Object] the candidate; a copy (via dup) is stored
# @param valset_score [Numeric] aggregate validation score, stored as a Float
# @param valset_outputs [Array] per-task outputs, indexed like the subscores
# @param valset_subscores [Array<Numeric>] per-task validation scores; must
#   have one entry per tracked validation task
# @param run_dir [String, nil] passed through to write_best_output
# @param num_metric_calls [Integer] recorded in num_metric_calls_by_discovery
# @return [Array(Integer, Integer)] the index assigned to the new candidate
#   and the index GEPA::Utils::Pareto.idxmax returns for the tracked scores
# @raise [RuntimeError] if valset_subscores has the wrong length; raised
#   before any state is modified
def update_state_with_new_program(
  parent_program_idx,
  new_program,
  valset_score,
  valset_outputs,
  valset_subscores,
  run_dir,
  num_metric_calls
)
  # Validate up front: checking after the updates would leave the parallel
  # arrays half-modified on a bad call, and an over-long input would fail on
  # a nil comparison before ever reaching the check.
  raise 'valset subscores length mismatch' unless valset_subscores.length == @program_at_pareto_front_valset.length

  new_program_idx = @program_candidates.length
  @program_candidates << new_program.dup
  @num_metric_calls_by_discovery << num_metric_calls

  # The new candidate inherits the largest predictor id among its parents,
  # falling back to 0 when no parent supplies one.
  max_predictor_id = parent_program_idx.map { |idx| @named_predictor_id_to_update_next_for_program_candidate[idx] }.compact.max
  @named_predictor_id_to_update_next_for_program_candidate << (max_predictor_id || 0)
  @parent_program_for_candidate << parent_program_idx.dup

  @prog_candidate_val_subscores << valset_subscores.dup
  @program_full_scores_val_set << valset_score.to_f

  valset_subscores.each_with_index do |new_score, task_idx|
    old_score = @pareto_front_valset[task_idx]
    if new_score > old_score
      # Strict improvement: the new candidate becomes the only front member.
      @pareto_front_valset[task_idx] = new_score
      @program_at_pareto_front_valset[task_idx] = Set.new([new_program_idx])
      if @best_outputs_valset
        @best_outputs_valset[task_idx] = [[new_program_idx, valset_outputs[task_idx]]]
      end
      write_best_output(run_dir, task_idx, new_program_idx, valset_outputs[task_idx])
    elsif new_score == old_score
      # Tie: the new candidate joins the existing front for this task.
      @program_at_pareto_front_valset[task_idx].add(new_program_idx)
      if @best_outputs_valset
        @best_outputs_valset[task_idx] << [new_program_idx, valset_outputs[task_idx]]
      end
    end
  end

  @per_program_tracked_scores = @program_full_scores_val_set.dup
  linear_idx = GEPA::Utils::Pareto.idxmax(@per_program_tracked_scores)

  [new_program_idx, linear_idx]
end