Class: GEPA::Core::State

Inherits:

Object

Object
GEPA::Core::State

show all

Extended by: T::Sig

Defined in: lib/gepa/core/state.rb

Instance Attribute Summary collapse

#best_outputs_valset ⇒ Object readonly

Returns the value of attribute best_outputs_valset.
#full_program_trace ⇒ Object readonly

Returns the value of attribute full_program_trace.
#i ⇒ Object

Returns the value of attribute i.
#list_of_named_predictors ⇒ Object readonly

Returns the value of attribute list_of_named_predictors.
#named_predictor_id_to_update_next_for_program_candidate ⇒ Object readonly

Returns the value of attribute named_predictor_id_to_update_next_for_program_candidate.
#num_full_ds_evals ⇒ Object

Returns the value of attribute num_full_ds_evals.
#num_metric_calls_by_discovery ⇒ Object readonly

Returns the value of attribute num_metric_calls_by_discovery.
#parent_program_for_candidate ⇒ Object readonly

Returns the value of attribute parent_program_for_candidate.
#pareto_front_valset ⇒ Object readonly

Returns the value of attribute pareto_front_valset.
#per_program_tracked_scores ⇒ Object readonly

Returns the value of attribute per_program_tracked_scores.
#prog_candidate_val_subscores ⇒ Object readonly

Returns the value of attribute prog_candidate_val_subscores.
#program_at_pareto_front_valset ⇒ Object readonly

Returns the value of attribute program_at_pareto_front_valset.
#program_candidates ⇒ Object readonly

Returns the value of attribute program_candidates.
#program_full_scores_val_set ⇒ Object readonly

Returns the value of attribute program_full_scores_val_set.
#total_num_evals ⇒ Object

Returns the value of attribute total_num_evals.

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(seed_candidate, base_valset_eval_output, track_best_outputs: false) ⇒ `State`

Returns a new instance of State.

Raises:

(ArgumentError)

# File 'lib/gepa/core/state.rb', line 37

# Builds the starting state from the seed program and the result of
# evaluating it on the validation set.
#
# @param seed_candidate [#dup, #keys] seed program (presumably a Hash of
#   predictor name to text -- confirm against callers); its keys become
#   the list of named predictors
# @param base_valset_eval_output [Array] two-element `[outputs, scores]` pair
# @param track_best_outputs [Boolean] when truthy, remember the seed outputs
#   as the current best output of every validation task
# @raise [ArgumentError] when the score list is empty
def initialize(seed_candidate, base_valset_eval_output, track_best_outputs: false)
  seed_outputs, seed_scores = base_valset_eval_output
  if seed_scores.empty?
    raise ArgumentError, 'validation scores must not be empty'
  end

  task_count = seed_scores.length
  mean_score = seed_scores.sum / task_count.to_f

  # Candidate 0 is the seed; every per-candidate array starts with its entry.
  @program_candidates = [seed_candidate.dup]
  @program_full_scores_val_set = [mean_score]
  @per_program_tracked_scores = [mean_score]

  # Per-task best score and the set of candidates achieving it.
  @pareto_front_valset = seed_scores.dup
  @parent_program_for_candidate = [[nil]]
  @program_at_pareto_front_valset = Array.new(task_count) { Set[0] }

  @list_of_named_predictors = seed_candidate.keys
  @named_predictor_id_to_update_next_for_program_candidate = [0]

  @prog_candidate_val_subscores = [seed_scores.dup]
  @num_metric_calls_by_discovery = [0]

  # nil (rather than an empty list) signals that output tracking is off.
  @best_outputs_valset = track_best_outputs ? seed_outputs.map { |out| [[0, out]] } : nil

  @full_program_trace = []
  @i = -1
  @num_full_ds_evals = 0
  @total_num_evals = 0
end

Instance Attribute Details

#best_outputs_valset ⇒ `Object` (readonly)

Returns the value of attribute best_outputs_valset.



17
18
19

# File 'lib/gepa/core/state.rb', line 17

# Reader for @best_outputs_valset. The constructor leaves it nil unless
# track_best_outputs was truthy; otherwise it holds, per validation task, a
# list of [program_idx, output] pairs that update_state_with_new_program
# replaces on a strictly better score and appends to on a tie.
def best_outputs_valset
  @best_outputs_valset
end

#full_program_trace ⇒ `Object` (readonly)

Returns the value of attribute full_program_trace.



17
18
19

# File 'lib/gepa/core/state.rb', line 17

# Reader for @full_program_trace, which the constructor initialises to an
# empty Array. Nothing visible in this class appends to it; presumably
# callers push trace entries through this reader -- confirm.
def full_program_trace
  @full_program_trace
end

#i ⇒ `Object`

Returns the value of attribute i.



16
17
18

# File 'lib/gepa/core/state.rb', line 16

# Reader for @i, which the constructor initialises to -1.
# NOTE(review): looks like an iteration counter advanced by the caller -- confirm.
def i
  @i
end

#list_of_named_predictors ⇒ `Object` (readonly)

Returns the value of attribute list_of_named_predictors.



17
18
19

# File 'lib/gepa/core/state.rb', line 17

# Reader for @list_of_named_predictors: the keys of the seed candidate as
# captured by the constructor.
def list_of_named_predictors
  @list_of_named_predictors
end

#named_predictor_id_to_update_next_for_program_candidate ⇒ `Object` (readonly)

Returns the value of attribute named_predictor_id_to_update_next_for_program_candidate.



17
18
19

# File 'lib/gepa/core/state.rb', line 17

# Reader for the per-candidate "next predictor id" array. It starts as [0]
# for the seed; update_state_with_new_program appends the maximum value found
# among the new candidate's parents (0 when none is available).
def named_predictor_id_to_update_next_for_program_candidate
  @named_predictor_id_to_update_next_for_program_candidate
end

#num_full_ds_evals ⇒ `Object`

Returns the value of attribute num_full_ds_evals.



16
17
18

# File 'lib/gepa/core/state.rb', line 16

# Reader for @num_full_ds_evals. The constructor sets it to 0 and
# initialize_gepa_state sets it to 1 after evaluating the seed candidate.
def num_full_ds_evals
  @num_full_ds_evals
end

#num_metric_calls_by_discovery ⇒ `Object` (readonly)

Returns the value of attribute num_metric_calls_by_discovery.



17
18
19

# File 'lib/gepa/core/state.rb', line 17

# Reader for @num_metric_calls_by_discovery: one entry per candidate, starting
# with 0 for the seed; update_state_with_new_program appends the
# num_metric_calls value it is given for each new candidate.
def num_metric_calls_by_discovery
  @num_metric_calls_by_discovery
end

#parent_program_for_candidate ⇒ `Object` (readonly)

Returns the value of attribute parent_program_for_candidate.



17
18
19

# File 'lib/gepa/core/state.rb', line 17

# Reader for @parent_program_for_candidate: one entry per candidate holding a
# copy of the parent index list passed to update_state_with_new_program.
# The seed's entry is [nil].
def parent_program_for_candidate
  @parent_program_for_candidate
end

#pareto_front_valset ⇒ `Object` (readonly)

Returns the value of attribute pareto_front_valset.



17
18
19

# File 'lib/gepa/core/state.rb', line 17

# Reader for @pareto_front_valset: the best score seen so far for each
# validation task. It starts as a copy of the seed scores and an entry is
# overwritten only when a new candidate scores strictly higher on that task.
def pareto_front_valset
  @pareto_front_valset
end

#per_program_tracked_scores ⇒ `Object` (readonly)

Returns the value of attribute per_program_tracked_scores.



17
18
19

# File 'lib/gepa/core/state.rb', line 17

# Reader for @per_program_tracked_scores. After every
# update_state_with_new_program call it is a fresh copy of
# @program_full_scores_val_set.
def per_program_tracked_scores
  @per_program_tracked_scores
end

#prog_candidate_val_subscores ⇒ `Object` (readonly)

Returns the value of attribute prog_candidate_val_subscores.



17
18
19

# File 'lib/gepa/core/state.rb', line 17

# Reader for @prog_candidate_val_subscores: one entry per candidate, each a
# copy of that candidate's per-task validation scores.
def prog_candidate_val_subscores
  @prog_candidate_val_subscores
end

#program_at_pareto_front_valset ⇒ `Object` (readonly)

Returns the value of attribute program_at_pareto_front_valset.



17
18
19

# File 'lib/gepa/core/state.rb', line 17

# Reader for @program_at_pareto_front_valset: for each validation task, the
# Set of candidate indices whose score equals that task's current best.
def program_at_pareto_front_valset
  @program_at_pareto_front_valset
end

#program_candidates ⇒ `Object` (readonly)

Returns the value of attribute program_candidates.



17
18
19

# File 'lib/gepa/core/state.rb', line 17

# Reader for @program_candidates: the list of candidate programs, with a copy
# of the seed at index 0 and a copy of each accepted program appended after it.
def program_candidates
  @program_candidates
end

#program_full_scores_val_set ⇒ `Object` (readonly)

Returns the value of attribute program_full_scores_val_set.



17
18
19

# File 'lib/gepa/core/state.rb', line 17

# Reader for @program_full_scores_val_set: one aggregate validation score per
# candidate. The seed's entry is the mean of its per-task scores; later
# entries are the valset_score passed in, converted with to_f.
def program_full_scores_val_set
  @program_full_scores_val_set
end

#total_num_evals ⇒ `Object`

Returns the value of attribute total_num_evals.



16
17
18

# File 'lib/gepa/core/state.rb', line 16

# Reader for @total_num_evals. The constructor sets it to 0 and
# initialize_gepa_state sets it to the number of seed validation scores.
def total_num_evals
  @total_num_evals
end

Class Method Details

.initialize_gepa_state(run_dir:, logger:, seed_candidate:, valset_evaluator:, track_best_outputs: false) ⇒ `Object`

# File 'lib/gepa/core/state.rb', line 191

# Returns the state to start (or resume) a run with.
#
# When +run_dir+ already contains both `gepa_state.bin` and
# `prog_candidates`, the saved state is loaded and returned. Otherwise the
# seed candidate is evaluated with +valset_evaluator+, the evaluation is
# written under +run_dir+ (when one is given), and a fresh state is built.
#
# @param run_dir [String, nil] run directory; nil disables all disk access
# @param logger [#log] receives one message when a saved state is resumed
# @param seed_candidate [Object] passed to the evaluator and the constructor
# @param valset_evaluator [#call] returns the evaluation pair whose last
#   element is the list of per-task scores
# @param track_best_outputs [Boolean] forwarded to the constructor
# @return [State] the loaded or newly built state
def self.initialize_gepa_state(run_dir:, logger:, seed_candidate:, valset_evaluator:, track_best_outputs: false)
  resumable = run_dir &&
              File.exist?(File.join(run_dir, 'gepa_state.bin')) &&
              File.exist?(File.join(run_dir, 'prog_candidates'))
  if resumable
    logger.log('Loading gepa state from run dir')
    return load(run_dir)
  end

  seed_eval = valset_evaluator.call(seed_candidate)
  write_eval_output_to_directory(seed_eval, File.join(run_dir, 'generated_best_outputs_valset')) if run_dir

  new(seed_candidate, seed_eval, track_best_outputs: track_best_outputs).tap do |fresh|
    # The seed evaluation counts as one full pass over the validation set.
    fresh.num_full_ds_evals = 1
    fresh.total_num_evals = seed_eval.last.length
  end
end

.load(run_dir) ⇒ `Object`

# File 'lib/gepa/core/state.rb', line 100

# Rebuilds a state from `gepa_state.bin` inside +run_dir+.
#
# The file is expected to hold a Marshal dump of a name => value mapping
# (the format written by #save); each pair is restored as an instance
# variable on a bare, un-initialised instance.
#
# @param run_dir [String] directory containing `gepa_state.bin`
# @return [State] the restored state
# @raise [RuntimeError] when the restored state fails #consistent?
def self.load(run_dir)
  snapshot_path = File.join(run_dir, 'gepa_state.bin')
  # NOTE(review): Marshal.load can instantiate arbitrary objects; only point
  # this at run directories the process itself produced.
  snapshot = File.open(snapshot_path, 'rb') { |io| Marshal.load(io) }

  # allocate skips the constructor, so no seed evaluation is required.
  restored = allocate
  snapshot.each { |name, value| restored.instance_variable_set("@#{name}", value) }
  restored.consistent?
  restored
end

.write_eval_output_to_directory(eval_output, output_dir) ⇒ `Object`

# File 'lib/gepa/core/state.rb', line 172

# Writes one JSON file per validation task under +output_dir+.
#
# For task N the file is `task_N/iter_0_prog_0.json` and its content is the
# pretty-printed score of that task (the second element of +eval_output+);
# the outputs element is not written.
#
# @param eval_output [Array] two-element `[outputs, scores]` pair
# @param output_dir [String] directory that receives the `task_N` folders
# @return [Object] the scores collection that was iterated
def self.write_eval_output_to_directory(eval_output, output_dir)
  _outputs, per_task_scores = eval_output
  per_task_scores.each_with_index do |score, task_idx|
    task_dir = File.join(output_dir, "task_#{task_idx}")
    FileUtils.mkdir_p(task_dir)
    File.write(File.join(task_dir, 'iter_0_prog_0.json'), JSON.pretty_generate(score))
  end
end

Instance Method Details

#consistent? ⇒ `Boolean`

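Returns:

(Boolean) — true when every consistency check passes; a failed check raises RuntimeError instead of returning false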

# File 'lib/gepa/core/state.rb', line 68

# Verifies that the per-candidate arrays and the pareto bookkeeping agree.
#
# Every per-candidate array must have one entry per program candidate, the
# two per-task pareto arrays must have equal length, and every program index
# stored in a pareto set must be below the candidate count.
#
# @return [true] when all checks pass
# @raise [RuntimeError] naming the first check that fails
def consistent?
  expected = @program_candidates.length

  # Checked in this order so the first mismatch determines the message.
  [
    ['program_full_scores_val_set mismatch', @program_full_scores_val_set],
    ['per_program_tracked_scores mismatch', @per_program_tracked_scores],
    ['parent_program_for_candidate mismatch', @parent_program_for_candidate],
    ['named_predictor_id_to_update mismatch', @named_predictor_id_to_update_next_for_program_candidate],
    ['prog_candidate_val_subscores mismatch', @prog_candidate_val_subscores],
    ['num_metric_calls mismatch', @num_metric_calls_by_discovery]
  ].each do |message, per_candidate|
    raise message unless per_candidate.length == expected
  end

  unless @pareto_front_valset.length == @program_at_pareto_front_valset.length
    raise 'pareto fronts length mismatch'
  end

  @program_at_pareto_front_valset.each do |front|
    raise 'pareto index out of range' unless front.all? { |idx| idx < expected }
  end

  true
end

#save(run_dir) ⇒ `Object`

# File 'lib/gepa/core/state.rb', line 87

# Persists every instance variable to `gepa_state.bin` inside +run_dir+ as a
# Marshal dump of a name => value Hash (names without the leading "@").
#
# The snapshot is serialised in memory, written to a temporary file next to
# the target and then renamed over it. An unmarshalable value or an
# interrupted write therefore leaves the previous checkpoint intact instead
# of truncating it.
#
# @param run_dir [String, nil] target directory, created when missing;
#   nil makes the call a no-op
# @return [nil]
# @raise [TypeError] when an instance variable cannot be marshalled
def save(run_dir)
  return if run_dir.nil?

  FileUtils.mkdir_p(run_dir)
  snapshot = instance_variables.each_with_object({}) do |ivar, acc|
    acc[ivar.to_s.delete('@')] = instance_variable_get(ivar)
  end
  # Serialise before touching the disk so a dump failure writes nothing.
  payload = Marshal.dump(snapshot)

  final_path = File.join(run_dir, 'gepa_state.bin')
  # Same directory as the target so the rename stays on one filesystem.
  staging_path = "#{final_path}.#{Process.pid}.tmp"
  begin
    File.binwrite(staging_path, payload)
    File.rename(staging_path, final_path)
  ensure
    # No-op after a successful rename; removes the leftover after a failure.
    FileUtils.rm_f(staging_path)
  end
  nil
end

#update_state_with_new_program(parent_program_idx, new_program, valset_score, valset_outputs, valset_subscores, run_dir, num_metric_calls) ⇒ `Object`

# File 'lib/gepa/core/state.rb', line 121

# Registers a newly evaluated program and folds its per-task validation
# scores into the pareto bookkeeping.
#
# For each task, a strictly higher score replaces the stored best score,
# resets the task's front to the new program and writes its output via
# write_best_output; an equal score adds the new program to the existing
# front. Output tracking is updated only when it was enabled at construction.
#
# The subscores length is checked before anything is mutated, so a malformed
# call raises without leaving a half-registered candidate behind.
#
# @param parent_program_idx [Array<Integer>] indices of the parent candidates
# @param new_program [#dup] the program to register (stored as a copy)
# @param valset_score [#to_f] aggregate validation score of the new program
# @param valset_outputs [Array] per-task outputs, indexed like the subscores
# @param valset_subscores [Array] per-task validation scores
# @param run_dir [String, nil] forwarded to write_best_output
# @param num_metric_calls [Object] recorded as the discovery cost of the program
# @return [Array(Integer, Integer)] index of the new program and the index
#   GEPA::Utils::Pareto.idxmax picks from the aggregate scores
# @raise [RuntimeError] when valset_subscores does not have one entry per task
def update_state_with_new_program(
  parent_program_idx,
  new_program,
  valset_score,
  valset_outputs,
  valset_subscores,
  run_dir,
  num_metric_calls
)
  # Validate first: every statement below mutates state.
  raise 'valset subscores length mismatch' unless valset_subscores.length == @program_at_pareto_front_valset.length

  new_program_idx = @program_candidates.length
  @program_candidates << new_program.dup
  @num_metric_calls_by_discovery << num_metric_calls

  # The new candidate continues from the furthest predictor id among its parents.
  max_predictor_id = parent_program_idx.map { |idx| @named_predictor_id_to_update_next_for_program_candidate[idx] }.compact.max
  @named_predictor_id_to_update_next_for_program_candidate << (max_predictor_id || 0)
  @parent_program_for_candidate << parent_program_idx.dup

  @prog_candidate_val_subscores << valset_subscores.dup
  @program_full_scores_val_set << valset_score.to_f

  valset_subscores.each_with_index do |new_score, task_idx|
    old_score = @pareto_front_valset[task_idx]
    if new_score > old_score
      # Strict improvement: the new program becomes the sole front member.
      @pareto_front_valset[task_idx] = new_score
      @program_at_pareto_front_valset[task_idx] = Set.new([new_program_idx])
      if @best_outputs_valset
        @best_outputs_valset[task_idx] = [[new_program_idx, valset_outputs[task_idx]]]
      end
      write_best_output(run_dir, task_idx, new_program_idx, valset_outputs[task_idx])
    elsif new_score == old_score
      # Tie: the new program joins the existing front for this task.
      @program_at_pareto_front_valset[task_idx].add(new_program_idx)
      if @best_outputs_valset
        @best_outputs_valset[task_idx] << [new_program_idx, valset_outputs[task_idx]]
      end
    end
  end

  @per_program_tracked_scores = @program_full_scores_val_set.dup
  linear_idx = GEPA::Utils::Pareto.idxmax(@per_program_tracked_scores)

  [new_program_idx, linear_idx]
end

Class: GEPA::Core::State

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(seed_candidate, base_valset_eval_output, track_best_outputs: false) ⇒ State

Instance Attribute Details

#best_outputs_valset ⇒ Object (readonly)

#full_program_trace ⇒ Object (readonly)

#i ⇒ Object

#list_of_named_predictors ⇒ Object (readonly)

#named_predictor_id_to_update_next_for_program_candidate ⇒ Object (readonly)

#num_full_ds_evals ⇒ Object

#num_metric_calls_by_discovery ⇒ Object (readonly)

#parent_program_for_candidate ⇒ Object (readonly)

#pareto_front_valset ⇒ Object (readonly)

#per_program_tracked_scores ⇒ Object (readonly)

#prog_candidate_val_subscores ⇒ Object (readonly)

#program_at_pareto_front_valset ⇒ Object (readonly)

#program_candidates ⇒ Object (readonly)

#program_full_scores_val_set ⇒ Object (readonly)

#total_num_evals ⇒ Object