Module: GEPA::Utils::Pareto

Extended by:
T::Sig
Defined in:
lib/gepa/utils/pareto.rb

Class Method Summary collapse

Class Method Details

.find_dominator_programs(pareto_front_programs, train_val_weighted_scores) ⇒ Object



73
74
75
76
# File 'lib/gepa/utils/pareto.rb', line 73

# Lists every distinct program that is still present in some front after
# dominated programs have been removed.
#
# @param pareto_front_programs [Enumerable] fronts handed to
#   remove_dominated_programs
# @param train_val_weighted_scores [Object] forwarded as the +scores:+ keyword
#   of remove_dominated_programs
# @return [Array] distinct program identifiers in first-seen order
def self.find_dominator_programs(pareto_front_programs, train_val_weighted_scores)
  surviving_fronts = remove_dominated_programs(
    pareto_front_programs,
    scores: train_val_weighted_scores
  )

  # Concatenate the fronts one after another, then drop repeats.
  merged = surviving_fronts.each_with_object([]) do |front, collected|
    collected.concat(front.to_a)
  end
  merged.uniq
end

.idxmax(values) ⇒ Object

Raises:

  • (ArgumentError)


20
21
22
23
24
# File 'lib/gepa/utils/pareto.rb', line 20

# Returns the position of the largest element of +values+.
#
# @param values [#empty?, #each_with_index] collection of comparable values
# @return [Integer] index of the maximum element
# @raise [ArgumentError] when +values+ is empty
def self.idxmax(values)
  raise ArgumentError, 'values must not be empty' if values.empty?

  top_pair = values.each_with_index.max_by { |value, _position| value }
  # Defensive fallback kept from the original: no pair means index 0.
  return 0 if top_pair.nil?

  top_pair.last || 0
end

.json_default(value) ⇒ Object



13
14
15
16
17
# File 'lib/gepa/utils/pareto.rb', line 13

# Converts +value+ into a structure built from JSON-style data.
#
# A Hash comes back as a copy whose top-level keys are stringified (nested
# hashes are left untouched). Any other value is serialised with +to_json+
# and parsed back. If either path raises a StandardError, the result is a
# Hash holding the value's string form under the +:value+ symbol key.
#
# @param value [Object] value to convert
# @return [Object] the converted value, or the +{ value: ... }+ fallback
def self.json_default(value)
  return value.transform_keys(&:to_s) if value.is_a?(Hash)

  JSON.parse(value.to_json)
rescue StandardError
  { value: value.to_s }
end

.remove_dominated_programs(program_at_pareto_front_valset, scores: nil) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/gepa/utils/pareto.rb', line 32

# Filters every front down to the programs that are not marked as dominated.
#
# Programs are visited in ascending score order (a program missing from
# +scores+ counts as 0.0). On each pass the first not-yet-dominated program
# for which +dominated?+ returns true is marked; passes repeat until one
# marks nothing. The dominance rule itself lives in +dominated?+, which is
# defined outside this method.
#
# @param program_at_pareto_front_valset [Enumerable<#to_a>] one collection of
#   program identifiers per front
# @param scores [#fetch, nil] score lookup by program identifier; when nil,
#   every program is given the same score of 1.0
# @return [Array<Array>] the fronts in their original order, each reduced to
#   its surviving programs
def self.remove_dominated_programs(program_at_pareto_front_valset, scores: nil)
  normalized_fronts = program_at_pareto_front_valset.map(&:to_a)

  # Distinct programs in first-seen order; occurrence counts are not needed.
  all_programs = normalized_fronts.flatten(1).uniq
  scores ||= all_programs.to_h { |idx| [idx, 1.0] }

  sorted_programs = all_programs.sort_by { |idx| scores.fetch(idx, 0.0) }

  # Built once instead of once per candidate: the pool is the same on every
  # iteration. NOTE(review): assumes dominated? only reads this set - confirm.
  program_pool = sorted_programs.to_set

  dominated = Set.new
  loop do
    # Locate by position so that any identifier value can be found reliably.
    position = sorted_programs.find_index do |candidate|
      !dominated.include?(candidate) &&
        dominated?(candidate, program_pool, dominated, normalized_fronts)
    end
    break if position.nil?

    dominated.add(sorted_programs[position])
  end

  # Every front member is a known program, so dropping the dominated ones
  # leaves exactly the dominators.
  normalized_fronts.map do |front|
    front.reject { |idx| dominated.include?(idx) }
  end
end

.select_program_candidate_from_pareto_front(pareto_front_programs, weighted_scores, rng) ⇒ Object

Raises:

  • (ArgumentError)


85
86
87
88
89
90
91
92
93
94
95
# File 'lib/gepa/utils/pareto.rb', line 85

# Draws one program from the fronts left after dominated programs are
# removed. A program's chance of being drawn is proportional to the number
# of fronts it still appears in.
#
# @param pareto_front_programs [Enumerable] fronts handed to
#   remove_dominated_programs
# @param weighted_scores [Object] forwarded as the +scores:+ keyword of
#   remove_dominated_programs
# @param rng [#rand] source of randomness; +rand(n)+ is used as an index
#   into a list of length n
# @return [Object] the selected program identifier
# @raise [ArgumentError] when no program remains in any front
def self.select_program_candidate_from_pareto_front(pareto_front_programs, weighted_scores, rng)
  surviving_fronts = remove_dominated_programs(pareto_front_programs, scores: weighted_scores)

  # Count how many fronts each surviving program belongs to.
  appearances = surviving_fronts.each_with_object(Hash.new(0)) do |front, counts|
    front.each { |program| counts[program] += 1 }
  end
  raise ArgumentError, 'pareto front is empty' if appearances.empty?

  # Repeat each program once per appearance so a uniform pick is weighted.
  weighted_pool = appearances.each_with_object([]) do |(program, count), pool|
    count.times { pool << program }
  end
  weighted_pool[rng.rand(weighted_pool.length)]
end