Class: DSPy::Teleprompt::GEPA::FitnessEvaluator
- Inherits:
-
Object
- Object
- DSPy::Teleprompt::GEPA::FitnessEvaluator
- Extended by:
- T::Sig
- Defined in:
- lib/dspy/teleprompt/gepa.rb
Overview
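FitnessEvaluator provides multi-dimensional evaluation of prompt candidates: a primary metric score averaged over the training examples, plus secondary scores (token efficiency, response consistency, latency) that are combined into a single overall score.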
Instance Attribute Summary collapse
-
#config ⇒ Object
readonly
Returns the value of attribute config.
-
#primary_metric ⇒ Object
readonly
Returns the value of attribute primary_metric.
-
#secondary_metrics ⇒ Object
readonly
Returns the value of attribute secondary_metrics.
Instance Method Summary collapse
- #batch_evaluate(programs, trainset) ⇒ Object
- #compare_candidates(score1, score2) ⇒ Object
- #evaluate_candidate(program, trainset) ⇒ Object
-
#initialize(primary_metric:, config:, secondary_metrics: nil) ⇒ FitnessEvaluator
constructor
A new instance of FitnessEvaluator.
- #rank_candidates(scores) ⇒ Object
Constructor Details
#initialize(primary_metric:, config:, secondary_metrics: nil) ⇒ FitnessEvaluator
Returns a new instance of FitnessEvaluator.
1653 1654 1655 1656 1657 1658 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1653
#
# Sets up the evaluator state.
#
# @param primary_metric [Object] stored as-is in @primary_metric
# @param config [Object] stored as-is in @config
# @param secondary_metrics [Object, nil] when nil (or false) the result of
#   #default_secondary_metrics is stored instead
# @return [FitnessEvaluator] a new instance of FitnessEvaluator
def initialize(primary_metric:, config:, secondary_metrics: nil)
  @primary_metric, @config = primary_metric, config
  # Fall back to the built-in defaults only when the caller supplied nothing.
  @secondary_metrics = (secondary_metrics || default_secondary_metrics)
  @trace_collector = TraceCollector.new
end
Instance Attribute Details
#config ⇒ Object (readonly)
Returns the value of attribute config.
1641 1642 1643 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1641
#
# Read-only accessor.
#
# @return [Object] the value currently held in @config
def config
  @config
end
#primary_metric ⇒ Object (readonly)
Returns the value of attribute primary_metric.
1638 1639 1640 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1638
#
# Read-only accessor.
#
# @return [Object] the value currently held in @primary_metric
def primary_metric
  @primary_metric
end
#secondary_metrics ⇒ Object (readonly)
Returns the value of attribute secondary_metrics.
1644 1645 1646 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1644
#
# Read-only accessor.
#
# @return [Object] the value currently held in @secondary_metrics
def secondary_metrics
  @secondary_metrics
end
Instance Method Details
#batch_evaluate(programs, trainset) ⇒ Object
1736 1737 1738 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1736
#
# Evaluates several candidate programs against the same training set.
#
# @param programs [Enumerable] candidates, each passed to #evaluate_candidate
# @param trainset [Object] forwarded unchanged to every #evaluate_candidate call
# @return [Array] one #evaluate_candidate result per program, in input order
def batch_evaluate(programs, trainset)
  programs.map do |candidate|
    evaluate_candidate(candidate, trainset)
  end
end
#compare_candidates(score1, score2) ⇒ Object
1742 1743 1744 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1742
#
# Computes the signed difference between two candidates' overall scores.
#
# @param score1 [#overall_score] left-hand candidate score
# @param score2 [#overall_score] right-hand candidate score
# @return [Numeric] positive when score1 is higher, negative when score2 is
#   higher, zero when they are equal
def compare_candidates(score1, score2)
  left_total = score1.overall_score
  right_total = score2.overall_score
  left_total - right_total
end
#evaluate_candidate(program, trainset) ⇒ Object
1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1662
#
# Runs +program+ on every example of +trainset+ and condenses the results
# into a FitnessScore.
#
# For each example the program is called with +example.input_values+ as
# keyword arguments and the result is scored with
# +@primary_metric.call(example, prediction).to_f+. Any StandardError raised
# by the program or the metric is captured: the example scores 0.0 and the
# error message is stored on that example's record. Exactly one record is
# kept per example.
#
# @param program [#call] candidate under evaluation
# @param trainset [Enumerable] examples responding to #input_values; must
#   also respond to #size
# @return [FitnessScore] built with primary_score, secondary_scores
#   (:token_efficiency, :consistency, :latency), overall_score and metadata
#   (:evaluation_time, :examples_count, :errors_count)
def evaluate_candidate(program, trainset)
  # The monotonic clock is immune to wall-clock adjustments while timing.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  predictions = []

  # Collect primary metric scores and execution data
  primary_scores = trainset.map do |example|
    prediction = nil
    latency = 0.0
    begin
      call_started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      prediction = program.call(**example.input_values)
      latency = Process.clock_gettime(Process::CLOCK_MONOTONIC) - call_started

      # Score before recording, so a failing metric cannot leave behind a
      # "successful" record in addition to the error record.
      score = @primary_metric.call(example, prediction).to_f
      predictions << { prediction: prediction, latency: latency, example: example }
      score
    rescue StandardError => e
      # Best-effort evaluation: keep going, remember what went wrong.
      predictions << {
        prediction: prediction,
        latency: latency,
        example: example,
        error: e.message
      }
      0.0
    end
  end

  # An empty trainset has no mean; report 0.0 instead of dividing by zero.
  primary_score = primary_scores.empty? ? 0.0 : primary_scores.sum / primary_scores.size

  # Calculate secondary metrics
  secondary_scores = {}

  # Token efficiency (mock data for now - will be replaced with real trace collection)
  # NOTE(review): the random token usage presumably makes this score, and
  # anything derived from it, vary between runs - confirm against
  # calculate_token_efficiency / calculate_overall_score.
  mock_traces = predictions.map { OpenStruct.new(token_usage: 50 + rand(100)) }
  secondary_scores[:token_efficiency] = calculate_token_efficiency(mock_traces, predictions.size)

  # Response consistency - use first output field for any signature
  response_texts = predictions.map do |record|
    output = record[:prediction]
    next '' unless output && output.class.respond_to?(:props)

    # Get first output field name and value
    first_field = output.class.props.keys.first
    first_field ? output.send(first_field).to_s : ''
  end
  secondary_scores[:consistency] = calculate_consistency(response_texts)

  # Latency performance
  latencies = predictions.map { |record| record[:latency] }
  secondary_scores[:latency] = calculate_latency_score(latencies)

  # Calculate weighted overall score
  overall_score = calculate_overall_score(primary_score, secondary_scores)

  FitnessScore.new(
    primary_score: primary_score,
    secondary_scores: secondary_scores,
    overall_score: overall_score,
    metadata: {
      evaluation_time: Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at,
      examples_count: trainset.size,
      errors_count: predictions.count { |record| record[:error] }
    }
  )
end
#rank_candidates(scores) ⇒ Object
1748 1749 1750 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1748
#
# Orders candidates from best to worst by overall score.
#
# @param scores [Enumerable<#overall_score>] candidate scores
# @return [Array<Integer>] positions into +scores+, highest overall_score first
def rank_candidates(scores)
  # Negating the score makes the ascending sort produce a descending ranking.
  ordered_pairs = scores.each_with_index.sort_by do |candidate, _position|
    -candidate.overall_score
  end
  ordered_pairs.map { |_candidate, position| position }
end