Class: DSPy::Teleprompt::GEPA::FitnessEvaluator
- Inherits:
-
Object
- Object
- DSPy::Teleprompt::GEPA::FitnessEvaluator
- Extended by:
- T::Sig
- Defined in:
- lib/dspy/teleprompt/gepa.rb
Overview
FitnessEvaluator provides multi-dimensional evaluation of prompt candidates.
Instance Attribute Summary
-
#config ⇒ Object
readonly
Returns the value of attribute config.
-
#primary_metric ⇒ Object
readonly
Returns the value of attribute primary_metric.
-
#secondary_metrics ⇒ Object
readonly
Returns the value of attribute secondary_metrics.
Instance Method Summary
- #batch_evaluate(programs, trainset) ⇒ Object
- #compare_candidates(score1, score2) ⇒ Object
- #evaluate_candidate(program, trainset) ⇒ Object
-
#initialize(primary_metric:, config:, secondary_metrics: nil) ⇒ FitnessEvaluator
constructor
A new instance of FitnessEvaluator.
- #rank_candidates(scores) ⇒ Object
Constructor Details
#initialize(primary_metric:, config:, secondary_metrics: nil) ⇒ FitnessEvaluator
Returns a new instance of FitnessEvaluator.
1393 1394 1395 1396 1397 1398 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1393
#
# Stores the metric and configuration the evaluator works with and creates a
# fresh TraceCollector.
#
# @param primary_metric [Object] stored in @primary_metric as given
# @param config [Object] stored in @config as given
# @param secondary_metrics [Object, nil] stored in @secondary_metrics; when
#   nil (or false) the result of `default_secondary_metrics` is used instead
# @return [FitnessEvaluator] a new instance of FitnessEvaluator
def initialize(primary_metric:, config:, secondary_metrics: nil)
  @primary_metric = primary_metric
  @config = config
  @secondary_metrics = secondary_metrics || default_secondary_metrics
  @trace_collector = TraceCollector.new
end
Instance Attribute Details
#config ⇒ Object (readonly)
Returns the value of attribute config.
1381 1382 1383 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1381
#
# Read-only accessor.
#
# @return [Object] the value held in @config
def config
  @config
end
#primary_metric ⇒ Object (readonly)
Returns the value of attribute primary_metric.
1378 1379 1380 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1378
#
# Read-only accessor.
#
# @return [Object] the value held in @primary_metric
def primary_metric
  @primary_metric
end
#secondary_metrics ⇒ Object (readonly)
Returns the value of attribute secondary_metrics.
1384 1385 1386 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1384
#
# Read-only accessor.
#
# @return [Object] the value held in @secondary_metrics
def secondary_metrics
  @secondary_metrics
end
Instance Method Details
#batch_evaluate(programs, trainset) ⇒ Object
1476 1477 1478 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1476
#
# Evaluates several candidate programs against the same training set.
#
# @param programs [#map] candidates; each one is passed to #evaluate_candidate
# @param trainset [Object] forwarded unchanged to every #evaluate_candidate call
# @return [Array] one #evaluate_candidate result per program, in input order
def batch_evaluate(programs, trainset)
  programs.map { |program| evaluate_candidate(program, trainset) }
end
#compare_candidates(score1, score2) ⇒ Object
1482 1483 1484 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1482
#
# Compares two scores by their overall_score.
#
# @param score1 [#overall_score] left-hand score
# @param score2 [#overall_score] right-hand score
# @return [Numeric] score1.overall_score minus score2.overall_score: positive
#   when score1 is higher, negative when score2 is higher, zero when equal
def compare_candidates(score1, score2)
  score1.overall_score - score2.overall_score
end
#evaluate_candidate(program, trainset) ⇒ Object
1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1402
#
# Runs +program+ on every example of +trainset+, scores each prediction with
# @primary_metric, derives the secondary scores and wraps everything in a
# FitnessScore.
#
# A failure while predicting or scoring an example is not propagated: the
# example scores 0.0 and the error message is kept on that example's record.
#
# @param program [#call] invoked as `program.call(**example.input_values)`
# @param trainset [#map, #size] examples, each responding to #input_values
# @return [FitnessScore] built with primary_score, secondary_scores
#   (:token_efficiency, :consistency, :latency), overall_score and metadata
#   (:evaluation_time, :examples_count, :errors_count)
def evaluate_candidate(program, trainset)
  # Monotonic clock: durations must not be affected by wall-clock adjustments.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  predictions = []

  # Collect primary metric scores and execution data. Exactly one record is
  # kept per example, so a metric that raises after a successful prediction
  # does not add a second record for the same example.
  primary_scores = trainset.map do |example|
    record = { prediction: nil, latency: 0.0, example: example }
    predictions << record

    begin
      call_started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      record[:prediction] = program.call(**example.input_values)
      record[:latency] = Process.clock_gettime(Process::CLOCK_MONOTONIC) - call_started
      @primary_metric.call(example, record[:prediction]).to_f
    rescue => e
      # Handle prediction/metric errors: remember the message, score zero.
      record[:error] = e.message
      0.0
    end
  end

  # `[].sum` is Integer 0, so an empty trainset would raise ZeroDivisionError.
  primary_score = primary_scores.empty? ? 0.0 : primary_scores.sum / primary_scores.size

  # Calculate secondary metrics
  secondary_scores = {}

  # Token efficiency (mock data for now - will be replaced with real trace
  # collection). NOTE: the mock usage is random, so this score is not
  # reproducible between runs.
  mock_traces = predictions.map { OpenStruct.new(token_usage: 50 + rand(100)) }
  secondary_scores[:token_efficiency] = calculate_token_efficiency(mock_traces, predictions.size)

  # Response consistency - use first output field for any signature
  response_texts = predictions.map do |record|
    prediction = record[:prediction]
    next '' unless prediction && prediction.class.respond_to?(:props)

    # Get first output field name and value
    first_field = prediction.class.props.keys.first
    first_field ? (prediction.send(first_field)&.to_s || '') : ''
  end
  secondary_scores[:consistency] = calculate_consistency(response_texts)

  # Latency performance
  latencies = predictions.map { |record| record[:latency] }
  secondary_scores[:latency] = calculate_latency_score(latencies)

  # Calculate weighted overall score
  overall_score = calculate_overall_score(primary_score, secondary_scores)

  FitnessScore.new(
    primary_score: primary_score,
    secondary_scores: secondary_scores,
    overall_score: overall_score,
    metadata: {
      evaluation_time: Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at,
      examples_count: trainset.size,
      errors_count: predictions.count { |record| record[:error] }
    }
  )
end
#rank_candidates(scores) ⇒ Object
1488 1489 1490 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1488
#
# Ranks scores from best to worst.
#
# @param scores [Enumerable<#overall_score>] scores to rank
# @return [Array<Integer>] positions into +scores+, ordered by descending
#   overall_score; equal scores keep their original relative order
def rank_candidates(scores)
  # sort_by is not guaranteed to be stable, so the original position is used
  # as a secondary key to make the ranking of tied scores deterministic.
  scores.each_with_index
        .sort_by { |score, index| [-score.overall_score, index] }
        .map(&:last)
end