Class: DSPy::Teleprompt::GEPA::ReflectionEngine
- Inherits:
-
Object
- Object
- DSPy::Teleprompt::GEPA::ReflectionEngine
- Extended by:
- T::Sig
- Defined in:
- lib/dspy/teleprompt/gepa.rb
Overview
ReflectionEngine performs natural language reflection on execution traces. This is the core component that analyzes traces and generates improvement insights.
Instance Attribute Summary collapse
-
#config ⇒ Object
readonly
Returns the value of attribute config.
Instance Method Summary collapse
- #analyze_execution_patterns(traces) ⇒ Object
- #analyze_traces_with_dspy(traces) ⇒ Object
- #convert_prediction_to_reflection_result(prediction, original_traces) ⇒ Object
- #create_trace_reflection_signature ⇒ Object
- #extract_optimization_insights(traces) ⇒ Object
- #generate_improvement_suggestions(patterns) ⇒ Object
- #generate_reflection_prompt(traces) ⇒ Object
-
#initialize(config = nil) ⇒ ReflectionEngine
constructor
A new instance of ReflectionEngine.
- #parse_llm_reflection(response_text, original_traces) ⇒ Object
- #reflect_on_traces(traces) ⇒ Object
- #reflect_with_llm(traces) ⇒ Object
- #reflection_with_context(traces, context) ⇒ Object
- #suggest_mutations(patterns) ⇒ Object
- #trace_summary_for_reflection(traces) ⇒ Object
Constructor Details
#initialize(config = nil) ⇒ ReflectionEngine
Returns a new instance of ReflectionEngine.
379 380 381 |
# File 'lib/dspy/teleprompt/gepa.rb', line 379 def initialize(config = nil) @config = config || GEPAConfig.new end |
Instance Attribute Details
#config ⇒ Object (readonly)
Returns the value of attribute config.
376 377 378 |
# File 'lib/dspy/teleprompt/gepa.rb', line 376 def config @config end |
Instance Method Details
#analyze_execution_patterns(traces) ⇒ Object
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 |
# File 'lib/dspy/teleprompt/gepa.rb', line 432 def analyze_execution_patterns(traces) llm_traces = traces.select(&:llm_trace?) module_traces = traces.select(&:module_trace?) total_tokens = llm_traces.sum(&:token_usage) unique_models = llm_traces.map(&:model_name).compact.uniq { llm_traces_count: llm_traces.size, module_traces_count: module_traces.size, total_tokens: total_tokens, unique_models: unique_models, avg_response_length: calculate_avg_response_length(llm_traces), trace_timespan: calculate_timespan(traces) } end |
#analyze_traces_with_dspy(traces) ⇒ Object
1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1064 def analyze_traces_with_dspy(traces) predictor = DSPy::Predict.new(create_trace_reflection_signature) # Prepare input data summary = trace_summary_for_reflection(traces) insights = extract_optimization_insights(traces) insights_text = insights.map { |k, v| "- #{k}: #{v.is_a?(Hash) ? v.values.join(', ') : v}" }.join("\n") # Get LLM analysis predictor.call( execution_summary: summary, optimization_context: "GEPA genetic algorithm for prompt optimization. Available mutations: rewrite, expand, simplify, combine, rephrase. Goal: improve prompt effectiveness through iterative evolution.", key_insights: insights_text, sample_traces: format_traces_for_prompt(traces.take(3)) ) end |
#convert_prediction_to_reflection_result(prediction, original_traces) ⇒ Object
1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1083 def convert_prediction_to_reflection_result(prediction, original_traces) reflection_id = generate_reflection_id # Extract and validate prediction results diagnosis = prediction.diagnosis || 'DSPy reflection analysis' improvements = Array(prediction.improvements).select { |i| i.is_a?(String) && !i.strip.empty? } confidence = [[prediction.confidence&.to_f || 0.0, 1.0].min, 0.0].max reasoning = prediction.reasoning || 'DSPy-based analysis of execution traces' # Validate mutation suggestions valid_mutations = Array(prediction.suggested_mutations).filter_map do |mut| mutation_symbol = mut.to_s.downcase.to_sym if [:rewrite, :expand, :simplify, :combine, :rephrase].include?(mutation_symbol) mutation_symbol end end.uniq # Ensure we have at least one valid mutation suggestion valid_mutations = [:rewrite] if valid_mutations.empty? ReflectionResult.new( trace_id: reflection_id, diagnosis: diagnosis, improvements: improvements, confidence: confidence, reasoning: reasoning, suggested_mutations: valid_mutations, metadata: { reflection_model: @config.reflection_lm, analysis_timestamp: Time.now, trace_count: original_traces.size, token_usage: estimate_token_usage(prediction.to_s), llm_based: true, dspy_prediction: true, insights: { pattern_detected: prediction.pattern_detected || "unknown_pattern", optimization_opportunity: prediction.optimization_opportunity || "general_optimization" } } ) end |
#create_trace_reflection_signature ⇒ Object
1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 |
# File 'lib/dspy/teleprompt/gepa.rb', line 1039 def create_trace_reflection_signature @trace_reflection_signature ||= Class.new(DSPy::Signature) do description "Analyze execution traces from GEPA optimization system and provide actionable optimization insights" input do const :execution_summary, String, description: "Summary of execution traces and performance patterns" const :optimization_context, String, description: "Context about the genetic algorithm optimization goals" const :key_insights, String, description: "Key insights extracted from trace analysis" const :sample_traces, String, description: "Representative execution trace samples" end output do const :diagnosis, String, description: "Brief description of execution patterns and issues identified" const :improvements, T::Array[String], description: "List of 2-4 specific actionable improvement suggestions" const :confidence, Float, description: "Confidence level in analysis (0.0 to 1.0)" const :reasoning, String, description: "Detailed reasoning process for the analysis" const :suggested_mutations, T::Array[String], description: "List of 2-3 most beneficial mutation types from: rewrite, expand, simplify, combine, rephrase" const :pattern_detected, String, description: "Primary pattern identified in execution traces" const :optimization_opportunity, String, description: "Key area identified for performance improvement" end end end |
#extract_optimization_insights(traces) ⇒ Object
756 757 758 759 760 761 762 763 764 765 766 |
# File 'lib/dspy/teleprompt/gepa.rb', line 756 def extract_optimization_insights(traces) llm_traces = traces.select(&:llm_trace?) insights = { token_efficiency: analyze_token_efficiency(llm_traces), response_quality: analyze_response_quality(llm_traces), model_consistency: analyze_model_consistency(llm_traces) } insights end |
#generate_improvement_suggestions(patterns) ⇒ Object
451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 |
# File 'lib/dspy/teleprompt/gepa.rb', line 451 def generate_improvement_suggestions(patterns) suggestions = [] if patterns[:total_tokens] > 500 suggestions << 'Consider reducing prompt length to lower token usage' end if patterns[:avg_response_length] < 10 suggestions << 'Responses seem brief - consider asking for more detailed explanations' end if patterns[:llm_traces_count] > patterns[:module_traces_count] * 3 suggestions << 'High LLM usage detected - consider optimizing reasoning chains' end if patterns[:unique_models].size > 1 suggestions << 'Multiple models used - consider standardizing on one model for consistency' end suggestions << 'Add step-by-step reasoning instructions' if suggestions.empty? suggestions end |
#generate_reflection_prompt(traces) ⇒ Object
596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 |
# File 'lib/dspy/teleprompt/gepa.rb', line 596 def generate_reflection_prompt(traces) if traces.empty? return <<~PROMPT You are analyzing execution traces for a genetic algorithm-based prompt optimization system called GEPA. **Task**: Analyze execution patterns and provide optimization recommendations. **Context**: No execution traces available. Please provide your analysis in the following JSON format: { "diagnosis": "Brief description of what you observed", "improvements": ["List of actionable improvement suggestions"], "confidence": 0.0, "reasoning": "Your reasoning process", "suggested_mutations": ["expand", "rewrite", "simplify", "combine", "rephrase"], "insights": { "pattern_detected": "no_data", "optimization_opportunity": "data_collection" } } PROMPT end summary = trace_summary_for_reflection(traces) insights = extract_optimization_insights(traces) <<~PROMPT You are analyzing execution traces for a genetic algorithm-based prompt optimization system called GEPA. **Task**: Analyze execution patterns and provide optimization recommendations for prompt evolution. **Execution Summary**: #{summary} **Optimization Context**: - This is part of a genetic algorithm for prompt optimization - Available mutation types: rewrite, expand, simplify, combine, rephrase - Goal is to improve prompt effectiveness through iterative evolution - Focus on actionable insights that can guide mutation and crossover operations **Key Optimization Insights**: #{insights.map { |k, v| "- #{k}: #{v.is_a?(Hash) ? v.values.join(', ') : v}" }.join("\n")} **Sample Traces**: #{format_traces_for_prompt(traces.take(3))} Please analyze these execution patterns and provide optimization recommendations in the following JSON format: { "diagnosis": "Brief description of execution patterns and issues identified", "improvements": ["List of 2-4 specific, actionable improvement suggestions"], "confidence": 0.85, "reasoning": "Your detailed reasoning process for the analysis", "suggested_mutations": ["List of 2-3 mutation types that would be most beneficial"], "insights": { "pattern_detected": "primary_pattern_identified", "optimization_opportunity": "key_area_for_improvement" } } Focus on practical recommendations that will improve prompt performance through genetic algorithm evolution. PROMPT end |
#parse_llm_reflection(response_text, original_traces) ⇒ Object
662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 |
# File 'lib/dspy/teleprompt/gepa.rb', line 662 def parse_llm_reflection(response_text, original_traces) reflection_id = generate_reflection_id begin parsed = JSON.parse(response_text) # Extract and validate components diagnosis = parsed['diagnosis'] || 'LLM reflection analysis' improvements = Array(parsed['improvements']).select { |i| i.is_a?(String) && !i.strip.empty? } confidence = [parsed['confidence'].to_f, 1.0].min reasoning = parsed['reasoning'] || 'LLM-based analysis of execution traces' # Validate and sanitize mutation suggestions raw_mutations = Array(parsed['suggested_mutations']) valid_mutations = raw_mutations.filter_map do |mut| mutation_symbol = mut.to_s.downcase.to_sym if [:rewrite, :expand, :simplify, :combine, :rephrase].include?(mutation_symbol) mutation_symbol end end.uniq # Ensure we have at least one valid mutation suggestion valid_mutations = [:rewrite] if valid_mutations.empty? ReflectionResult.new( trace_id: reflection_id, diagnosis: diagnosis, improvements: improvements, confidence: confidence, reasoning: reasoning, suggested_mutations: valid_mutations, metadata: { reflection_model: @config.reflection_lm, analysis_timestamp: Time.now, trace_count: original_traces.size, token_usage: estimate_token_usage(response_text), llm_based: true, insights: parsed['insights'] || {} } ) rescue JSON::ParserError => e # Handle malformed JSON response ReflectionResult.new( trace_id: reflection_id, diagnosis: "LLM reflection JSON parsing error: #{e.message}", improvements: ['Review prompt structure and LLM response format'], confidence: 0.3, reasoning: "Failed to parse LLM reflection response as valid JSON", suggested_mutations: [:rewrite], metadata: { reflection_model: @config.reflection_lm, analysis_timestamp: Time.now, trace_count: original_traces.size, token_usage: 0, parsing_error: e.message, raw_response: response_text.length > 500 ? "#{response_text[0..500]}..." : response_text } ) end end |
#reflect_on_traces(traces) ⇒ Object
385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 |
# File 'lib/dspy/teleprompt/gepa.rb', line 385 def reflect_on_traces(traces) reflection_id = generate_reflection_id if traces.empty? return ReflectionResult.new( trace_id: reflection_id, diagnosis: 'No traces available for analysis', improvements: [], confidence: 0.0, reasoning: 'Cannot provide reflection without execution traces', suggested_mutations: [], metadata: { reflection_model: @config.reflection_lm, analysis_timestamp: Time.now, trace_count: 0 } ) end patterns = analyze_execution_patterns(traces) improvements = generate_improvement_suggestions(patterns) mutations = suggest_mutations(patterns) # For Phase 1, we generate a simple rule-based analysis # Future phases will use LLM-based reflection diagnosis = generate_diagnosis(patterns) reasoning = generate_reasoning(patterns, traces) confidence = calculate_confidence(patterns) ReflectionResult.new( trace_id: reflection_id, diagnosis: diagnosis, improvements: improvements, confidence: confidence, reasoning: reasoning, suggested_mutations: mutations, metadata: { reflection_model: @config.reflection_lm, analysis_timestamp: Time.now, trace_count: traces.size, token_usage: 0 # Phase 1 doesn't use actual LLM reflection } ) end |
#reflect_with_llm(traces) ⇒ Object
569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 |
# File 'lib/dspy/teleprompt/gepa.rb', line 569 def reflect_with_llm(traces) return reflect_on_traces(traces) if traces.empty? begin # Use DSPy::Predict for analysis instead of raw prompts prediction = analyze_traces_with_dspy(traces) convert_prediction_to_reflection_result(prediction, traces) rescue => e # Fallback to rule-based analysis on LLM failure fallback_result = reflect_on_traces(traces) fallback_result.class.new( trace_id: fallback_result.trace_id, diagnosis: "LLM reflection failed (#{e.message}), using fallback analysis: #{fallback_result.diagnosis}", improvements: fallback_result.improvements, confidence: [fallback_result.confidence * 0.5, 0.5].min, reasoning: "Fallback to rule-based analysis after LLM error: #{fallback_result.reasoning}", suggested_mutations: fallback_result.suggested_mutations, metadata: fallback_result.metadata.merge( llm_error: e.message, fallback_used: true ) ) end end |
#reflection_with_context(traces, context) ⇒ Object
770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 |
# File 'lib/dspy/teleprompt/gepa.rb', line 770 def reflection_with_context(traces, context) base_result = reflect_with_llm(traces) # Incorporate context into reasoning context_reasoning = "Generation #{context[:generation] || 'unknown'} analysis. " context_reasoning += "Population size: #{context[:population_size] || 'unknown'}. " if context[:current_best_score] context_reasoning += "Current best score: #{context[:current_best_score]}. " end # Adjust mutation suggestions based on history adjusted_mutations = adjust_mutations_for_history( base_result.suggested_mutations, context[:mutation_history] || [], context[:recent_performance_trend] ) ReflectionResult.new( trace_id: base_result.trace_id, diagnosis: base_result.diagnosis, improvements: base_result.improvements, confidence: base_result.confidence, reasoning: context_reasoning + base_result.reasoning, suggested_mutations: adjusted_mutations, metadata: base_result.metadata.merge(optimization_context: context) ) end |
#suggest_mutations(patterns) ⇒ Object
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 |
# File 'lib/dspy/teleprompt/gepa.rb', line 476 def suggest_mutations(patterns) mutations = [] avg_length = patterns[:avg_response_length] || 0 total_tokens = patterns[:total_tokens] || 0 llm_count = patterns[:llm_traces_count] || 0 mutations << :expand if avg_length < 15 mutations << :simplify if total_tokens > 300 mutations << :combine if llm_count > 2 mutations << :rewrite if llm_count == 1 mutations << :rephrase if mutations.empty? mutations.uniq end |
#trace_summary_for_reflection(traces) ⇒ Object
726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 |
# File 'lib/dspy/teleprompt/gepa.rb', line 726 def trace_summary_for_reflection(traces) return "No execution traces available" if traces.empty? llm_traces = traces.select(&:llm_trace?) module_traces = traces.select(&:module_trace?) total_tokens = llm_traces.sum(&:token_usage) unique_models = llm_traces.map(&:model_name).compact.uniq timespan = calculate_timespan(traces) avg_response_length = if llm_traces.any? total_length = llm_traces.sum { |t| t.response_text&.length || 0 } total_length / llm_traces.size else 0 end <<~SUMMARY Total traces: #{traces.size} LLM interactions: #{llm_traces.size} Module calls: #{module_traces.size} Total tokens: #{total_tokens} Models used: #{unique_models.join(', ')} Average response length: #{avg_response_length} characters Execution timespan: #{timespan.round(2)} seconds SUMMARY end |