Class: DSPy::Teleprompt::GEPA::ReflectionEngine
- Inherits:
-
Object
- Object
- DSPy::Teleprompt::GEPA::ReflectionEngine
- Extended by:
- T::Sig
- Defined in:
- lib/dspy/teleprompt/gepa.rb
Overview
ReflectionEngine performs natural language reflection on execution traces. This is the core component that analyzes traces and generates improvement insights.
Instance Attribute Summary collapse
-
#config ⇒ Object
readonly
Returns the value of attribute config.
Instance Method Summary collapse
- #analyze_execution_patterns(traces) ⇒ Object
- #analyze_traces_with_dspy(traces) ⇒ Object
- #convert_prediction_to_reflection_result(prediction, original_traces) ⇒ Object
- #create_trace_reflection_signature ⇒ Object
- #extract_optimization_insights(traces) ⇒ Object
- #generate_improvement_suggestions(patterns) ⇒ Object
- #generate_reflection_prompt(traces) ⇒ Object
-
#initialize(config = nil) ⇒ ReflectionEngine
constructor
A new instance of ReflectionEngine.
- #parse_llm_reflection(response_text, original_traces) ⇒ Object
- #reflect_on_traces(traces) ⇒ Object
- #reflect_with_llm(traces) ⇒ Object
- #reflection_with_context(traces, context) ⇒ Object
- #suggest_mutations(patterns) ⇒ Object
- #trace_summary_for_reflection(traces) ⇒ Object
Constructor Details
#initialize(config = nil) ⇒ ReflectionEngine
Returns a new instance of ReflectionEngine.
380 381 382 |
# File 'lib/dspy/teleprompt/gepa.rb', line 380
# Create a ReflectionEngine.
#
# @param config [GEPAConfig, nil] optimization configuration; when nil
#   (or otherwise falsy) a default GEPAConfig is constructed
def initialize(config = nil)
  @config = config
  @config ||= GEPAConfig.new
end
Instance Attribute Details
#config ⇒ Object (readonly)
Returns the value of attribute config.
377 378 379 |
# File 'lib/dspy/teleprompt/gepa.rb', line 377 def config @config end |
Instance Method Details
#analyze_execution_patterns(traces) ⇒ Object
433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 |
# Derive aggregate statistics from a set of execution traces.
#
# @param traces [Array] execution traces responding to #llm_trace? /
#   #module_trace? (and, for LLM traces, #token_usage and #model_name)
# @return [Hash] pattern summary consumed by the suggestion heuristics
def analyze_execution_patterns(traces)
  llm = traces.select(&:llm_trace?)
  mods = traces.select(&:module_trace?)

  {
    llm_traces_count: llm.size,
    module_traces_count: mods.size,
    total_tokens: llm.sum(&:token_usage),
    unique_models: llm.map(&:model_name).compact.uniq,
    avg_response_length: calculate_avg_response_length(llm),
    trace_timespan: calculate_timespan(traces)
  }
end
#analyze_traces_with_dspy(traces) ⇒ Object
756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 |
# Run LLM-based trace analysis through a DSPy::Predict module.
#
# @param traces [Array] execution traces to analyze
# @return [Object] the raw DSPy prediction (untyped via T.unsafe)
# @raise [ArgumentError] when no reflection_lm is configured
def analyze_traces_with_dspy(traces)
  unless @config.reflection_lm
    raise ArgumentError, "reflection_lm must be configured on GEPAConfig for LLM-based reflection"
  end

  analyzer = DSPy::Predict.new(create_trace_reflection_signature)
  # Route this predictor through the reflection-specific LM, not the global one.
  analyzer.config.lm = @config.reflection_lm

  # Flatten the insights hash into one bullet line per key for the prompt.
  bullet_lines = extract_optimization_insights(traces).map do |key, value|
    "- #{key}: #{value.is_a?(Hash) ? value.values.join(', ') : value}"
  end

  T.unsafe(analyzer.call(
    execution_summary: trace_summary_for_reflection(traces),
    optimization_context: "GEPA genetic algorithm for prompt optimization. Available mutations: rewrite, expand, simplify, combine, rephrase. Goal: improve prompt effectiveness through iterative evolution.",
    key_insights: bullet_lines.join("\n"),
    sample_traces: format_traces_for_prompt(traces.take(3))
  ))
end
#convert_prediction_to_reflection_result(prediction, original_traces) ⇒ Object
780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 |
# Translate a DSPy prediction into a ReflectionResult, validating and
# sanitizing every field coming back from the model.
#
# @param prediction [Object] DSPy prediction with diagnosis/improvements/etc.
# @param original_traces [Array] traces the prediction was derived from
# @return [ReflectionResult]
def convert_prediction_to_reflection_result(prediction, original_traces)
  allowed = [:rewrite, :expand, :simplify, :combine, :rephrase]

  # Keep only recognized mutation names (case-insensitive), de-duplicated;
  # guarantee at least one suggestion so downstream mutation always has work.
  mutations = Array(prediction.suggested_mutations)
    .map { |m| m.to_s.downcase.to_sym }
    .select { |m| allowed.include?(m) }
    .uniq
  mutations = [:rewrite] if mutations.empty?

  ReflectionResult.new(
    trace_id: generate_reflection_id,
    diagnosis: prediction.diagnosis || 'DSPy reflection analysis',
    improvements: Array(prediction.improvements).select { |i| i.is_a?(String) && !i.strip.empty? },
    # clamp is equivalent to the [[x, 1.0].min, 0.0].max pattern
    confidence: (prediction.confidence&.to_f || 0.0).clamp(0.0, 1.0),
    reasoning: prediction.reasoning || 'DSPy-based analysis of execution traces',
    suggested_mutations: mutations,
    metadata: {
      reflection_model: @config.reflection_lm&.model,
      analysis_timestamp: Time.now,
      trace_count: original_traces.size,
      token_usage: estimate_token_usage(prediction.to_s),
      llm_based: true,
      dspy_prediction: true,
      insights: {
        pattern_detected: prediction.pattern_detected || "unknown_pattern",
        optimization_opportunity: prediction.optimization_opportunity || "general_optimization"
      }
    }
  )
end
#create_trace_reflection_signature ⇒ Object
731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 |
# File 'lib/dspy/teleprompt/gepa.rb', line 731 def create_trace_reflection_signature @trace_reflection_signature ||= Class.new(DSPy::Signature) do description "Analyze execution traces from GEPA optimization system and provide actionable optimization insights" input do const :execution_summary, String, description: "Summary of execution traces and performance patterns" const :optimization_context, String, description: "Context about the genetic algorithm optimization goals" const :key_insights, String, description: "Key insights extracted from trace analysis" const :sample_traces, String, description: "Representative execution trace samples" end output do const :diagnosis, String, description: "Brief description of execution patterns and issues identified" const :improvements, T::Array[String], description: "List of 2-4 specific actionable improvement suggestions" const :confidence, Float, description: "Confidence level in analysis (0.0 to 1.0)" const :reasoning, String, description: "Detailed reasoning process for the analysis" const :suggested_mutations, T::Array[String], description: "List of 2-3 most beneficial mutation types from: rewrite, expand, simplify, combine, rephrase" const :pattern_detected, String, description: "Primary pattern identified in execution traces" const :optimization_opportunity, String, description: "Key area identified for performance improvement" end end end |
#extract_optimization_insights(traces) ⇒ Object
684 685 686 687 688 689 690 691 692 693 694 |
# Summarize optimization-relevant signals (tokens, quality, model usage)
# from the LLM subset of the traces.
#
# @param traces [Array] execution traces
# @return [Hash] insight name => analysis value
def extract_optimization_insights(traces)
  llm_only = traces.select(&:llm_trace?)

  {
    token_efficiency: analyze_token_efficiency(llm_only),
    response_quality: analyze_response_quality(llm_only),
    model_consistency: analyze_model_consistency(llm_only)
  }
end
#generate_improvement_suggestions(patterns) ⇒ Object
452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 |
# Map aggregate trace patterns to human-readable improvement advice.
# When no heuristic fires, a generic reasoning suggestion is returned so
# the caller always receives at least one item.
#
# @param patterns [Hash] output of #analyze_execution_patterns
# @return [Array<String>] non-empty list of suggestions
def generate_improvement_suggestions(patterns)
  rules = [
    [patterns[:total_tokens] > 500,
     'Consider reducing prompt length to lower token usage'],
    [patterns[:avg_response_length] < 10,
     'Responses seem brief - consider asking for more detailed explanations'],
    [patterns[:llm_traces_count] > patterns[:module_traces_count] * 3,
     'High LLM usage detected - consider optimizing reasoning chains'],
    [patterns[:unique_models].size > 1,
     'Multiple models used - consider standardizing on one model for consistency']
  ]

  picked = rules.filter_map { |applies, advice| advice if applies }
  picked.empty? ? ['Add step-by-step reasoning instructions'] : picked
end
#generate_reflection_prompt(traces) ⇒ Object
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 |
# Build the natural-language reflection prompt for a set of execution
# traces. Reconstructed with <<~ heredocs: the extracted source had the
# prompt bodies garbled into escaped string literals.
#
# @param traces [Array] execution traces (may be empty)
# @return [String] prompt asking the LLM for a JSON-formatted analysis
def generate_reflection_prompt(traces)
  # With no traces there is nothing to summarize, so return a fixed
  # zero-confidence prompt without calling the summary helpers.
  if traces.empty?
    return <<~PROMPT
      You are analyzing execution traces for a genetic algorithm-based prompt optimization system called GEPA.

      **Task**: Analyze execution patterns and provide optimization recommendations.

      **Context**: No execution traces available.

      Please provide your analysis in the following JSON format:
      {
        "diagnosis": "Brief description of what you observed",
        "improvements": ["List of actionable improvement suggestions"],
        "confidence": 0.0,
        "reasoning": "Your reasoning process",
        "suggested_mutations": ["expand", "rewrite", "simplify", "combine", "rephrase"],
        "insights": {
          "pattern_detected": "no_data",
          "optimization_opportunity": "data_collection"
        }
      }
    PROMPT
  end

  summary = trace_summary_for_reflection(traces)
  insights = extract_optimization_insights(traces)

  <<~PROMPT
    You are analyzing execution traces for a genetic algorithm-based prompt optimization system called GEPA.

    **Task**: Analyze execution patterns and provide optimization recommendations for prompt evolution.

    **Execution Summary**:
    #{summary}

    **Optimization Context**:
    - This is part of a genetic algorithm for prompt optimization
    - Available mutation types: rewrite, expand, simplify, combine, rephrase
    - Goal is to improve prompt effectiveness through iterative evolution
    - Focus on actionable insights that can guide mutation and crossover operations

    **Key Optimization Insights**:
    #{insights.map { |k, v| "- #{k}: #{v.is_a?(Hash) ? v.values.join(', ') : v}" }.join("\n")}

    **Sample Traces**:
    #{format_traces_for_prompt(traces.take(3))}

    Please analyze these execution patterns and provide optimization recommendations in the following JSON format:
    {
      "diagnosis": "Brief description of execution patterns and issues identified",
      "improvements": ["List of 2-4 specific, actionable improvement suggestions"],
      "confidence": 0.85,
      "reasoning": "Your detailed reasoning process for the analysis",
      "suggested_mutations": ["List of 2-3 mutation types that would be most beneficial"],
      "insights": {
        "pattern_detected": "primary_pattern_identified",
        "optimization_opportunity": "key_area_for_improvement"
      }
    }

    Focus on practical recommendations that will improve prompt performance through genetic algorithm evolution.
  PROMPT
end
#parse_llm_reflection(response_text, original_traces) ⇒ Object
590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 |
# Parse a raw LLM reflection response (expected to be JSON) into a
# ReflectionResult. Malformed JSON degrades to a low-confidence fallback
# result instead of raising.
#
# Fixes: the extracted source truncated `parsing_error: e.message` to
# `e.`; confidence was only capped above (`[x, 1.0].min`), letting
# negative model outputs through — now clamped to [0.0, 1.0] for
# consistency with #convert_prediction_to_reflection_result.
#
# @param response_text [String] raw LLM response
# @param original_traces [Array] traces the reflection was generated from
# @return [ReflectionResult]
def parse_llm_reflection(response_text, original_traces)
  reflection_id = generate_reflection_id

  begin
    parsed = JSON.parse(response_text)

    # Extract and validate components
    diagnosis = parsed['diagnosis'] || 'LLM reflection analysis'
    improvements = Array(parsed['improvements']).select { |i| i.is_a?(String) && !i.strip.empty? }
    confidence = parsed['confidence'].to_f.clamp(0.0, 1.0)
    reasoning = parsed['reasoning'] || 'LLM-based analysis of execution traces'

    # Validate and sanitize mutation suggestions
    raw_mutations = Array(parsed['suggested_mutations'])
    valid_mutations = raw_mutations.filter_map do |mut|
      mutation_symbol = mut.to_s.downcase.to_sym
      if [:rewrite, :expand, :simplify, :combine, :rephrase].include?(mutation_symbol)
        mutation_symbol
      end
    end.uniq

    # Ensure we have at least one valid mutation suggestion
    valid_mutations = [:rewrite] if valid_mutations.empty?

    ReflectionResult.new(
      trace_id: reflection_id,
      diagnosis: diagnosis,
      improvements: improvements,
      confidence: confidence,
      reasoning: reasoning,
      suggested_mutations: valid_mutations,
      metadata: {
        reflection_model: @config.reflection_lm&.model,
        analysis_timestamp: Time.now,
        trace_count: original_traces.size,
        token_usage: estimate_token_usage(response_text),
        llm_based: true,
        insights: parsed['insights'] || {}
      }
    )
  rescue JSON::ParserError => e
    # Handle malformed JSON response with a conservative fallback result.
    ReflectionResult.new(
      trace_id: reflection_id,
      diagnosis: "LLM reflection JSON parsing error: #{e.message}",
      improvements: ['Review prompt structure and LLM response format'],
      confidence: 0.3,
      reasoning: "Failed to parse LLM reflection response as valid JSON",
      suggested_mutations: [:rewrite],
      metadata: {
        reflection_model: @config.reflection_lm&.model,
        analysis_timestamp: Time.now,
        trace_count: original_traces.size,
        token_usage: 0,
        parsing_error: e.message,
        # Keep only a bounded excerpt of the raw response for debugging.
        raw_response: response_text.length > 500 ? "#{response_text[0..500]}..." : response_text
      }
    )
  end
end
#reflect_on_traces(traces) ⇒ Object
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 |
# Rule-based (Phase 1) reflection over execution traces; later phases use
# LLM-based reflection instead.
#
# @param traces [Array] execution traces (may be empty)
# @return [ReflectionResult] zero-confidence result when traces are empty
def reflect_on_traces(traces)
  reflection_id = generate_reflection_id

  if traces.empty?
    return ReflectionResult.new(
      trace_id: reflection_id,
      diagnosis: 'No traces available for analysis',
      improvements: [],
      confidence: 0.0,
      reasoning: 'Cannot provide reflection without execution traces',
      suggested_mutations: [],
      metadata: {
        reflection_model: @config.reflection_lm&.model,
        analysis_timestamp: Time.now,
        trace_count: 0
      }
    )
  end

  patterns = analyze_execution_patterns(traces)
  improvements = generate_improvement_suggestions(patterns)
  mutations = suggest_mutations(patterns)
  # Phase 1 generates a simple rule-based analysis; no LLM call is made.
  diagnosis = generate_diagnosis(patterns)
  reasoning = generate_reasoning(patterns, traces)
  confidence = calculate_confidence(patterns)

  ReflectionResult.new(
    trace_id: reflection_id,
    diagnosis: diagnosis,
    improvements: improvements,
    confidence: confidence,
    reasoning: reasoning,
    suggested_mutations: mutations,
    metadata: {
      reflection_model: @config.reflection_lm&.model,
      analysis_timestamp: Time.now,
      trace_count: traces.size,
      token_usage: 0 # Phase 1 doesn't use actual LLM reflection
    }
  )
end
#reflect_with_llm(traces) ⇒ Object
497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 |
# LLM-backed reflection with graceful degradation: any error during the
# DSPy analysis falls back to the rule-based #reflect_on_traces result,
# with confidence halved (capped at 0.5) and the error recorded.
#
# Fixes: the extracted source truncated `fallback_result.metadata.merge`
# to `fallback_result..merge` and `llm_error: e.message` to `e.` —
# restored from the sibling metadata-merge pattern in
# #reflection_with_context. TODO confirm against lib/dspy/teleprompt/gepa.rb.
#
# @param traces [Array] execution traces
# @return [ReflectionResult]
def reflect_with_llm(traces)
  return reflect_on_traces(traces) if traces.empty?

  begin
    # Use DSPy::Predict for analysis instead of raw prompts
    prediction = analyze_traces_with_dspy(traces)
    convert_prediction_to_reflection_result(prediction, traces)
  rescue => e
    # Fallback to rule-based analysis on LLM failure
    fallback_result = reflect_on_traces(traces)

    fallback_result.class.new(
      trace_id: fallback_result.trace_id,
      diagnosis: "LLM reflection failed (#{e.message}), using fallback analysis: #{fallback_result.diagnosis}",
      improvements: fallback_result.improvements,
      confidence: [fallback_result.confidence * 0.5, 0.5].min,
      reasoning: "Fallback to rule-based analysis after LLM error: #{fallback_result.reasoning}",
      suggested_mutations: fallback_result.suggested_mutations,
      metadata: fallback_result.metadata.merge(
        llm_error: e.message,
        fallback_used: true
      )
    )
  end
end
#reflection_with_context(traces, context) ⇒ Object
698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 |
# Reflection that incorporates genetic-algorithm context (generation,
# population size, score, mutation history) into the base LLM reflection.
#
# Fixes: the extracted source truncated `base_result.metadata.merge` to
# `base_result..merge` — restored; ReflectionResult exposes a metadata
# hash everywhere else in this class. TODO confirm against
# lib/dspy/teleprompt/gepa.rb.
#
# @param traces [Array] execution traces
# @param context [Hash] optimization context (:generation,
#   :population_size, :current_best_score, :mutation_history,
#   :recent_performance_trend)
# @return [ReflectionResult]
def reflection_with_context(traces, context)
  base_result = reflect_with_llm(traces)

  # Incorporate context into reasoning
  context_reasoning = "Generation #{context[:generation] || 'unknown'} analysis. "
  context_reasoning += "Population size: #{context[:population_size] || 'unknown'}. "
  if context[:current_best_score]
    context_reasoning += "Current best score: #{context[:current_best_score]}. "
  end

  # Adjust mutation suggestions based on history
  adjusted_mutations = adjust_mutations_for_history(
    base_result.suggested_mutations,
    context[:mutation_history] || [],
    context[:recent_performance_trend]
  )

  ReflectionResult.new(
    trace_id: base_result.trace_id,
    diagnosis: base_result.diagnosis,
    improvements: base_result.improvements,
    confidence: base_result.confidence,
    reasoning: context_reasoning + base_result.reasoning,
    suggested_mutations: adjusted_mutations,
    metadata: base_result.metadata.merge(optimization_context: context)
  )
end
#suggest_mutations(patterns) ⇒ Object
477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 |
# Heuristically choose mutation operators from aggregate trace patterns.
# Always returns at least one operator (:rephrase when nothing else fires).
#
# @param patterns [Hash] output of #analyze_execution_patterns
# @return [Array<Symbol>] unique mutation types
def suggest_mutations(patterns)
  response_len = patterns[:avg_response_length] || 0
  tokens = patterns[:total_tokens] || 0
  llm_calls = patterns[:llm_traces_count] || 0

  picks = []
  picks.push(:expand) if response_len < 15
  picks.push(:simplify) if tokens > 300
  picks.push(:combine) if llm_calls > 2
  picks.push(:rewrite) if llm_calls == 1

  picks.empty? ? [:rephrase] : picks.uniq
end
#trace_summary_for_reflection(traces) ⇒ Object
654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 |
# Compact, human-readable summary of traces for use in reflection
# prompts. Reconstructed with a <<~SUMMARY heredoc: the extracted source
# had the summary body garbled into an escaped string literal.
#
# @param traces [Array] execution traces (may be empty)
# @return [String] multi-line summary, or a fixed message when empty
def trace_summary_for_reflection(traces)
  return "No execution traces available" if traces.empty?

  llm_traces = traces.select(&:llm_trace?)
  module_traces = traces.select(&:module_trace?)

  total_tokens = llm_traces.sum(&:token_usage)
  unique_models = llm_traces.map(&:model_name).compact.uniq
  timespan = calculate_timespan(traces)

  # Integer division, matching the original's truncating average.
  avg_response_length = if llm_traces.any?
    total_length = llm_traces.sum { |t| t.response_text&.length || 0 }
    total_length / llm_traces.size
  else
    0
  end

  <<~SUMMARY
    Total traces: #{traces.size}
    LLM interactions: #{llm_traces.size}
    Module calls: #{module_traces.size}
    Total tokens: #{total_tokens}
    Models used: #{unique_models.join(', ')}
    Average response length: #{avg_response_length} characters
    Execution timespan: #{timespan.round(2)} seconds
  SUMMARY
end