Class: DSPy::Teleprompt::GEPA::ReflectionEngine

Inherits:

Object

Object
DSPy::Teleprompt::GEPA::ReflectionEngine

show all

Extended by:: T::Sig

Defined in:: lib/dspy/teleprompt/gepa.rb

Overview

ReflectionEngine performs natural language reflection on execution traces. This is the core component that analyzes traces and generates improvement insights.

Instance Attribute Summary collapse

#config ⇒ Object readonly

Returns the value of attribute config.

Instance Method Summary collapse

Constructor Details

#initialize(config = nil) ⇒ `ReflectionEngine`

Returns a new instance of ReflectionEngine.



379
380
381

# File 'lib/dspy/teleprompt/gepa.rb', line 379

# Creates a reflection engine.
#
# @param config [Object, nil] engine configuration; when nil (or false) is
#   given, a fresh GEPAConfig is built and used instead
def initialize(config = nil)
  @config = config
  @config ||= GEPAConfig.new
end

Instance Attribute Details

#config ⇒ `Object` (readonly)

Returns the value of attribute config.



376
377
378

# File 'lib/dspy/teleprompt/gepa.rb', line 376

# Reader for the configuration assigned by the constructor.
#
# @return [Object] the value currently held in @config
def config
  @config
end

Instance Method Details

#analyze_execution_patterns(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 432

# Condenses a batch of traces into the statistics hash used by the
# rule-based reflection helpers.
#
# @param traces [Array] trace objects responding to llm_trace? and
#   module_trace?; LLM traces must also respond to token_usage and model_name
# @return [Hash{Symbol => Object}] counts of LLM and module traces, the summed
#   token usage, the distinct non-nil model names, plus the average response
#   length and timespan reported by the corresponding calculate_* helpers
def analyze_execution_patterns(traces)
  llm_calls = traces.select { |trace| trace.llm_trace? }
  module_calls = traces.select { |trace| trace.module_trace? }

  stats = {
    llm_traces_count: llm_calls.size,
    module_traces_count: module_calls.size,
    total_tokens: llm_calls.sum { |trace| trace.token_usage },
    unique_models: llm_calls.map { |trace| trace.model_name }.compact.uniq
  }
  # Average length only considers LLM traces; the timespan covers every trace.
  stats[:avg_response_length] = calculate_avg_response_length(llm_calls)
  stats[:trace_timespan] = calculate_timespan(traces)
  stats
end

#analyze_traces_with_dspy(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 1064

# Runs the trace-reflection signature through a DSPy::Predict instance.
#
# The predictor receives a textual summary of the traces, a fixed description
# of the GEPA optimization context, one bullet line per optimization insight,
# and a formatted sample made of the first three traces.
#
# @param traces [Array] execution traces to reflect on
# @return [Object] whatever the predictor's call returns
def analyze_traces_with_dspy(traces)
  reflector = DSPy::Predict.new(create_trace_reflection_signature)

  execution_summary = trace_summary_for_reflection(traces)
  insight_lines = extract_optimization_insights(traces).map do |name, value|
    # Hash-valued insights are flattened to a comma-separated list of values.
    rendered = value.is_a?(Hash) ? value.values.join(', ') : value
    "- #{name}: #{rendered}"
  end
  gepa_context = "GEPA genetic algorithm for prompt optimization. Available mutations: rewrite, expand, simplify, combine, rephrase. Goal: improve prompt effectiveness through iterative evolution."

  reflector.call(
    execution_summary: execution_summary,
    optimization_context: gepa_context,
    key_insights: insight_lines.join("\n"),
    sample_traces: format_traces_for_prompt(traces.take(3))
  )
end

#convert_prediction_to_reflection_result(prediction, original_traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 1083

# Turns a DSPy prediction into a ReflectionResult, sanitizing each field.
#
# Missing diagnosis/reasoning fall back to fixed default texts, improvements
# keep only non-blank strings, confidence is bounded to 0.0..1.0, and mutation
# suggestions are reduced to the known mutation symbols (defaulting to
# [:rewrite] when none survive).
#
# @param prediction [Object] responds to diagnosis, improvements, confidence,
#   reasoning, suggested_mutations, pattern_detected, optimization_opportunity
# @param original_traces [#size] the traces the prediction was produced from
# @return [ReflectionResult]
def convert_prediction_to_reflection_result(prediction, original_traces)
  result_id = generate_reflection_id

  diagnosis_text = prediction.diagnosis || 'DSPy reflection analysis'
  usable_improvements = Array(prediction.improvements).select do |entry|
    entry.is_a?(String) && !entry.strip.empty?
  end
  raw_confidence = prediction.confidence&.to_f || 0.0
  bounded_confidence = [[raw_confidence, 1.0].min, 0.0].max
  reasoning_text = prediction.reasoning || 'DSPy-based analysis of execution traces'

  # Normalize every suggestion to a lowercase symbol, then keep only known ones.
  known_mutations = [:rewrite, :expand, :simplify, :combine, :rephrase]
  mutations = Array(prediction.suggested_mutations)
              .map { |candidate| candidate.to_s.downcase.to_sym }
              .select { |candidate| known_mutations.include?(candidate) }
              .uniq
  mutations = [:rewrite] if mutations.empty?

  details = {
    reflection_model: @config.reflection_lm,
    analysis_timestamp: Time.now,
    trace_count: original_traces.size,
    token_usage: estimate_token_usage(prediction.to_s),
    llm_based: true,
    dspy_prediction: true,
    insights: {
      pattern_detected: prediction.pattern_detected || "unknown_pattern",
      optimization_opportunity: prediction.optimization_opportunity || "general_optimization"
    }
  }

  ReflectionResult.new(
    trace_id: result_id,
    diagnosis: diagnosis_text,
    improvements: usable_improvements,
    confidence: bounded_confidence,
    reasoning: reasoning_text,
    suggested_mutations: mutations,
    metadata: details
  )
end

#create_trace_reflection_signature ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 1039

# Returns the signature class used for LLM-based trace reflection.
#
# The class is an anonymous subclass of DSPy::Signature, built on first call
# and memoized in @trace_reflection_signature so later calls reuse it.
#
# Declared inputs: execution_summary, optimization_context, key_insights and
# sample_traces (all String).
# Declared outputs: diagnosis, improvements, confidence, reasoning,
# suggested_mutations, pattern_detected and optimization_opportunity.
#
# @return [Class] the memoized DSPy::Signature subclass
def create_trace_reflection_signature
  @trace_reflection_signature ||= Class.new(DSPy::Signature) do
    description "Analyze execution traces from GEPA optimization system and provide actionable optimization insights"
    
    # Fields supplied by the caller when the signature is invoked.
    input do
      const :execution_summary, String, description: "Summary of execution traces and performance patterns"
      const :optimization_context, String, description: "Context about the genetic algorithm optimization goals"
      const :key_insights, String, description: "Key insights extracted from trace analysis" 
      const :sample_traces, String, description: "Representative execution trace samples"
    end
    
    # Fields the model is asked to produce.
    output do
      const :diagnosis, String, description: "Brief description of execution patterns and issues identified"
      const :improvements, T::Array[String], description: "List of 2-4 specific actionable improvement suggestions"
      const :confidence, Float, description: "Confidence level in analysis (0.0 to 1.0)"
      const :reasoning, String, description: "Detailed reasoning process for the analysis"
      const :suggested_mutations, T::Array[String], description: "List of 2-3 most beneficial mutation types from: rewrite, expand, simplify, combine, rephrase"
      const :pattern_detected, String, description: "Primary pattern identified in execution traces"
      const :optimization_opportunity, String, description: "Key area identified for performance improvement"
    end
  end
end

#extract_optimization_insights(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 756

# Collects the optimization insights derived from the LLM traces only.
#
# @param traces [Array] trace objects responding to llm_trace?
# @return [Hash{Symbol => Object}] results of the token-efficiency,
#   response-quality and model-consistency analyses, keyed by aspect
def extract_optimization_insights(traces)
  llm_only = traces.select { |trace| trace.llm_trace? }

  {
    token_efficiency: analyze_token_efficiency(llm_only),
    response_quality: analyze_response_quality(llm_only),
    model_consistency: analyze_model_consistency(llm_only)
  }
end

#generate_improvement_suggestions(patterns) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 451

# Derives human-readable improvement suggestions from trace statistics.
#
# Missing (nil) entries in +patterns+ are treated as 0 — or as an empty model
# list — instead of raising, which mirrors how suggest_mutations reads the
# same hash.
#
# @param patterns [Hash{Symbol => Object}] statistics hash; the keys read are
#   :total_tokens, :avg_response_length, :llm_traces_count,
#   :module_traces_count and :unique_models
# @return [Array<String>] at least one suggestion; a generic step-by-step
#   reasoning hint is returned when no specific rule fires
def generate_improvement_suggestions(patterns)
  total_tokens = patterns[:total_tokens] || 0
  avg_response_length = patterns[:avg_response_length] || 0
  llm_count = patterns[:llm_traces_count] || 0
  module_count = patterns[:module_traces_count] || 0
  models = Array(patterns[:unique_models])

  suggestions = []
  suggestions << 'Consider reducing prompt length to lower token usage' if total_tokens > 500

  if avg_response_length < 10
    suggestions << 'Responses seem brief - consider asking for more detailed explanations'
  end

  # More than three LLM calls per module call is treated as heavy LLM usage.
  if llm_count > module_count * 3
    suggestions << 'High LLM usage detected - consider optimizing reasoning chains'
  end

  if models.size > 1
    suggestions << 'Multiple models used - consider standardizing on one model for consistency'
  end

  suggestions << 'Add step-by-step reasoning instructions' if suggestions.empty?
  suggestions
end

#generate_reflection_prompt(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 596

# Builds the free-text prompt that asks an LLM to reflect on execution traces
# and answer in a fixed JSON shape.
#
# With no traces a static "no data" prompt is returned. Otherwise the prompt
# embeds the trace summary, one bullet per optimization insight, and a
# formatted sample of the first three traces.
#
# @param traces [Array] execution traces; must respond to empty? and take
# @return [String] the prompt text
def generate_reflection_prompt(traces)
  # Nothing to summarize: return the static prompt that states no traces exist.
  if traces.empty?
    return <<~PROMPT
      You are analyzing execution traces for a genetic algorithm-based prompt optimization system called GEPA.
      
      **Task**: Analyze execution patterns and provide optimization recommendations.
      
      **Context**: No execution traces available.
      
      Please provide your analysis in the following JSON format:
      {
        "diagnosis": "Brief description of what you observed",
        "improvements": ["List of actionable improvement suggestions"],
        "confidence": 0.0,
        "reasoning": "Your reasoning process",
        "suggested_mutations": ["expand", "rewrite", "simplify", "combine", "rephrase"],
        "insights": {
          "pattern_detected": "no_data",
          "optimization_opportunity": "data_collection"
        }
      }
    PROMPT
  end
  
  # Both values are interpolated into the prompt below.
  summary = trace_summary_for_reflection(traces)
  insights = extract_optimization_insights(traces)
  
  # Hash-valued insights are rendered as a comma-separated list of their values.
  <<~PROMPT
    You are analyzing execution traces for a genetic algorithm-based prompt optimization system called GEPA.
    
    **Task**: Analyze execution patterns and provide optimization recommendations for prompt evolution.
    
    **Execution Summary**:
    #{summary}
    
    **Optimization Context**:
    - This is part of a genetic algorithm for prompt optimization
    - Available mutation types: rewrite, expand, simplify, combine, rephrase
    - Goal is to improve prompt effectiveness through iterative evolution
    - Focus on actionable insights that can guide mutation and crossover operations
    
    **Key Optimization Insights**:
    #{insights.map { |k, v| "- #{k}: #{v.is_a?(Hash) ? v.values.join(', ') : v}" }.join("\n")}
    
    **Sample Traces**:
    #{format_traces_for_prompt(traces.take(3))}
    
    Please analyze these execution patterns and provide optimization recommendations in the following JSON format:
    {
      "diagnosis": "Brief description of execution patterns and issues identified",
      "improvements": ["List of 2-4 specific, actionable improvement suggestions"],
      "confidence": 0.85,
      "reasoning": "Your detailed reasoning process for the analysis",
      "suggested_mutations": ["List of 2-3 mutation types that would be most beneficial"],
      "insights": {
        "pattern_detected": "primary_pattern_identified", 
        "optimization_opportunity": "key_area_for_improvement"
      }
    }
    
    Focus on practical recommendations that will improve prompt performance through genetic algorithm evolution.
  PROMPT
end

#parse_llm_reflection(response_text, original_traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 662

# Converts the raw JSON reply of the reflection LLM into a ReflectionResult.
#
# The reply must be a JSON object. Anything else — malformed JSON, a bare
# array or scalar, nil or empty text — produces a low-confidence fallback
# result (confidence 0.3, [:rewrite]) describing the parsing problem instead
# of raising.
#
# Sanitizing applied to a well-formed reply:
# - improvements keep only non-blank strings
# - confidence is clamped to 0.0..1.0 (values without to_f count as 0.0)
# - suggested mutations are reduced to the known mutation symbols, falling
#   back to [:rewrite] when none survive
#
# @param response_text [String, nil] raw LLM output expected to contain JSON
# @param original_traces [#size] traces the reflection was based on
# @return [ReflectionResult]
def parse_llm_reflection(response_text, original_traces)
  reflection_id = generate_reflection_id
  # A nil reply is handled like an empty one so it reaches the fallback branch.
  raw_text = response_text.to_s

  begin
    parsed = JSON.parse(raw_text)
    # Arrays and scalars are valid JSON but cannot carry the expected keys.
    unless parsed.is_a?(Hash)
      raise JSON::ParserError, "expected a JSON object, got #{parsed.class}"
    end

    # Extract and validate components
    diagnosis = parsed['diagnosis'] || 'LLM reflection analysis'
    improvements = Array(parsed['improvements']).select { |i| i.is_a?(String) && !i.strip.empty? }
    raw_confidence = parsed['confidence']
    numeric_confidence = raw_confidence.respond_to?(:to_f) ? raw_confidence.to_f : 0.0
    confidence = numeric_confidence.clamp(0.0, 1.0)
    reasoning = parsed['reasoning'] || 'LLM-based analysis of execution traces'

    # Validate and sanitize mutation suggestions
    allowed_mutations = %i[rewrite expand simplify combine rephrase]
    valid_mutations = Array(parsed['suggested_mutations']).filter_map do |mut|
      mutation_symbol = mut.to_s.downcase.to_sym
      mutation_symbol if allowed_mutations.include?(mutation_symbol)
    end.uniq

    # Ensure we have at least one valid mutation suggestion
    valid_mutations = [:rewrite] if valid_mutations.empty?

    ReflectionResult.new(
      trace_id: reflection_id,
      diagnosis: diagnosis,
      improvements: improvements,
      confidence: confidence,
      reasoning: reasoning,
      suggested_mutations: valid_mutations,
      metadata: {
        reflection_model: @config.reflection_lm,
        analysis_timestamp: Time.now,
        trace_count: original_traces.size,
        token_usage: estimate_token_usage(raw_text),
        llm_based: true,
        insights: parsed['insights'] || {}
      }
    )
  rescue JSON::ParserError => e
    # Handle malformed or non-object JSON responses
    ReflectionResult.new(
      trace_id: reflection_id,
      diagnosis: "LLM reflection JSON parsing error: #{e.message}",
      improvements: ['Review prompt structure and LLM response format'],
      confidence: 0.3,
      reasoning: "Failed to parse LLM reflection response as valid JSON",
      suggested_mutations: [:rewrite],
      metadata: {
        reflection_model: @config.reflection_lm,
        analysis_timestamp: Time.now,
        trace_count: original_traces.size,
        token_usage: 0,
        parsing_error: e.message,
        # Keep at most the first 500 characters of the offending reply.
        raw_response: raw_text.length > 500 ? "#{raw_text[0, 500]}..." : raw_text
      }
    )
  end
end

#reflect_on_traces(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 385

# Produces a rule-based (non-LLM) reflection over the given traces.
#
# An empty trace list yields a zero-confidence result with no improvements or
# mutations. Otherwise the traces are reduced to pattern statistics from which
# improvements, mutations, diagnosis, reasoning and confidence are derived.
#
# @param traces [Array] execution traces to analyze
# @return [ReflectionResult]
def reflect_on_traces(traces)
  result_id = generate_reflection_id
  without_traces = traces.empty?

  if without_traces
    findings = {
      diagnosis: 'No traces available for analysis',
      improvements: [],
      confidence: 0.0,
      reasoning: 'Cannot provide reflection without execution traces',
      suggested_mutations: []
    }
  else
    # Phase 1 analysis is purely rule-based; no LLM is consulted here.
    patterns = analyze_execution_patterns(traces)
    suggestions = generate_improvement_suggestions(patterns)
    mutation_plan = suggest_mutations(patterns)
    summary = generate_diagnosis(patterns)
    rationale = generate_reasoning(patterns, traces)
    certainty = calculate_confidence(patterns)

    findings = {
      diagnosis: summary,
      improvements: suggestions,
      confidence: certainty,
      reasoning: rationale,
      suggested_mutations: mutation_plan
    }
  end

  details = {
    reflection_model: @config.reflection_lm,
    analysis_timestamp: Time.now,
    trace_count: without_traces ? 0 : traces.size
  }
  # Token usage is only reported for real analyses, and is always 0 because
  # the rule-based path never calls an LLM.
  details[:token_usage] = 0 unless without_traces

  ReflectionResult.new(trace_id: result_id, **findings, metadata: details)
end

#reflect_with_llm(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 569

# Reflects on traces through the DSPy predictor, degrading to the rule-based
# analysis when the LLM path raises.
#
# Empty input is delegated straight to reflect_on_traces. On failure the
# rule-based result is rebuilt with the error noted in diagnosis, reasoning
# and metadata, and with its confidence halved and capped at 0.5.
#
# @param traces [Array] execution traces to analyze
# @return [Object] the converted prediction, or the annotated fallback result
def reflect_with_llm(traces)
  return reflect_on_traces(traces) if traces.empty?

  begin
    llm_prediction = analyze_traces_with_dspy(traces)
    convert_prediction_to_reflection_result(llm_prediction, traces)
  rescue => error
    rule_based = reflect_on_traces(traces)
    failure = error.message
    reduced_confidence = [rule_based.confidence * 0.5, 0.5].min

    # Rebuild with the same class as the fallback so the result type matches.
    rule_based.class.new(
      trace_id: rule_based.trace_id,
      diagnosis: "LLM reflection failed (#{failure}), using fallback analysis: #{rule_based.diagnosis}",
      improvements: rule_based.improvements,
      confidence: reduced_confidence,
      reasoning: "Fallback to rule-based analysis after LLM error: #{rule_based.reasoning}",
      suggested_mutations: rule_based.suggested_mutations,
      metadata: rule_based.metadata.merge(llm_error: failure, fallback_used: true)
    )
  end
end

#reflection_with_context(traces, context) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 770

# Runs an LLM reflection and enriches it with optimization-run context.
#
# The reasoning is prefixed with the generation, population size and (when
# present) the current best score; mutation suggestions are passed through
# adjust_mutations_for_history; the context is stored in the metadata under
# :optimization_context.
#
# @param traces [Array] execution traces to analyze
# @param context [Hash{Symbol => Object}] keys read: :generation,
#   :population_size, :current_best_score, :mutation_history,
#   :recent_performance_trend
# @return [ReflectionResult]
def reflection_with_context(traces, context)
  base = reflect_with_llm(traces)

  preamble_parts = [
    "Generation #{context[:generation] || 'unknown'} analysis. ",
    "Population size: #{context[:population_size] || 'unknown'}. "
  ]
  best_score = context[:current_best_score]
  preamble_parts << "Current best score: #{best_score}. " if best_score
  preamble = preamble_parts.join

  # Let the mutation history and recent trend reshape the suggested mutations.
  tuned_mutations = adjust_mutations_for_history(
    base.suggested_mutations,
    context[:mutation_history] || [],
    context[:recent_performance_trend]
  )

  ReflectionResult.new(
    trace_id: base.trace_id,
    diagnosis: base.diagnosis,
    improvements: base.improvements,
    confidence: base.confidence,
    reasoning: preamble + base.reasoning,
    suggested_mutations: tuned_mutations,
    metadata: base.metadata.merge(optimization_context: context)
  )
end

#suggest_mutations(patterns) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 476

# Chooses mutation types from trace statistics.
#
# Missing statistics count as 0. Rules, in output order: short responses
# (< 15) suggest :expand, heavy token use (> 300) suggests :simplify, more
# than two LLM traces suggest :combine, exactly one LLM trace suggests
# :rewrite. When no rule fires, [:rephrase] is returned.
#
# @param patterns [Hash{Symbol => Object}] keys read: :avg_response_length,
#   :total_tokens, :llm_traces_count
# @return [Array<Symbol>] non-empty list of mutation types
def suggest_mutations(patterns)
  response_length = patterns[:avg_response_length] || 0
  token_total = patterns[:total_tokens] || 0
  llm_calls = patterns[:llm_traces_count] || 0

  # Insertion order of this hash fixes the order of the returned mutations.
  triggers = {
    expand: response_length < 15,
    simplify: token_total > 300,
    combine: llm_calls > 2,
    rewrite: llm_calls == 1
  }
  chosen = triggers.select { |_mutation, fired| fired }.keys

  chosen.empty? ? [:rephrase] : chosen
end

#trace_summary_for_reflection(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 726

# Renders a short multi-line text summary of the traces for use in prompts.
#
# @param traces [Array] trace objects responding to llm_trace? and
#   module_trace?; LLM traces must also respond to token_usage, model_name
#   and response_text
# @return [String] a fixed notice when there are no traces, otherwise seven
#   newline-terminated lines with counts, tokens, models, the average response
#   length (integer division over LLM traces) and the rounded timespan
def trace_summary_for_reflection(traces)
  return "No execution traces available" if traces.empty?

  llm_calls = traces.select { |trace| trace.llm_trace? }
  module_calls = traces.select { |trace| trace.module_trace? }

  token_total = llm_calls.sum { |trace| trace.token_usage }
  model_names = llm_calls.map { |trace| trace.model_name }.compact.uniq
  elapsed = calculate_timespan(traces)

  # Guarded so an LLM-free batch reports 0 instead of dividing by zero.
  mean_chars = 0
  if llm_calls.any?
    response_chars = llm_calls.sum { |trace| trace.response_text&.length || 0 }
    mean_chars = response_chars / llm_calls.size
  end

  report_lines = [
    "Total traces: #{traces.size}",
    "LLM interactions: #{llm_calls.size}",
    "Module calls: #{module_calls.size}",
    "Total tokens: #{token_total}",
    "Models used: #{model_names.join(', ')}",
    "Average response length: #{mean_chars} characters",
    "Execution timespan: #{elapsed.round(2)} seconds"
  ]
  report_lines.map { |line| "#{line}\n" }.join
end

Class: DSPy::Teleprompt::GEPA::ReflectionEngine

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config = nil) ⇒ ReflectionEngine

Instance Attribute Details

#config ⇒ Object (readonly)

Instance Method Details

#analyze_execution_patterns(traces) ⇒ Object

#analyze_traces_with_dspy(traces) ⇒ Object

#convert_prediction_to_reflection_result(prediction, original_traces) ⇒ Object

#create_trace_reflection_signature ⇒ Object

#extract_optimization_insights(traces) ⇒ Object

#generate_improvement_suggestions(patterns) ⇒ Object

#generate_reflection_prompt(traces) ⇒ Object

#parse_llm_reflection(response_text, original_traces) ⇒ Object

#reflect_on_traces(traces) ⇒ Object

#reflect_with_llm(traces) ⇒ Object

#reflection_with_context(traces, context) ⇒ Object

#suggest_mutations(patterns) ⇒ Object

#trace_summary_for_reflection(traces) ⇒ Object