Class: DSPy::Teleprompt::GEPA::ReflectionEngine

Inherits:

Object

Object
DSPy::Teleprompt::GEPA::ReflectionEngine

show all

Extended by: T::Sig

Defined in: lib/dspy/teleprompt/gepa.rb

Overview

ReflectionEngine performs natural language reflection on execution traces. This is the core component that analyzes traces and generates improvement insights.

Instance Attribute Summary collapse

#config ⇒ Object readonly

Returns the value of attribute config.

Instance Method Summary collapse

Constructor Details

#initialize(config = nil) ⇒ `ReflectionEngine`

Returns a new instance of ReflectionEngine.



380
381
382

# File 'lib/dspy/teleprompt/gepa.rb', line 380

# Builds a reflection engine around a GEPA configuration.
#
# @param config [Object, nil] configuration to keep in @config; when it is
#   nil (or false) a new GEPAConfig is created with GEPAConfig.new instead
def initialize(config = nil)
  @config = config
  @config ||= GEPAConfig.new
end

Instance Attribute Details

#config ⇒ `Object` (readonly)

Returns the value of attribute config.



377
378
379

# File 'lib/dspy/teleprompt/gepa.rb', line 377

# Read-only accessor for the configuration stored by #initialize.
#
# @return [Object] the current value of @config
def config
  @config
end

Instance Method Details

#analyze_execution_patterns(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 433

# Collects aggregate metrics from a list of execution traces.
#
# Traces answering true to #llm_trace? and #module_trace? are counted
# separately; token totals and model names are taken from the LLM traces only.
#
# @param traces [Array] trace objects responding to llm_trace?, module_trace?,
#   token_usage and model_name
# @return [Hash] with keys :llm_traces_count, :module_traces_count,
#   :total_tokens, :unique_models, :avg_response_length (from
#   calculate_avg_response_length) and :trace_timespan (from calculate_timespan)
def analyze_execution_patterns(traces)
  llm_calls = traces.select { |trace| trace.llm_trace? }
  module_calls = traces.select { |trace| trace.module_trace? }

  token_total = llm_calls.sum { |trace| trace.token_usage }
  # nil model names are dropped before de-duplication
  model_names = llm_calls.map { |trace| trace.model_name }.compact.uniq

  {
    llm_traces_count: llm_calls.size,
    module_traces_count: module_calls.size,
    total_tokens: token_total,
    unique_models: model_names,
    avg_response_length: calculate_avg_response_length(llm_calls),
    trace_timespan: calculate_timespan(traces)
  }
end

#analyze_traces_with_dspy(traces) ⇒ `Object`

Raises:

(ArgumentError)

# File 'lib/dspy/teleprompt/gepa.rb', line 756

# Runs LLM-backed reflection over the traces through a DSPy::Predict predictor.
#
# The predictor is built from the signature returned by
# create_trace_reflection_signature and its LM is set to
# @config.reflection_lm before it is called.
#
# @param traces [Array] execution traces; only the first three are passed to
#   format_traces_for_prompt as samples
# @return [Object] the value returned by the predictor call, passed through T.unsafe
# @raise [ArgumentError] when @config.reflection_lm is not set
def analyze_traces_with_dspy(traces)
  unless @config.reflection_lm
    raise ArgumentError, "reflection_lm must be configured on GEPAConfig for LLM-based reflection"
  end

  reflection_predictor = DSPy::Predict.new(create_trace_reflection_signature)
  # Reflection uses its own LM, taken from the GEPA configuration.
  reflection_predictor.config.lm = @config.reflection_lm

  execution_summary = trace_summary_for_reflection(traces)
  # One "- name: value" line per insight; Hash values are flattened to a comma list.
  insight_lines = extract_optimization_insights(traces).map do |name, value|
    rendered = value.is_a?(Hash) ? value.values.join(', ') : value
    "- #{name}: #{rendered}"
  end

  response = reflection_predictor.call(
    execution_summary: execution_summary,
    optimization_context: "GEPA genetic algorithm for prompt optimization. Available mutations: rewrite, expand, simplify, combine, rephrase. Goal: improve prompt effectiveness through iterative evolution.",
    key_insights: insight_lines.join("\n"),
    sample_traces: format_traces_for_prompt(traces.take(3))
  )
  T.unsafe(response)
end

#convert_prediction_to_reflection_result(prediction, original_traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 780

# Turns a DSPy prediction into a ReflectionResult, sanitizing each field.
#
# - diagnosis / reasoning fall back to fixed default strings when falsy
# - improvements keeps only non-blank String entries
# - confidence is converted with to_f (nil becomes 0.0) and bounded to 0.0..1.0
# - suggested mutations are lower-cased, restricted to the five known types,
#   de-duplicated, and default to [:rewrite] when none survive
#
# @param prediction [Object] responds to diagnosis, improvements, confidence,
#   reasoning, suggested_mutations, pattern_detected, optimization_opportunity
# @param original_traces [Array] traces the prediction was derived from
#   (only their count is recorded)
# @return [ReflectionResult]
def convert_prediction_to_reflection_result(prediction, original_traces)
  result_id = generate_reflection_id

  summary = prediction.diagnosis || 'DSPy reflection analysis'
  suggestions = Array(prediction.improvements).reject do |entry|
    !entry.is_a?(String) || entry.strip.empty?
  end
  raw_confidence = prediction.confidence&.to_f || 0.0
  bounded_confidence = [[raw_confidence, 1.0].min, 0.0].max
  rationale = prediction.reasoning || 'DSPy-based analysis of execution traces'

  # Whitelist by name first, then symbolize only the accepted names.
  known_mutations = %w[rewrite expand simplify combine rephrase]
  mutations = Array(prediction.suggested_mutations)
              .map { |candidate| candidate.to_s.downcase }
              .select { |name| known_mutations.include?(name) }
              .map(&:to_sym)
              .uniq
  # Always hand back at least one mutation to try.
  mutations = [:rewrite] if mutations.empty?

  details = {
    reflection_model: @config.reflection_lm&.model,
    analysis_timestamp: Time.now,
    trace_count: original_traces.size,
    token_usage: estimate_token_usage(prediction.to_s),
    llm_based: true,
    dspy_prediction: true,
    insights: {
      pattern_detected: prediction.pattern_detected || "unknown_pattern",
      optimization_opportunity: prediction.optimization_opportunity || "general_optimization"
    }
  }

  ReflectionResult.new(
    trace_id: result_id,
    diagnosis: summary,
    improvements: suggestions,
    confidence: bounded_confidence,
    reasoning: rationale,
    suggested_mutations: mutations,
    metadata: details
  )
end

#create_trace_reflection_signature ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 731

# Returns the DSPy signature class used for trace reflection.
#
# The anonymous DSPy::Signature subclass is built on first use and cached in
# @trace_reflection_signature, so later calls return the same class object.
#
# @return [Class] subclass of DSPy::Signature declaring four String inputs and
#   seven outputs (diagnosis, improvements, confidence, reasoning,
#   suggested_mutations, pattern_detected, optimization_opportunity)
def create_trace_reflection_signature
  return @trace_reflection_signature if @trace_reflection_signature

  @trace_reflection_signature = Class.new(DSPy::Signature) do
    description("Analyze execution traces from GEPA optimization system and provide actionable optimization insights")

    # What the reflection model is given.
    input do
      const :execution_summary, String, description: "Summary of execution traces and performance patterns"
      const :optimization_context, String, description: "Context about the genetic algorithm optimization goals"
      const :key_insights, String, description: "Key insights extracted from trace analysis"
      const :sample_traces, String, description: "Representative execution trace samples"
    end

    # What the reflection model is asked to produce.
    output do
      const :diagnosis, String, description: "Brief description of execution patterns and issues identified"
      const :improvements, T::Array[String], description: "List of 2-4 specific actionable improvement suggestions"
      const :confidence, Float, description: "Confidence level in analysis (0.0 to 1.0)"
      const :reasoning, String, description: "Detailed reasoning process for the analysis"
      const :suggested_mutations, T::Array[String], description: "List of 2-3 most beneficial mutation types from: rewrite, expand, simplify, combine, rephrase"
      const :pattern_detected, String, description: "Primary pattern identified in execution traces"
      const :optimization_opportunity, String, description: "Key area identified for performance improvement"
    end
  end
end

#extract_optimization_insights(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 684

# Gathers the three insight groups computed from the LLM traces.
#
# Only traces answering true to #llm_trace? are handed to the analyzers.
#
# @param traces [Array] trace objects responding to llm_trace?
# @return [Hash] :token_efficiency, :response_quality and :model_consistency,
#   each holding whatever the corresponding analyze_* helper returns
def extract_optimization_insights(traces)
  llm_only = traces.select { |trace| trace.llm_trace? }

  {
    token_efficiency: analyze_token_efficiency(llm_only),
    response_quality: analyze_response_quality(llm_only),
    model_consistency: analyze_model_consistency(llm_only)
  }
end

#generate_improvement_suggestions(patterns) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 452

# Derives human-readable improvement suggestions from execution metrics.
#
# Missing (nil) metrics are treated as 0 / no models, matching how
# suggest_mutations reads the same hash, so a partial patterns hash no longer
# raises NoMethodError.
#
# Rules, in output order:
# - total tokens above 500          -> shorten the prompt
# - average response length below 10 -> ask for more detail
# - LLM traces more than 3x module traces -> optimize reasoning chains
# - more than one distinct model    -> standardize on one model
# When no rule fires, a single generic suggestion is returned.
#
# @param patterns [Hash] metrics keyed by :total_tokens, :avg_response_length,
#   :llm_traces_count, :module_traces_count and :unique_models
# @return [Array<String>] at least one suggestion
def generate_improvement_suggestions(patterns)
  total_tokens = patterns[:total_tokens] || 0
  avg_response_length = patterns[:avg_response_length] || 0
  llm_count = patterns[:llm_traces_count] || 0
  module_count = patterns[:module_traces_count] || 0
  # Array() tolerates nil (no models) as well as a single bare model name.
  model_count = Array(patterns[:unique_models]).size

  suggestions = []
  suggestions << 'Consider reducing prompt length to lower token usage' if total_tokens > 500
  if avg_response_length < 10
    suggestions << 'Responses seem brief - consider asking for more detailed explanations'
  end
  if llm_count > module_count * 3
    suggestions << 'High LLM usage detected - consider optimizing reasoning chains'
  end
  if model_count > 1
    suggestions << 'Multiple models used - consider standardizing on one model for consistency'
  end

  suggestions << 'Add step-by-step reasoning instructions' if suggestions.empty?
  suggestions
end

#generate_reflection_prompt(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 524

# Builds the raw text prompt asking an LLM to reflect on execution traces and
# answer in a fixed JSON shape.
#
# With no traces, a static prompt stating that no traces are available is
# returned. Otherwise the prompt embeds the summary from
# trace_summary_for_reflection, one "- name: value" line per entry of
# extract_optimization_insights (Hash values flattened to a comma list), and
# the first three traces rendered by format_traces_for_prompt.
#
# @param traces [Array] execution traces
# @return [String] the prompt text, ending in a newline
def generate_reflection_prompt(traces)
  if traces.empty?
    return <<~PROMPT
      You are analyzing execution traces for a genetic algorithm-based prompt optimization system called GEPA.

      **Task**: Analyze execution patterns and provide optimization recommendations.

      **Context**: No execution traces available.

      Please provide your analysis in the following JSON format:
      {
        "diagnosis": "Brief description of what you observed",
        "improvements": ["List of actionable improvement suggestions"],
        "confidence": 0.0,
        "reasoning": "Your reasoning process",
        "suggested_mutations": ["expand", "rewrite", "simplify", "combine", "rephrase"],
        "insights": {
          "pattern_detected": "no_data",
          "optimization_opportunity": "data_collection"
        }
      }
    PROMPT
  end

  summary = trace_summary_for_reflection(traces)
  insights = extract_optimization_insights(traces)
  insights_text = insights.map { |k, v| "- #{k}: #{v.is_a?(Hash) ? v.values.join(', ') : v}" }.join("\n")
  # Only a small sample of traces is inlined into the prompt.
  sample_traces = format_traces_for_prompt(traces.take(3))

  <<~PROMPT
    You are analyzing execution traces for a genetic algorithm-based prompt optimization system called GEPA.

    **Task**: Analyze execution patterns and provide optimization recommendations for prompt evolution.

    **Execution Summary**:
    #{summary}

    **Optimization Context**:
    - This is part of a genetic algorithm for prompt optimization
    - Available mutation types: rewrite, expand, simplify, combine, rephrase
    - Goal is to improve prompt effectiveness through iterative evolution
    - Focus on actionable insights that can guide mutation and crossover operations

    **Key Optimization Insights**:
    #{insights_text}

    **Sample Traces**:
    #{sample_traces}

    Please analyze these execution patterns and provide optimization recommendations in the following JSON format:
    {
      "diagnosis": "Brief description of execution patterns and issues identified",
      "improvements": ["List of 2-4 specific, actionable improvement suggestions"],
      "confidence": 0.85,
      "reasoning": "Your detailed reasoning process for the analysis",
      "suggested_mutations": ["List of 2-3 mutation types that would be most beneficial"],
      "insights": {
        "pattern_detected": "primary_pattern_identified", 
        "optimization_opportunity": "key_area_for_improvement"
      }
    }

    Focus on practical recommendations that will improve prompt performance through genetic algorithm evolution.
  PROMPT
end

#parse_llm_reflection(response_text, original_traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 590

# Parses a raw JSON reflection response from the LLM into a ReflectionResult.
#
# On success the fields are sanitized: blank or non-String improvements are
# dropped, confidence is clamped to 0.0..1.0, and suggested mutations are
# restricted to the five known types (defaulting to [:rewrite]).
#
# Any response that cannot be used as a JSON object -- malformed JSON, a nil
# response, or valid JSON that is an array/scalar -- yields a low-confidence
# (0.3) fallback result carrying the parse error and a truncated copy of the
# raw response in its metadata, instead of raising.
#
# @param response_text [String, nil] raw LLM output expected to be a JSON object
# @param original_traces [Array] traces the reflection refers to (only their
#   count is recorded)
# @return [ReflectionResult]
def parse_llm_reflection(response_text, original_traces)
  reflection_id = generate_reflection_id
  # A nil response is treated like an empty one and handled by the rescue path.
  raw_text = response_text.to_s

  begin
    parsed = JSON.parse(raw_text)
    # Valid JSON that is not an object cannot carry the expected keys; route it
    # through the same recovery path as malformed JSON.
    unless parsed.is_a?(Hash)
      raise JSON::ParserError, "expected a JSON object, got #{parsed.class}"
    end

    # Extract and validate components
    diagnosis = parsed['diagnosis'] || 'LLM reflection analysis'
    improvements = Array(parsed['improvements']).select { |i| i.is_a?(String) && !i.strip.empty? }
    raw_confidence = parsed['confidence']
    # Non-numeric JSON values (arrays, objects, booleans) count as no confidence.
    confidence = (raw_confidence.respond_to?(:to_f) ? raw_confidence.to_f : 0.0).clamp(0.0, 1.0)
    reasoning = parsed['reasoning'] || 'LLM-based analysis of execution traces'

    # Validate and sanitize mutation suggestions
    valid_mutations = Array(parsed['suggested_mutations']).filter_map do |mut|
      mutation_symbol = mut.to_s.downcase.to_sym
      mutation_symbol if [:rewrite, :expand, :simplify, :combine, :rephrase].include?(mutation_symbol)
    end.uniq

    # Ensure we have at least one valid mutation suggestion
    valid_mutations = [:rewrite] if valid_mutations.empty?

    ReflectionResult.new(
      trace_id: reflection_id,
      diagnosis: diagnosis,
      improvements: improvements,
      confidence: confidence,
      reasoning: reasoning,
      suggested_mutations: valid_mutations,
      metadata: {
        reflection_model: @config.reflection_lm&.model,
        analysis_timestamp: Time.now,
        trace_count: original_traces.size,
        token_usage: estimate_token_usage(raw_text),
        llm_based: true,
        insights: parsed['insights'] || {}
      }
    )
  rescue JSON::ParserError => e
    # Unusable response: report the failure with a conservative default result.
    ReflectionResult.new(
      trace_id: reflection_id,
      diagnosis: "LLM reflection JSON parsing error: #{e.message}",
      improvements: ['Review prompt structure and LLM response format'],
      confidence: 0.3,
      reasoning: "Failed to parse LLM reflection response as valid JSON",
      suggested_mutations: [:rewrite],
      metadata: {
        reflection_model: @config.reflection_lm&.model,
        analysis_timestamp: Time.now,
        trace_count: original_traces.size,
        token_usage: 0,
        parsing_error: e.message,
        # Keep at most the first 500 characters of the offending response.
        raw_response: raw_text.length > 500 ? "#{raw_text[0, 500]}..." : raw_text
      }
    )
  end
end

#reflect_on_traces(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 386

# Produces a rule-based ReflectionResult for the given traces (no LLM call).
#
# With no traces, an empty result with confidence 0.0 and no suggestions is
# returned. Otherwise patterns are computed once via
# analyze_execution_patterns and fed to the suggestion, mutation, diagnosis,
# reasoning and confidence helpers.
#
# @param traces [Array] execution traces
# @return [ReflectionResult] metadata records the reflection model (if any),
#   a timestamp and the trace count; non-empty results also record token_usage 0
def reflect_on_traces(traces)
  result_id = generate_reflection_id

  if traces.empty?
    ReflectionResult.new(
      trace_id: result_id,
      diagnosis: 'No traces available for analysis',
      improvements: [],
      confidence: 0.0,
      reasoning: 'Cannot provide reflection without execution traces',
      suggested_mutations: [],
      metadata: {
        reflection_model: @config.reflection_lm&.model,
        analysis_timestamp: Time.now,
        trace_count: 0
      }
    )
  else
    observed = analyze_execution_patterns(traces)
    suggested_improvements = generate_improvement_suggestions(observed)
    mutation_candidates = suggest_mutations(observed)

    # Phase 1 is purely rule-based; LLM-backed reflection is left to later phases.
    summary = generate_diagnosis(observed)
    rationale = generate_reasoning(observed, traces)
    certainty = calculate_confidence(observed)

    ReflectionResult.new(
      trace_id: result_id,
      diagnosis: summary,
      improvements: suggested_improvements,
      confidence: certainty,
      reasoning: rationale,
      suggested_mutations: mutation_candidates,
      metadata: {
        reflection_model: @config.reflection_lm&.model,
        analysis_timestamp: Time.now,
        trace_count: traces.size,
        token_usage: 0 # no LLM is consulted on this path
      }
    )
  end
end

#reflect_with_llm(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 497

# Reflects on traces with the LLM, degrading to rule-based analysis on failure.
#
# Empty input goes straight to reflect_on_traces. Otherwise the traces are
# analyzed via analyze_traces_with_dspy and converted to a result. If either
# step raises a StandardError, the rule-based result is rebuilt with the error
# noted in diagnosis/reasoning/metadata and its confidence halved (and never
# above 0.5).
#
# @param traces [Array] execution traces
# @return [Object] a reflection result; on fallback it is a new instance of
#   the rule-based result's class
def reflect_with_llm(traces)
  return reflect_on_traces(traces) if traces.empty?

  begin
    # Goes through DSPy::Predict rather than a hand-written prompt.
    convert_prediction_to_reflection_result(analyze_traces_with_dspy(traces), traces)
  rescue => error
    rule_based = reflect_on_traces(traces)
    failure = error.message
    # Confidence is discounted because the LLM path was unavailable.
    discounted = [rule_based.confidence * 0.5, 0.5].min
    annotated_metadata = rule_based.metadata.merge(
      llm_error: failure,
      fallback_used: true
    )

    rule_based.class.new(
      trace_id: rule_based.trace_id,
      diagnosis: "LLM reflection failed (#{failure}), using fallback analysis: #{rule_based.diagnosis}",
      improvements: rule_based.improvements,
      confidence: discounted,
      reasoning: "Fallback to rule-based analysis after LLM error: #{rule_based.reasoning}",
      suggested_mutations: rule_based.suggested_mutations,
      metadata: annotated_metadata
    )
  end
end

#reflection_with_context(traces, context) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 698

# Runs reflect_with_llm and enriches the result with optimization context.
#
# The reasoning is prefixed with the generation, population size and (when
# present) the current best score; the suggested mutations are passed through
# adjust_mutations_for_history together with the mutation history and recent
# performance trend; the whole context is stored under
# metadata[:optimization_context].
#
# @param traces [Array] execution traces
# @param context [Hash] may contain :generation, :population_size,
#   :current_best_score, :mutation_history and :recent_performance_trend
# @return [ReflectionResult]
def reflection_with_context(traces, context)
  base = reflect_with_llm(traces)

  # Sentences describing where in the optimization run this reflection happened.
  preamble = [
    "Generation #{context[:generation] || 'unknown'} analysis. ",
    "Population size: #{context[:population_size] || 'unknown'}. "
  ]
  preamble << "Current best score: #{context[:current_best_score]}. " if context[:current_best_score]

  history = context[:mutation_history] || []
  tuned_mutations = adjust_mutations_for_history(
    base.suggested_mutations,
    history,
    context[:recent_performance_trend]
  )

  ReflectionResult.new(
    trace_id: base.trace_id,
    diagnosis: base.diagnosis,
    improvements: base.improvements,
    confidence: base.confidence,
    reasoning: preamble.join('') + base.reasoning,
    suggested_mutations: tuned_mutations,
    metadata: base.metadata.merge(optimization_context: context)
  )
end

#suggest_mutations(patterns) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 477

# Maps execution-pattern metrics to candidate mutation types.
#
# Missing metrics are treated as 0. Rules, in output order:
# :expand when the average response length is below 15, :simplify when total
# tokens exceed 300, :combine when there are more than 2 LLM traces, :rewrite
# when there is exactly 1 LLM trace. When no rule applies, [:rephrase] is
# returned.
#
# @param patterns [Hash] metrics keyed by :avg_response_length, :total_tokens
#   and :llm_traces_count
# @return [Array<Symbol>] one or more mutation types
def suggest_mutations(patterns)
  response_length = patterns[:avg_response_length] || 0
  token_total = patterns[:total_tokens] || 0
  llm_calls = patterns[:llm_traces_count] || 0

  # Insertion order of this table is the order of the returned mutations.
  rules = {
    expand: response_length < 15,
    simplify: token_total > 300,
    combine: llm_calls > 2,
    rewrite: llm_calls == 1
  }

  chosen = rules.select { |_mutation, applies| applies }.keys
  chosen.empty? ? [:rephrase] : chosen
end

#trace_summary_for_reflection(traces) ⇒ `Object`

# File 'lib/dspy/teleprompt/gepa.rb', line 654

# Renders a short plain-text summary of the traces for use in reflection prompts.
#
# @param traces [Array] trace objects responding to llm_trace?, module_trace?,
#   token_usage, model_name and response_text
# @return [String] "No execution traces available" for empty input; otherwise
#   seven newline-terminated lines covering trace counts, total tokens, models
#   used, average response length (integer division over the LLM traces, with
#   nil responses counted as length 0) and the timespan from
#   calculate_timespan rounded to two decimals
def trace_summary_for_reflection(traces)
  return "No execution traces available" if traces.empty?

  llm_traces = traces.select(&:llm_trace?)
  module_traces = traces.select(&:module_trace?)

  total_tokens = llm_traces.sum(&:token_usage)
  unique_models = llm_traces.map(&:model_name).compact.uniq
  timespan = calculate_timespan(traces)

  avg_response_length = if llm_traces.any?
    total_length = llm_traces.sum { |t| t.response_text&.length || 0 }
    total_length / llm_traces.size
  else
    0
  end

  <<~SUMMARY
    Total traces: #{traces.size}
    LLM interactions: #{llm_traces.size}
    Module calls: #{module_traces.size}
    Total tokens: #{total_tokens}
    Models used: #{unique_models.join(', ')}
    Average response length: #{avg_response_length} characters
    Execution timespan: #{timespan.round(2)} seconds
  SUMMARY
end

Class: DSPy::Teleprompt::GEPA::ReflectionEngine

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config = nil) ⇒ ReflectionEngine

Instance Attribute Details

#config ⇒ Object (readonly)

Instance Method Details

#analyze_execution_patterns(traces) ⇒ Object

#analyze_traces_with_dspy(traces) ⇒ Object

#convert_prediction_to_reflection_result(prediction, original_traces) ⇒ Object

#create_trace_reflection_signature ⇒ Object

#extract_optimization_insights(traces) ⇒ Object

#generate_improvement_suggestions(patterns) ⇒ Object

#generate_reflection_prompt(traces) ⇒ Object

#parse_llm_reflection(response_text, original_traces) ⇒ Object

#reflect_on_traces(traces) ⇒ Object

#reflect_with_llm(traces) ⇒ Object

#reflection_with_context(traces, context) ⇒ Object

#suggest_mutations(patterns) ⇒ Object

#trace_summary_for_reflection(traces) ⇒ Object