Class: Aidp::Harness::ErrorHandler
- Inherits: Object
- Class hierarchy: Object → Aidp::Harness::ErrorHandler
- Includes:
- DebugMixin
- Defined in:
- lib/aidp/harness/error_handler.rb
Overview
Handles error recovery, retry strategies, and fallback mechanisms
Defined Under Namespace
Classes: BackoffCalculator, ErrorClassifier, RecoveryPlanner, Sleeper
Constant Summary
Constants included from DebugMixin
DebugMixin::DEBUG_BASIC, DebugMixin::DEBUG_OFF, DebugMixin::DEBUG_VERBOSE
Instance Method Summary collapse
-
#attempt_recovery(error_info, context = {}) ⇒ Object
Attempt recovery when retries are exhausted or not applicable.
-
#circuit_breaker_status ⇒ Object
Get circuit breaker status.
-
#clear_error_history ⇒ Object
Clear error history.
-
#error_history(time_range = nil) ⇒ Object
Get error history.
-
#error_stats ⇒ Object
Get error statistics.
-
#execute_retry(error_info, strategy, context = {}) ⇒ Object
Execute a retry with the given strategy.
-
#execute_with_retry(&block) ⇒ Object
Execute a block with retry logic.
-
#handle_error(error, context = {}) ⇒ Object
Main entry point for error handling.
-
#initialize(provider_manager, configuration, metrics_manager = nil, sleeper: nil) ⇒ ErrorHandler
constructor
A new instance of ErrorHandler.
-
#max_attempts ⇒ Object
Get maximum retry attempts.
-
#reset_all_circuit_breakers ⇒ Object
Reset all circuit breakers.
-
#reset_circuit_breaker(provider, model = nil) ⇒ Object
Reset circuit breaker.
-
#reset_retry_counts(provider, model = nil) ⇒ Object
Reset retry counts for a specific provider/model combination.
-
#retry_status(provider, model = nil) ⇒ Object
Get retry status for a provider/model.
-
#retry_strategy(error_type) ⇒ Object
Get retry strategy for error type.
-
#should_retry?(error_info, strategy) ⇒ Boolean
Check if we should retry based on error type and strategy.
Methods included from DebugMixin
#debug_basic?, #debug_command, #debug_enabled?, #debug_error, #debug_execute_command, #debug_level, #debug_log, #debug_logger, #debug_provider, #debug_step, #debug_timing, #debug_verbose?, included, shared_logger
Constructor Details
#initialize(provider_manager, configuration, metrics_manager = nil, sleeper: nil) ⇒ ErrorHandler
Returns a new instance of ErrorHandler.
23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/aidp/harness/error_handler.rb', line 23

# Builds an ErrorHandler wired to its collaborators and seeds all
# internal tracking state (retry counts, error history, circuit breakers).
#
# @param provider_manager [Object] used to switch/mark providers on failure
# @param configuration [Object] supplies retry settings (e.g. max_retries)
# @param metrics_manager [Object, nil] optional sink for error metrics
# @param sleeper [Object, nil] injectable sleep abstraction (defaults to Sleeper.new;
#   useful for tests that must not actually sleep)
def initialize(provider_manager, configuration, metrics_manager = nil, sleeper: nil)
  @provider_manager = provider_manager
  @configuration = configuration
  @metrics_manager = metrics_manager
  @sleeper = sleeper || Sleeper.new

  # Mutable tracking state
  @retry_strategies = {}
  @retry_counts = {}
  @error_history = []
  @circuit_breakers = {}

  # Stateless helper objects
  @backoff_calculator = BackoffCalculator.new
  @error_classifier = ErrorClassifier.new
  @recovery_planner = RecoveryPlanner.new

  # Populates @retry_strategies with per-error-type defaults
  initialize_retry_strategies
end
Instance Method Details
#attempt_recovery(error_info, context = {}) ⇒ Object
Attempt recovery when retries are exhausted or not applicable
225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 |
# File 'lib/aidp/harness/error_handler.rb', line 225

# Attempt recovery when retries are exhausted or not applicable.
# Delegates planning to @recovery_planner, then dispatches on the
# plan's :action. Unknown actions produce a structured failure hash.
#
# @param error_info [Hash] classified error details
# @param context [Hash] extra context passed through to the planner
# @return [Hash] recovery result from the chosen handler
def attempt_recovery(error_info, context = {})
  plan = @recovery_planner.create_recovery_plan(error_info, context)

  case plan[:action]
  when :switch_provider then attempt_provider_switch(error_info, plan)
  when :switch_model    then attempt_model_switch(error_info, plan)
  when :circuit_breaker then open_circuit_breaker(error_info, plan)
  when :escalate        then escalate_error(error_info, plan)
  when :abort           then abort_execution(error_info, plan)
  else
    {
      success: false,
      action: :unknown_recovery,
      error: "Unknown recovery action: #{plan[:action]}"
    }
  end
end
#circuit_breaker_status ⇒ Object
Get circuit breaker status
321 322 323 324 325 326 327 328 329 330 |
# File 'lib/aidp/harness/error_handler.rb', line 321

# Get circuit breaker status.
# Projects each tracked breaker down to its externally interesting
# fields (open flag, opened_at timestamp, failure count, threshold).
#
# @return [Hash{String=>Hash}] breaker key => status summary
def circuit_breaker_status
  @circuit_breakers.each_with_object({}) do |(key, breaker), summary|
    summary[key] = {
      open: breaker[:open],
      opened_at: breaker[:opened_at],
      failure_count: breaker[:failure_count],
      threshold: breaker[:threshold]
    }
  end
end
#clear_error_history ⇒ Object
Clear error history
316 317 318 |
# File 'lib/aidp/harness/error_handler.rb', line 316

# Clear error history.
# Empties the array in place (same object, so any external references
# to the history observe the clear).
#
# @return [Array] the now-empty history array
def clear_error_history
  @error_history.clear
end
#error_history(time_range = nil) ⇒ Object
Get error history
307 308 309 310 311 312 313 |
# File 'lib/aidp/harness/error_handler.rb', line 307

# Get error history, optionally filtered to a time range.
#
# @param time_range [Range, nil] when given, only entries whose
#   :timestamp falls inside the range are returned
# @return [Array<Hash>] full history (the live array) or a filtered copy
def error_history(time_range = nil)
  return @error_history unless time_range

  @error_history.select { |entry| time_range.include?(entry[:timestamp]) }
end
#error_stats ⇒ Object
Get error statistics
39 40 41 42 43 44 45 46 47 |
# File 'lib/aidp/harness/error_handler.rb', line 39

# Get error statistics: totals, per-type counts, the ten most recent
# errors, a snapshot of retry counts, and circuit-breaker states.
#
# @return [Hash] aggregate statistics (retry_counts is a defensive dup)
def error_stats
  per_type = @error_history.map { |entry| entry[:error_type] }.tally
  {
    total_errors: @error_history.size,
    error_types: per_type,
    recent_errors: @error_history.last(10),
    retry_counts: @retry_counts.dup,
    circuit_breaker_states: @circuit_breakers.transform_values { |cb| cb[:state] }
  }
end
#execute_retry(error_info, strategy, context = {}) ⇒ Object
Execute a retry with the given strategy
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
# File 'lib/aidp/harness/error_handler.rb', line 179

# Execute a retry with the given strategy: bump the per
# provider/model/error-type counter, bail out with a structured failure
# once max_retries is exceeded, otherwise back off and re-attempt.
#
# @param error_info [Hash] classified error (:provider, :model, :error_type)
# @param strategy [Hash] retry strategy (:max_retries, :backoff_strategy,
#   :base_delay, :max_delay, :name)
# @param context [Hash] passed through to the retry attempt
# @return [Hash] attempt result augmented with :retry_count, :delay, :strategy
def execute_retry(error_info, strategy, context = {})
  # Per provider/model/error-type retry counter
  key = "#{error_info[:provider]}:#{error_info[:model]}:#{error_info[:error_type]}"
  @retry_counts[key] = @retry_counts.fetch(key, 0) + 1
  count = @retry_counts[key]

  # Stop once the strategy's budget is exhausted
  if count > strategy[:max_retries]
    return {
      success: false,
      action: :exhausted_retries,
      error: "Max retries exceeded for #{error_info[:error_type]}",
      retry_count: count,
      next_action: :fallback
    }
  end

  # Back off before re-attempting
  delay = @backoff_calculator.calculate_delay(
    count,
    strategy[:backoff_strategy],
    strategy[:base_delay],
    strategy[:max_delay]
  )
  @sleeper.sleep(delay) if delay > 0

  # Run the attempt and annotate its result with retry metadata
  result = execute_retry_attempt(error_info, strategy, context)
  result.merge!(retry_count: count, delay: delay, strategy: strategy[:name])
end
#execute_with_retry(&block) ⇒ Object
Execute a block with retry logic
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
# File 'lib/aidp/harness/error_handler.rb', line 101

# Execute a block with retry logic.
#
# Runs the block, retrying the current provider up to max_retries + 1
# attempts with backoff; when a provider is exhausted, hands the error to
# handle_error (which may switch providers) and, if a new untried provider
# is selected, restarts the attempt loop from scratch. Configuration errors
# are re-raised immediately (crash-early principle).
#
# FIX: the original extracted text had truncated calls (`e.` / `error.,`);
# they are restored to `e.message` / `error.message` below.
#
# @yield the work to execute (re-invoked on each attempt)
# @return [Object] the block's value on success, or a structured failure
#   Hash (:status, :error, :message, :provider, :providers_tried)
# @raise [Aidp::Errors::ConfigurationError] always propagated unchanged
def execute_with_retry(&block)
  providers_tried = []
  loop do
    max_attempts = @configuration.max_retries + 1
    attempt = 0
    begin
      attempt += 1
      return yield
    rescue Aidp::Errors::ConfigurationError
      # Configuration errors should crash immediately (crash-early principle)
      # Re-raise without catching
      raise
    rescue => error
      current_provider = current_provider_safely
      if attempt < max_attempts
        error_info = {
          error: error,
          provider: current_provider,
          model: current_model_safely,
          error_type: @error_classifier.classify_error(error)
        }
        strategy = retry_strategy(error_info[:error_type])
        if should_retry?(error_info, strategy)
          delay = @backoff_calculator.calculate_delay(attempt, strategy[:backoff_strategy] || :exponential, 1, 10)
          debug_log("🔁 Retry attempt #{attempt} for #{current_provider}", level: :info,
            data: {delay: delay, error_type: error_info[:error_type]})
          @sleeper.sleep(delay) if delay > 0
          retry
        end
      end

      # Provider exhausted – attempt recovery (may switch provider)
      debug_log("🚫 Exhausted retries for provider, attempting recovery", level: :warn,
        data: {provider: current_provider, attempt: attempt, max_attempts: max_attempts})
      handle_error(error, {
        provider: current_provider,
        model: current_model_safely,
        exhausted_retries: true
      })

      new_provider = current_provider_safely
      if new_provider != current_provider && !providers_tried.include?(new_provider)
        providers_tried << current_provider
        # Reset retry counts for the new provider
        begin
          reset_retry_counts(new_provider)
        rescue => e
          debug_log("⚠️ Failed to reset retry counts for new provider", level: :warn, data: {error: e.message})
        end
        debug_log("🔀 Switched provider after failure – re-executing block", level: :info,
          data: {from: current_provider, to: new_provider})
        # Start retry loop fresh for new provider
        next
      end

      # No new provider (or already tried) – return structured failure
      debug_log("❌ No fallback provider available or all tried", level: :error,
        data: {providers_tried: providers_tried})
      begin
        if @provider_manager.respond_to?(:mark_provider_failure_exhausted)
          @provider_manager.mark_provider_failure_exhausted(current_provider)
          debug_log("🛑 Marked provider #{current_provider} unhealthy due to exhausted retries", level: :warn)
        end
      rescue => e
        debug_log("⚠️ Failed to mark provider failure-exhausted", level: :warn, data: {error: e.message})
      end
      return {
        status: "failed",
        error: error,
        message: error.message,
        provider: current_provider,
        providers_tried: providers_tried.dup
      }
    end
  end
end
#handle_error(error, context = {}) ⇒ Object
Main entry point for error handling
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/aidp/harness/error_handler.rb', line 50

# Main entry point for error handling: classifies the error, records it
# (metrics + history), then either retries per the matching strategy or
# falls back to recovery. Auth-expired errors additionally mark the
# provider unhealthy so it is not immediately re-selected.
#
# FIX: the original extracted text had a truncated call (`e.`); it is
# restored to `e.message` in the auth-failure rescue below.
#
# @param error [StandardError] the raised error
# @param context [Hash] extra context for classification
# @return [Hash] result of execute_retry or attempt_recovery
def handle_error(error, context = {})
  error_info = @error_classifier.classify_error(error, context)

  # Debug logging
  debug_error(error, context)
  debug_log("🔧 ErrorHandler: Processing error", level: :info, data: {
    error_type: error_info[:error_type],
    provider: error_info[:provider],
    model: error_info[:model]
  })

  # Record error in metrics if available
  @metrics_manager&.record_error(error_info[:provider], error_info[:model], error_info)

  # Add to error history
  @error_history << error_info

  # Get retry strategy for this error type
  strategy = retry_strategy(error_info[:error_type])

  # Check if we should retry
  if should_retry?(error_info, strategy)
    debug_log("🔄 ErrorHandler: Attempting retry", level: :info, data: {
      strategy: strategy[:name],
      max_retries: strategy[:max_retries]
    })
    execute_retry(error_info, strategy, context)
  else
    # No retry, attempt recovery
    debug_log("🚨 ErrorHandler: No retry, attempting recovery", level: :warn, data: {
      error_type: error_info[:error_type],
      reason: "Retry not applicable or exhausted"
    })
    if error_info[:error_type].to_sym == :auth_expired
      # Mark provider unhealthy to avoid immediate re-selection
      begin
        if @provider_manager.respond_to?(:mark_provider_auth_failure)
          @provider_manager.mark_provider_auth_failure(error_info[:provider])
          debug_log("🔐 Marked provider #{error_info[:provider]} unhealthy due to auth error", level: :warn)
        end
      rescue => e
        debug_log("⚠️ Failed to mark provider unhealthy after auth error", level: :warn, data: {error: e.message})
      end
    end
    attempt_recovery(error_info, context)
  end
end
#max_attempts ⇒ Object
Get maximum retry attempts
254 255 256 |
# File 'lib/aidp/harness/error_handler.rb', line 254

# Get maximum retry attempts from configuration, defaulting to 3 when
# the configuration object does not expose max_retries.
#
# @return [Integer] maximum retry attempts
def max_attempts
  return @configuration.max_retries if @configuration.respond_to?(:max_retries)

  3
end
#reset_all_circuit_breakers ⇒ Object
Reset all circuit breakers
339 340 341 |
# File 'lib/aidp/harness/error_handler.rb', line 339

# Reset all circuit breakers by emptying the tracking hash in place.
#
# @return [Hash] the now-empty circuit breaker map
def reset_all_circuit_breakers
  @circuit_breakers.clear
end
#reset_circuit_breaker(provider, model = nil) ⇒ Object
Reset circuit breaker
333 334 335 336 |
# File 'lib/aidp/harness/error_handler.rb', line 333

# Reset the circuit breaker for a provider (key "provider") or a
# provider/model pair (key "provider:model").
#
# @param provider [String] provider name
# @param model [String, nil] optional model name
# @return [Hash, nil] the removed breaker entry, or nil if none existed
def reset_circuit_breaker(provider, model = nil)
  @circuit_breakers.delete(model ? "#{provider}:#{model}" : provider)
end
#reset_retry_counts(provider, model = nil) ⇒ Object
Reset retry counts for a specific provider/model combination
274 275 276 277 278 279 280 281 282 283 284 |
# File 'lib/aidp/harness/error_handler.rb', line 274

# Reset retry counts for a specific provider/model combination.
# Keys are "provider:model:error_type"; with no model, every key for the
# provider is removed.
#
# @param provider [String] provider name
# @param model [String, nil] optional model name to narrow the reset
# @return [Array<String>] the keys that were removed
def reset_retry_counts(provider, model = nil)
  prefix = model ? "#{provider}:#{model}:" : "#{provider}:"
  stale_keys = @retry_counts.keys.select { |key| key.start_with?(prefix) }
  stale_keys.each { |key| @retry_counts.delete(key) }
end
#retry_status(provider, model = nil) ⇒ Object
Get retry status for a provider/model
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 |
# File 'lib/aidp/harness/error_handler.rb', line 287

# Get retry status for a provider (or a provider/model pair): for each
# tracked error type, the current retry count and the strategy's cap.
#
# @param provider [String] provider name
# @param model [String, nil] optional model name to narrow the report
# @return [Hash{String=>Hash}] error type => {retry_count:, max_retries:}
def retry_status(provider, model = nil)
  prefix = model ? "#{provider}:#{model}:" : "#{provider}:"
  @retry_counts.each_with_object({}) do |(key, count), status|
    next unless key.start_with?(prefix)

    error_type = key.split(":").last
    status[error_type] = {
      retry_count: count,
      max_retries: retry_strategy(error_type.to_sym)[:max_retries]
    }
  end
end
#retry_strategy(error_type) ⇒ Object
Get retry strategy for error type
249 250 251 |
# File 'lib/aidp/harness/error_handler.rb', line 249

# Get the retry strategy registered for an error type, falling back to
# the :default strategy when no specific one exists.
#
# @param error_type [Symbol] classified error type
# @return [Hash, nil] strategy hash (nil only if no default is registered)
def retry_strategy(error_type)
  specific = @retry_strategies[error_type]
  specific || @retry_strategies[:default]
end
#should_retry?(error_info, strategy) ⇒ Boolean
Check if we should retry based on error type and strategy
259 260 261 262 263 264 265 266 267 268 269 270 271 |
# File 'lib/aidp/harness/error_handler.rb', line 259

# Check if we should retry based on error type and strategy:
# the strategy must be enabled, the error type must be retryable per
# the provider ErrorTaxonomy, and the provider/model circuit breaker
# must not be open.
#
# @param error_info [Hash] classified error (:error_type, :provider, :model)
# @param strategy [Hash] retry strategy (:enabled flag is consulted)
# @return [Boolean] true when a retry is permitted
def should_retry?(error_info, strategy)
  return false unless strategy[:enabled]

  # Use ErrorTaxonomy to determine if error is retryable
  return false unless Aidp::Providers::ErrorTaxonomy.retryable?(error_info[:error_type])

  # Check circuit breaker for this provider/model pair
  !circuit_breaker_open?("#{error_info[:provider]}:#{error_info[:model]}")
end