Class: Aidp::Harness::ErrorHandler
- Inherits:
-
Object
- Object
- Aidp::Harness::ErrorHandler
- Includes:
- DebugMixin
- Defined in:
- lib/aidp/harness/error_handler.rb
Overview
Handles error recovery, retry strategies, and fallback mechanisms
Defined Under Namespace
Classes: BackoffCalculator, ErrorClassifier, RecoveryPlanner, Sleeper
Constant Summary
Constants included from DebugMixin
DebugMixin::DEBUG_BASIC, DebugMixin::DEBUG_OFF, DebugMixin::DEBUG_VERBOSE
Instance Attribute Summary collapse
-
#backoff_calculator ⇒ Object
readonly
Expose internal components for testability.
-
#error_classifier ⇒ Object
readonly
Expose internal components for testability.
-
#recovery_planner ⇒ Object
readonly
Expose internal components for testability.
-
#retry_strategies ⇒ Object
readonly
Expose internal components for testability.
Instance Method Summary collapse
-
#attempt_recovery(error_info, context = {}) ⇒ Object
Attempt recovery when retries are exhausted or not applicable.
-
#circuit_breaker_status ⇒ Object
Get circuit breaker status.
-
#clear_error_history ⇒ Object
Clear error history.
-
#error_history(time_range = nil) ⇒ Object
Get error history.
-
#error_stats ⇒ Object
Get error statistics.
-
#execute_retry(error_info, strategy, context = {}) ⇒ Object
Execute a retry with the given strategy.
-
#execute_with_retry(&block) ⇒ Object
Execute a block with retry logic.
-
#handle_error(error, context = {}) ⇒ Object
Main entry point for error handling.
-
#initialize(provider_manager, configuration, metrics_manager = nil, sleeper: nil) ⇒ ErrorHandler
constructor
A new instance of ErrorHandler.
-
#max_attempts ⇒ Object
Get maximum retry attempts.
-
#reset_all_circuit_breakers ⇒ Object
Reset all circuit breakers.
-
#reset_circuit_breaker(provider, model = nil) ⇒ Object
Reset circuit breaker.
-
#reset_retry_counts(provider, model = nil) ⇒ Object
Reset retry counts for a specific provider/model combination.
-
#retry_status(provider, model = nil) ⇒ Object
Get retry status for a provider/model.
-
#retry_strategy(error_type) ⇒ Object
Get retry strategy for error type.
-
#should_retry?(error_info, strategy) ⇒ Boolean
Check if we should retry based on error type and strategy.
Methods included from DebugMixin
#debug_basic?, #debug_command, #debug_enabled?, #debug_error, #debug_execute_command, #debug_level, #debug_log, #debug_logger, #debug_provider, #debug_step, #debug_timing, #debug_verbose?, included, shared_logger
Constructor Details
#initialize(provider_manager, configuration, metrics_manager = nil, sleeper: nil) ⇒ ErrorHandler
Returns a new instance of ErrorHandler.
26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/aidp/harness/error_handler.rb', line 26
# Build an ErrorHandler wired to the given provider manager and configuration.
#
# @param provider_manager [Object] used during recovery to switch/mark providers
# @param configuration [Object] supplies retry limits (e.g. #max_retries)
# @param metrics_manager [Object, nil] optional sink for error metrics
# @param sleeper [Object, nil] injectable sleep strategy (defaults to Sleeper.new)
def initialize(provider_manager, configuration, metrics_manager = nil, sleeper: nil)
  @provider_manager = provider_manager
  @configuration = configuration
  @metrics_manager = metrics_manager
  @sleeper = sleeper || Sleeper.new

  # Mutable per-run state.
  @retry_strategies = {}
  @retry_counts = {}
  @error_history = []
  @circuit_breakers = {}

  # Collaborators encapsulating the individual policies.
  @backoff_calculator = BackoffCalculator.new
  @error_classifier = ErrorClassifier.new
  @recovery_planner = RecoveryPlanner.new

  initialize_retry_strategies
end
Instance Attribute Details
#backoff_calculator ⇒ Object (readonly)
Expose internal components for testability
16 17 18 |
# File 'lib/aidp/harness/error_handler.rb', line 16
# Expose the backoff calculator for testability.
def backoff_calculator
  @backoff_calculator
end
#error_classifier ⇒ Object (readonly)
Expose internal components for testability
16 17 18 |
# File 'lib/aidp/harness/error_handler.rb', line 16
# Expose the error classifier for testability.
def error_classifier
  @error_classifier
end
#recovery_planner ⇒ Object (readonly)
Expose internal components for testability
16 17 18 |
# File 'lib/aidp/harness/error_handler.rb', line 16
# Expose the recovery planner for testability.
def recovery_planner
  @recovery_planner
end
#retry_strategies ⇒ Object (readonly)
Expose internal components for testability
16 17 18 |
# File 'lib/aidp/harness/error_handler.rb', line 16
# Expose the retry strategy table for testability.
def retry_strategies
  @retry_strategies
end
Instance Method Details
#attempt_recovery(error_info, context = {}) ⇒ Object
Attempt recovery when retries are exhausted or not applicable
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
# File 'lib/aidp/harness/error_handler.rb', line 228
# Attempt recovery when retries are exhausted or not applicable.
# Asks the RecoveryPlanner for a plan, then dispatches on its :action.
def attempt_recovery(error_info, context = {})
  plan = @recovery_planner.create_recovery_plan(error_info, context)

  case plan[:action]
  when :switch_provider then attempt_provider_switch(error_info, plan)
  when :switch_model    then attempt_model_switch(error_info, plan)
  when :circuit_breaker then open_circuit_breaker(error_info, plan)
  when :escalate        then escalate_error(error_info, plan)
  when :abort           then abort_execution(error_info, plan)
  else
    # Unknown action from the planner: report a structured failure.
    {
      success: false,
      action: :unknown_recovery,
      error: "Unknown recovery action: #{plan[:action]}"
    }
  end
end
#circuit_breaker_status ⇒ Object
Get circuit breaker status
324 325 326 327 328 329 330 331 332 333 |
# File 'lib/aidp/harness/error_handler.rb', line 324
# Snapshot the state of every tracked circuit breaker.
# @return [Hash] breaker key => {open:, opened_at:, failure_count:, threshold:}
def circuit_breaker_status
  @circuit_breakers.transform_values do |breaker|
    {
      open: breaker[:open],
      opened_at: breaker[:opened_at],
      failure_count: breaker[:failure_count],
      threshold: breaker[:threshold]
    }
  end
end
#clear_error_history ⇒ Object
Clear error history
319 320 321 |
# File 'lib/aidp/harness/error_handler.rb', line 319
# Discard all recorded errors (mutates the history in place).
def clear_error_history
  @error_history.clear
end
#error_history(time_range = nil) ⇒ Object
Get error history
310 311 312 313 314 315 316 |
# File 'lib/aidp/harness/error_handler.rb', line 310
# Return recorded errors, optionally restricted to a timestamp range.
# With no range the internal array itself is returned (not a copy).
def error_history(time_range = nil)
  return @error_history unless time_range

  @error_history.select { |entry| time_range.include?(entry[:timestamp]) }
end
#error_stats ⇒ Object
Get error statistics
42 43 44 45 46 47 48 49 50 |
# File 'lib/aidp/harness/error_handler.rb', line 42
# Aggregate statistics over the error history, retry counters and breakers.
def error_stats
  {
    total_errors: @error_history.size,
    # Count occurrences per error type.
    error_types: @error_history.map { |entry| entry[:error_type] }.tally,
    recent_errors: @error_history.last(10),
    retry_counts: @retry_counts.dup,
    circuit_breaker_states: @circuit_breakers.transform_values { |breaker| breaker[:state] }
  }
end
#execute_retry(error_info, strategy, context = {}) ⇒ Object
Execute a retry with the given strategy
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
# File 'lib/aidp/harness/error_handler.rb', line 182
# Execute a retry with the given strategy: bump the per-(provider, model,
# error type) counter, enforce the strategy's max_retries budget, wait out
# the computed backoff, then perform the retry attempt.
def execute_retry(error_info, strategy, context = {})
  provider = error_info[:provider]
  model = error_info[:model]
  error_type = error_info[:error_type]

  # Per provider/model/error-type retry counter.
  counter_key = "#{provider}:#{model}:#{error_type}"
  attempts = @retry_counts[counter_key] = @retry_counts.fetch(counter_key, 0) + 1

  # Stop once the strategy's budget is spent.
  if attempts > strategy[:max_retries]
    return {
      success: false,
      action: :exhausted_retries,
      error: "Max retries exceeded for #{error_type}",
      retry_count: attempts,
      next_action: :fallback
    }
  end

  # Back off before retrying.
  delay = @backoff_calculator.calculate_delay(
    attempts,
    strategy[:backoff_strategy],
    strategy[:base_delay],
    strategy[:max_delay]
  )
  @sleeper.sleep(delay) if delay > 0

  # Perform the actual retry and annotate the outcome in place.
  result = execute_retry_attempt(error_info, strategy, context)
  result.merge!(
    retry_count: attempts,
    delay: delay,
    strategy: strategy[:name]
  )
  result
end
#execute_with_retry(&block) ⇒ Object
Execute a block with retry logic
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
# File 'lib/aidp/harness/error_handler.rb', line 104
# Execute a block with retry logic.
#
# In-provider retries use the classified error's strategy and backoff; once a
# provider is exhausted, recovery (via #handle_error) may switch providers and
# the loop re-executes the block fresh. Returns the block's value on success,
# or a structured failure hash when no fallback provider remains.
#
# @yield the work to execute (re-invoked on retry / provider switch)
# @return [Object, Hash] block result, or {status: "failed", ...} on failure
def execute_with_retry(&block)
  providers_tried = []
  loop do
    max_attempts = @configuration.max_retries + 1
    attempt = 0
    begin
      attempt += 1
      return yield
    rescue Aidp::Errors::ConfigurationError
      # Configuration errors should crash immediately (crash-early principle)
      # Re-raise without catching
      raise
    rescue => error
      current_provider = current_provider_safely
      if attempt < max_attempts
        error_info = {
          error: error,
          provider: current_provider,
          model: current_model_safely,
          error_type: @error_classifier.classify_error(error)
        }
        strategy = retry_strategy(error_info[:error_type])
        if should_retry?(error_info, strategy)
          delay = @backoff_calculator.calculate_delay(attempt, strategy[:backoff_strategy] || :exponential, 1, 10)
          debug_log("🔁 Retry attempt #{attempt} for #{current_provider}", level: :info, data: {delay: delay, error_type: error_info[:error_type]})
          @sleeper.sleep(delay) if delay > 0
          retry
        end
      end

      # Provider exhausted – attempt recovery (may switch provider)
      debug_log("🚫 Exhausted retries for provider, attempting recovery", level: :warn, data: {provider: current_provider, attempt: attempt, max_attempts: max_attempts})
      handle_error(error, {provider: current_provider, model: current_model_safely, exhausted_retries: true})

      new_provider = current_provider_safely
      if new_provider != current_provider && !providers_tried.include?(new_provider)
        providers_tried << current_provider
        # Reset retry counts for the new provider
        begin
          reset_retry_counts(new_provider)
        rescue => e
          # FIX: extraction had dropped `.message` here
          debug_log("⚠️ Failed to reset retry counts for new provider", level: :warn, data: {error: e.message})
        end
        debug_log("🔀 Switched provider after failure – re-executing block", level: :info, data: {from: current_provider, to: new_provider})
        # Start retry loop fresh for new provider
        next
      end

      # No new provider (or already tried) – return structured failure
      debug_log("❌ No fallback provider available or all tried", level: :error, data: {providers_tried: providers_tried})
      begin
        if @provider_manager.respond_to?(:mark_provider_failure_exhausted)
          @provider_manager.mark_provider_failure_exhausted(current_provider)
          debug_log("🛑 Marked provider #{current_provider} unhealthy due to exhausted retries", level: :warn)
        end
      rescue => e
        # FIX: extraction had dropped `.message` here
        debug_log("⚠️ Failed to mark provider failure-exhausted", level: :warn, data: {error: e.message})
      end
      return {
        status: "failed",
        error: error,
        # FIX: extraction had dropped `message` after `error.`
        message: error.message,
        provider: current_provider,
        providers_tried: providers_tried.dup
      }
    end
  end
end
#handle_error(error, context = {}) ⇒ Object
Main entry point for error handling
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/aidp/harness/error_handler.rb', line 53
# Main entry point for error handling.
#
# Classifies the error, records it (metrics + history), then either retries
# per the matching strategy or falls through to recovery. Auth-expired errors
# additionally mark the provider unhealthy before recovery.
#
# @param error [StandardError] the raised error
# @param context [Hash] extra classification context (provider, model, ...)
# @return [Hash] result of the retry or recovery attempt
def handle_error(error, context = {})
  error_info = @error_classifier.classify_error(error, context)

  # Debug logging
  debug_error(error, context)
  debug_log("🔧 ErrorHandler: Processing error", level: :info, data: {
    error_type: error_info[:error_type],
    provider: error_info[:provider],
    model: error_info[:model]
  })

  # Record error in metrics if available
  @metrics_manager&.record_error(error_info[:provider], error_info[:model], error_info)

  # Add to error history
  @error_history << error_info

  # Get retry strategy for this error type
  strategy = retry_strategy(error_info[:error_type])

  # Check if we should retry
  if should_retry?(error_info, strategy)
    debug_log("🔄 ErrorHandler: Attempting retry", level: :info, data: {
      strategy: strategy[:name],
      max_retries: strategy[:max_retries]
    })
    execute_retry(error_info, strategy, context)
  else
    # No retry, attempt recovery
    debug_log("🚨 ErrorHandler: No retry, attempting recovery", level: :warn, data: {
      error_type: error_info[:error_type],
      reason: "Retry not applicable or exhausted"
    })
    if error_info[:error_type].to_sym == :auth_expired
      # Mark provider unhealthy to avoid immediate re-selection
      begin
        if @provider_manager.respond_to?(:mark_provider_auth_failure)
          @provider_manager.mark_provider_auth_failure(error_info[:provider])
          debug_log("🔐 Marked provider #{error_info[:provider]} unhealthy due to auth error", level: :warn)
        end
      rescue => e
        # FIX: extraction had dropped `.message` here
        debug_log("⚠️ Failed to mark provider unhealthy after auth error", level: :warn, data: {error: e.message})
      end
    end
    attempt_recovery(error_info, context)
  end
end
#max_attempts ⇒ Object
Get maximum retry attempts
257 258 259 |
# File 'lib/aidp/harness/error_handler.rb', line 257
# Maximum retry attempts from configuration, defaulting to 3 when the
# configuration object does not expose #max_retries.
def max_attempts
  return 3 unless @configuration.respond_to?(:max_retries)

  @configuration.max_retries
end
#reset_all_circuit_breakers ⇒ Object
Reset all circuit breakers
342 343 344 |
# File 'lib/aidp/harness/error_handler.rb', line 342
# Drop every circuit breaker, re-enabling all provider/model pairs.
def reset_all_circuit_breakers
  @circuit_breakers.clear
end
#reset_circuit_breaker(provider, model = nil) ⇒ Object
Reset circuit breaker
336 337 338 339 |
# File 'lib/aidp/harness/error_handler.rb', line 336
# Remove the circuit breaker for a provider, or for a provider:model pair
# when a model is given.
def reset_circuit_breaker(provider, model = nil)
  if model
    @circuit_breakers.delete("#{provider}:#{model}")
  else
    @circuit_breakers.delete(provider)
  end
end
#reset_retry_counts(provider, model = nil) ⇒ Object
Reset retry counts for a specific provider/model combination
277 278 279 280 281 282 283 284 285 286 287 |
# File 'lib/aidp/harness/error_handler.rb', line 277
# Reset retry counts for a specific provider/model combination.
# With a model, only "provider:model:*" keys go; otherwise every
# "provider:*" key is removed.
def reset_retry_counts(provider, model = nil)
  prefix = model ? "#{provider}:#{model}:" : "#{provider}:"
  stale_keys = @retry_counts.keys.select { |key| key.start_with?(prefix) }
  stale_keys.each { |key| @retry_counts.delete(key) }
end
#retry_status(provider, model = nil) ⇒ Object
Get retry status for a provider/model
290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 |
# File 'lib/aidp/harness/error_handler.rb', line 290
# Retry status per error type for a provider (optionally narrowed to a model).
# @return [Hash] error type (String) => {retry_count:, max_retries:}
def retry_status(provider, model = nil)
  prefix = model ? "#{provider}:#{model}:" : "#{provider}:"
  matching = @retry_counts.keys.select { |key| key.start_with?(prefix) }

  matching.each_with_object({}) do |key, status|
    # Keys have the shape "provider:model:error_type".
    error_type = key.split(":").last
    status[error_type] = {
      retry_count: @retry_counts[key],
      max_retries: retry_strategy(error_type.to_sym)[:max_retries]
    }
  end
end
#retry_strategy(error_type) ⇒ Object
Get retry strategy for error type
252 253 254 |
# File 'lib/aidp/harness/error_handler.rb', line 252
# Look up the retry strategy for an error type, falling back to :default.
def retry_strategy(error_type)
  specific = @retry_strategies[error_type]
  specific || @retry_strategies[:default]
end
#should_retry?(error_info, strategy) ⇒ Boolean
Check if we should retry based on error type and strategy
262 263 264 265 266 267 268 269 270 271 272 273 274 |
# File 'lib/aidp/harness/error_handler.rb', line 262
# Check if we should retry based on error type and strategy.
# Requires the strategy to be enabled, the error type to be retryable per
# the provider ErrorTaxonomy, and the provider/model breaker to be closed.
def should_retry?(error_info, strategy)
  return false unless strategy[:enabled]

  # Use ErrorTaxonomy to determine if error is retryable
  return false unless Aidp::Providers::ErrorTaxonomy.retryable?(error_info[:error_type])

  # Respect an open circuit breaker for this provider/model pair.
  breaker_key = "#{error_info[:provider]}:#{error_info[:model]}"
  !circuit_breaker_open?(breaker_key)
end