Method: Krane::Pod::Container#doom_reason

Defined in:
lib/krane/kubernetes_resource/pod.rb

#doom_reason ⇒ Object



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/krane/kubernetes_resource/pod.rb', line 222

# Inspects the container status hash in @status and reports why the container
# should be considered unrecoverable.
#
# Returns a human-readable String when one of the known fatal conditions is
# detected, and nil otherwise (including the transient cases deliberately
# ignored below).
def doom_reason
  limbo_reason = @status.dig("state", "waiting", "reason")
  # The waiting state may carry no message at all; coerce to a String so the
  # pattern checks below never call a method on nil.
  limbo_message = @status.dig("state", "waiting", "message").to_s

  if limbo_reason == "CrashLoopBackOff"
    exit_code = @status.dig('lastState', 'terminated', 'exitCode')
    "Crashing repeatedly (exit #{exit_code}). See logs for more information."
  elsif limbo_reason == "ErrImagePull" && limbo_message.match?(/not found/i)
    "Failed to pull image #{@image}. "\
    "Did you wait for it to be built and pushed to the registry before deploying?"
  elsif limbo_reason == "CreateContainerConfigError" && !limbo_message.match?(/failed to sync (.*?) cache/)
    # Only fail fast when message doesn't include `failed to sync %s cache`.
    # It's possible that a secret/configmap is still trying to be mounted to the pod, it seems related
    # to too many pods referencing the same secret/configmap: https://github.com/kubernetes/kubernetes/pull/74755
    # Error message format source: https://github.com/kubernetes/kubernetes/pull/75260
    "Failed to generate container configuration: #{limbo_message}"
  elsif @status.dig("lastState", "terminated", "reason") == "ContainerCannotRun"
    # ref: https://github.com/kubernetes/kubernetes/blob/562e721ece8a16e05c7e7d6bdd6334c910733ab2/pkg/kubelet/dockershim/docker_container.go#L353
    exit_code = @status.dig('lastState', 'terminated', 'exitCode')
    # We've observed failures here that are actually issues with the node or kube infra, and not with the
    # container. These issues have been transient and result in a 128 exit code, so do not treat these as fatal.
    return if exit_code == 128
    "Failed to start (exit #{exit_code}): #{@status.dig('lastState', 'terminated', 'message')}"
  elsif @status.dig("state", "terminated", "reason") == "ContainerCannotRun"
    # Same check as above, but against the current state rather than the last one.
    exit_code = @status.dig('state', 'terminated', 'exitCode')
    return if exit_code == 128
    "Failed to start (exit #{exit_code}): #{@status.dig('state', 'terminated', 'message')}"
  end
end