Module: KubernetesMetadata::WatchPods

Includes: Common

Included in: Fluent::Plugin::KubernetesMetadataFilter

Defined in: lib/fluent/plugin/kubernetes_metadata_watch_pods.rb

Instance Method Summary collapse

#get_pods_and_start_watcher ⇒ Object

List all pods, record the resourceVersion and return a watcher starting from that resourceVersion.
#process_pod_watcher_notices(watcher) ⇒ Object

Process each notice from the watcher, raising an exception when an ERROR notice is received.
#reset_pod_watch_retry_stats ⇒ Object

Reset the pod watch retry count and backoff interval after a successful watch notice.
#set_up_pod_thread ⇒ Object
#start_pod_watch ⇒ Object

Methods included from Common

#match_annotations, #parse_namespace_metadata, #parse_pod_metadata, #syms_to_strs

Instance Method Details

#get_pods_and_start_watcher ⇒ `Object`

List all pods, record the resourceVersion and return a watcher starting from that resourceVersion.

# File 'lib/fluent/plugin/kubernetes_metadata_watch_pods.rb', line 84

# Lists pods through @client.get_pods, stores the parsed metadata of each one
# in @cache (keyed by pod UID), and then opens a watch that starts from the
# resourceVersion reported by that listing.
#
# When the K8S_NODE_NAME environment variable is set, the same
# `spec.nodeName=<name>` field selector is sent with both the list and the
# watch request.
#
# @return [Object] whatever @client.watch_pods returns for the watch request
def get_pods_and_start_watcher
  node_name = ENV['K8S_NODE_NAME']

  # Fetch from API server.
  request = { resource_version: '0' }
  request[:field_selector] = "spec.nodeName=#{node_name}" if node_name

  pod_list = @client.get_pods(request)
  pod_list.each do |pod|
    @cache[pod.metadata['uid']] = parse_pod_metadata(pod)
    @stats.bump(:pod_cache_host_updates)
  end

  # Resume the watch from the version the listing reported; the same options
  # hash is reused so the field selector (if any) carries over.
  request[:resource_version] = pod_list.resourceVersion
  @client.watch_pods(request)
end

#process_pod_watcher_notices(watcher) ⇒ `Object`

Process each notice from the watcher, raising an exception when an ERROR notice is received.

# File 'lib/fluent/plugin/kubernetes_metadata_watch_pods.rb', line 110

# Iterates over every notice yielded by the watcher and updates @cache and
# @stats according to the notice type.
#
# * MODIFIED - re-parses the pod if it is already cached, or if its
#   spec.nodeName equals ENV['K8S_NODE_NAME']; otherwise counts a miss.
# * DELETED  - only counted; the cache entry is left in place.
# * ERROR    - counted, then raised as a RuntimeError.
# * anything else - only counted.
#
# Every non-ERROR notice also resets the retry statistics through
# reset_pod_watch_retry_stats.
#
# @param watcher [#each] source of notices responding to #type, #object and #[]
# @return [Object] the result of watcher.each
# @raise [RuntimeError] when a notice of type 'ERROR' is received
def process_pod_watcher_notices(watcher)
  watcher.each do |notice|
    case notice.type
    when 'ERROR'
      @stats.bump(:pod_watch_error_type_notices)
      payload = notice['object']
      message = payload['message'] if payload && payload['message']
      raise "Error while watching pods: #{message}"
    when 'MODIFIED'
      reset_pod_watch_retry_stats
      pod = notice.object
      uid = pod['metadata']['uid']
      already_cached = @cache[uid]
      # The node name is only consulted for pods that are not cached yet.
      if already_cached || ENV['K8S_NODE_NAME'] == pod['spec']['nodeName']
        @cache[uid] = parse_pod_metadata(pod)
        @stats.bump(already_cached ? :pod_cache_watch_updates : :pod_cache_host_updates)
      else
        @stats.bump(:pod_cache_watch_misses)
      end
    when 'DELETED'
      reset_pod_watch_retry_stats
      # Ignore and let the entry age out, for cases where pods are
      # deleted but their logs are still being processed.
      @stats.bump(:pod_cache_watch_delete_ignored)
    else
      reset_pod_watch_retry_stats
      # Don't pay attention to creations, since the created pod may not
      # end up on this node.
      @stats.bump(:pod_cache_watch_ignored)
    end
  end
end

#reset_pod_watch_retry_stats ⇒ `Object`

Reset the pod watch retry count and backoff interval after a successful watch notice.

# File 'lib/fluent/plugin/kubernetes_metadata_watch_pods.rb', line 104

# Puts the current thread's retry bookkeeping back to its starting state:
# the retry counter goes to zero and the backoff interval goes back to
# @watch_retry_interval.
#
# @return [Object] the value of @watch_retry_interval
def reset_pod_watch_retry_stats
  worker = Thread.current
  worker[:pod_watch_retry_count] = 0
  worker[:pod_watch_retry_backoff_interval] = @watch_retry_interval
end

#set_up_pod_thread ⇒ `Object`

# File 'lib/fluent/plugin/kubernetes_metadata_watch_pods.rb', line 26

# Starts the pod watch and then processes watch notices forever, restarting
# the watch with exponential backoff whenever notice processing fails.
#
# The retry counter and backoff interval live in thread-local storage on the
# current thread. After each failure the method sleeps for the current
# interval, increments the counter and multiplies the interval by
# @watch_retry_exponential_backoff_base. Once the counter has reached
# @watch_retry_max_times the failure is logged as an error and re-raised as
# Fluent::UnrecoverableError.
#
# Only StandardError is retried. Interrupt, SystemExit, NoMemoryError and
# other non-StandardError exceptions propagate immediately, so a shutdown
# request is never swallowed and slept on.
#
# This method does not return normally.
#
# @raise [Fluent::ConfigError] from start_pod_watch when the initial setup fails
# @raise [Fluent::UnrecoverableError] when retries are exhausted
def set_up_pod_thread
  # Any failures / exceptions in the initial setup should raise
  # Fluent::ConfigError, so that users can inspect potential errors in
  # the configuration.
  pod_watcher = start_pod_watch
  Thread.current[:pod_watch_retry_backoff_interval] = @watch_retry_interval
  Thread.current[:pod_watch_retry_count] = 0

  # Any failures / exceptions in the followup watcher notice
  # processing will be swallowed and retried. These failures /
  # exceptions could be caused by Kubernetes API being temporarily
  # down. We assume the configuration is correct at this point.
  loop do
    begin
      # pod_watcher is nil only after a failure; re-list and re-watch then.
      pod_watcher ||= get_pods_and_start_watcher
      process_pod_watcher_notices(pod_watcher)
    rescue StandardError => e
      @stats.bump(:pod_watch_failures)
      if Thread.current[:pod_watch_retry_count] < @watch_retry_max_times
        # Instead of raising exceptions and crashing Fluentd, swallow
        # the exception and reset the watcher.
        log.info(
          "Exception encountered parsing pod watch event. The " \
          "connection might have been closed. Sleeping for " \
          "#{Thread.current[:pod_watch_retry_backoff_interval]} " \
          "seconds and resetting the pod watcher.", e)
        sleep(Thread.current[:pod_watch_retry_backoff_interval])
        Thread.current[:pod_watch_retry_count] += 1
        Thread.current[:pod_watch_retry_backoff_interval] *= @watch_retry_exponential_backoff_base
        pod_watcher = nil
      else
        # Since the retries have failed too many times, log as an error
        # instead of info, then raise so that Fluentd restarts.
        message =
          "Exception encountered parsing pod watch event. The " \
          "connection might have been closed. Retried " \
          "#{@watch_retry_max_times} times yet still failing. Restarting."
        log.error(message, e)
        raise Fluent::UnrecoverableError, message
      end
    end
  end
end

#start_pod_watch ⇒ `Object`

# File 'lib/fluent/plugin/kubernetes_metadata_watch_pods.rb', line 70

# Performs the initial pod listing and watch setup, translating any failure
# into a configuration error so that it is reported at startup.
#
# Only StandardError is translated. Interrupt, SystemExit and other
# non-StandardError exceptions propagate unchanged rather than being
# reported as configuration problems.
#
# @return [Object] the watcher returned by get_pods_and_start_watcher
# @raise [Fluent::ConfigError] when get_pods_and_start_watcher raises a
#   StandardError; the message includes the API version, the endpoint URL,
#   the original message and, when the error responds to #response, that
#   response as well
def start_pod_watch
  get_pods_and_start_watcher
rescue StandardError => e
  message = "start_pod_watch: Exception encountered setting up pod watch " \
            "from Kubernetes API #{@apiVersion} endpoint " \
            "#{@kubernetes_url}: #{e.message}"
  message += " (#{e.response})" if e.respond_to?(:response)
  log.debug(message)

  raise Fluent::ConfigError, message
end

Module: KubernetesMetadata::WatchPods

Instance Method Summary collapse

Methods included from Common

Instance Method Details

#get_pods_and_start_watcher ⇒ Object

#process_pod_watcher_notices(watcher) ⇒ Object

#reset_pod_watch_retry_stats ⇒ Object

#set_up_pod_thread ⇒ Object

#start_pod_watch ⇒ Object

#get_pods_and_start_watcher ⇒ `Object`

#process_pod_watcher_notices(watcher) ⇒ `Object`

#reset_pod_watch_retry_stats ⇒ `Object`

#set_up_pod_thread ⇒ `Object`

#start_pod_watch ⇒ `Object`