Class: Gitlab::SidekiqMiddleware::MemoryKiller

Inherits:
Object
  • Object
show all
Defined in:
lib/gitlab/sidekiq_middleware/memory_killer.rb

Constant Summary collapse

MAX_RSS =

Default the RSS limit to 0, meaning the MemoryKiller is disabled

(ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS'] || 0).to_s.to_i
GRACE_TIME =

Give Sidekiq 15 minutes of grace time after exceeding the RSS limit

(ENV['SIDEKIQ_MEMORY_KILLER_GRACE_TIME'] || 15 * 60).to_s.to_i
SHUTDOWN_WAIT =

Wait 30 seconds for running jobs to finish during graceful shutdown

(ENV['SIDEKIQ_MEMORY_KILLER_SHUTDOWN_WAIT'] || 30).to_s.to_i
MUTEX =

Create a mutex used to ensure there will be only one thread waiting to shut Sidekiq down

Mutex.new

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#worker ⇒ Object (readonly)

Returns the value of attribute worker


17
18
19
# File 'lib/gitlab/sidekiq_middleware/memory_killer.rb', line 17

# Reader for the +@worker+ instance variable.
#
# @return [Object, nil] the current value of +@worker+; +nil+ when it has
#   not been assigned yet
def worker
  @worker
end

Instance Method Details

#call(worker, job, queue) ⇒ Object


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/gitlab/sidekiq_middleware/memory_killer.rb', line 19

# Runs the wrapped block, then checks the process RSS against MAX_RSS and,
# when the limit is exceeded, starts a background thread that shuts Sidekiq
# down in three escalating steps (SIGTSTP, SIGTERM, then SIGKILL).
#
# @param worker [Object] the worker that processed the job; stored in
#   +@worker+ and named (by class) in the warning message
# @param job [Hash] job payload; only +job['jid']+ is read here
# @param queue [String] accepted for the middleware signature; not used
# @yield the wrapped work; always executed before any memory check
# @return [Thread, nil] the shutdown thread when the RSS limit is exceeded,
#   +nil+ when the killer is disabled or the RSS is within the limit
def call(worker, job, queue)
  yield

  @worker = worker

  # A limit of 0 means the killer is disabled, so no measurement could ever
  # trigger a shutdown: skip calling `get_rss` altogether.
  return unless MAX_RSS > 0

  current_rss = get_rss

  return unless current_rss > MAX_RSS

  Thread.new do
    # Return if another thread is already waiting to shut Sidekiq down
    next unless MUTEX.try_lock

    warn("Sidekiq worker PID-#{pid} current RSS #{current_rss}"\
         " exceeds maximum RSS #{MAX_RSS} after finishing job #{worker.class} JID-#{job['jid']}")

    warn("Sidekiq worker PID-#{pid} will stop fetching new jobs"\
         " in #{GRACE_TIME} seconds, and will be shut down #{SHUTDOWN_WAIT} seconds later")

    # Wait `GRACE_TIME` to give the memory intensive job time to finish.
    # Then, tell Sidekiq to stop fetching new jobs.
    wait_and_signal(GRACE_TIME, 'SIGTSTP', 'stop fetching new jobs')

    # Wait `SHUTDOWN_WAIT` to give already fetched jobs time to finish.
    # Then, tell Sidekiq to gracefully shut down by giving jobs a few more
    # moments to finish, killing and requeuing them if they didn't, and
    # then terminating itself. Sidekiq will replicate the TERM to all its
    # children if it can.
    wait_and_signal(SHUTDOWN_WAIT, 'SIGTERM', 'gracefully shut down')

    # Wait for Sidekiq to shutdown gracefully, and kill it if it didn't.
    # Kill the whole pgroup, so we can be sure no children are left behind
    wait_and_signal_pgroup(Sidekiq.options[:timeout] + 2, 'SIGKILL', 'die')
  end
end