Class: Process::Naf::Runner

Inherits:

Af::Application

Object
Af::Application
Process::Naf::Runner

show all

Defined in: app/models/process/naf/runner.rb

Instance Attribute Summary collapse

#current_invocation ⇒ Object

Returns the value of attribute current_invocation.
#machine ⇒ Object

Returns the value of attribute machine.

Instance Method Summary collapse

#assigned_jobs(record) ⇒ Object
#check_dead_children_not_exited_properly ⇒ Object

XXX Is there a race condition where a child process has exited but its pid or status has not been set yet when the timeout fires? I bet there is, so this check is here.
#check_gc_configurations ⇒ Object
#check_log_level ⇒ Object
#check_schedules ⇒ Object
#cleanup_dead_child(pid, status) ⇒ Object
#cleanup_dead_children ⇒ Object
#cleanup_old_processes ⇒ Object
#emergency_teardown ⇒ Object

kill(0, pid) seems to fail inside an at_exit block, so this shoots from the hip: it signals every child with TERM and then KILL without first probing whether it is alive.
#escape_html(str) ⇒ Object
#finish_job(running_job, updates = {}) ⇒ Object
#initialize ⇒ Runner constructor

A new instance of Runner.
#is_job_process_alive?(job) ⇒ Boolean
#log_output_until_job_finishes(job_id, stdout, stderr) ⇒ Object
#memory_available_to_spawn? ⇒ Boolean
#retrieve_invocation_information ⇒ Object
#send_signal_and_maybe_clean_up(job, signal) ⇒ Object
#should_be_queued ⇒ Object
#start_new_jobs ⇒ Object
#terminate_old_processes(record) ⇒ Object
#update_historical_job(updates, historical_job_id) ⇒ Object

XXX update_all doesn’t support “from_partition” so we have this helper.
#wind_down_runners ⇒ Object
#work ⇒ Object
#work_machine ⇒ Object
#work_machine_loop ⇒ Object

Constructor Details

#initialize ⇒ `Runner`

Returns a new instance of Runner.

# File 'app/models/process/naf/runner.rb', line 51

# Builds the runner and registers the default list of logging configuration
# files. For each base name ("af", "naf", "nafrunner" and this application's
# af_name) the list holds "<base>.yml" followed by "<base>-<Rails.env>.yml".
def initialize
  super
  config_bases = ["af", "naf", "nafrunner", af_name]
  default_files = config_bases.flat_map do |base|
    ["#{base}.yml", "#{base}-#{Rails.env}.yml"]
  end
  opt :log_configuration_files, default: default_files
  # No machine log level has been applied to the loggers yet.
  @last_machine_log_level = nil
end

Instance Attribute Details

#current_invocation ⇒ `Object`

Returns the value of attribute current_invocation.



6
7
8

# File 'app/models/process/naf/runner.rb', line 6

# Reader for @current_invocation, which #work assigns to the
# ::Naf::MachineRunnerInvocation record it creates for this process.
def current_invocation
  @current_invocation
end

#machine ⇒ `Object`

Returns the value of attribute machine.



6
7
8

# File 'app/models/process/naf/runner.rb', line 6

# Reader for @machine, which #work assigns from
# ::Naf::Machine.find_by_server_address(@server_address).
def machine
  @machine
end

Instance Method Details

#assigned_jobs(record) ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 590

# Returns the running jobs tied to +record+ whose process is still alive.
#
# When +record+ is a ::Naf::MachineRunnerInvocation the candidates are the
# jobs started on that invocation; for anything else they come from
# ::Naf::RunningJob.assigned_jobs(record). Each candidate is probed with
# is_job_process_alive?.
def assigned_jobs(record)
  candidates =
    if record.is_a?(::Naf::MachineRunnerInvocation)
      ::Naf::RunningJob.started_on_invocation(record.id)
    else
      ::Naf::RunningJob.assigned_jobs(record)
    end

  candidates.select { |job| is_job_process_alive?(job) }
end

#check_dead_children_not_exited_properly ⇒ `Object`

XXX Is there a race condition where a child process has exited but its pid or status has not been set yet when the timeout fires? I bet there is, so this check is here.

# File 'app/models/process/naf/runner.rb', line 321

# Reloads every tracked child job and probes its process. If any of them is
# no longer alive, logs an error listing them plus a warning that they should
# be cleaned up on the next round.
def check_dead_children_not_exited_properly
  dead_children = @children.each_value.reject do |child|
    is_job_process_alive?(child.reload)
  end
  return if dead_children.blank?

  logger.error escape_html("#{machine}: dead children even with timeout during waitpid2(): #{dead_children.inspect}")
  logger.warn "this isn't necessarily incorrect -- look for the pids to be cleaned up next round, if not: call it a bug"
end

#check_gc_configurations ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 103

# Exports Ruby GC tuning variables into ENV unless @disable_gc_modifications
# is set. The values are meant for the forked job processes, not the runner.
def check_gc_configurations
  return if @disable_gc_modifications

  ENV['RUBY_HEAP_MIN_SLOTS'] = '500000'
  ENV['RUBY_HEAP_SLOTS_INCREMENT'] = '250000'
  ENV['RUBY_HEAP_SLOTS_GROWTH_FACTOR'] = '1'
  ENV['RUBY_GC_MALLOC_LIMIT'] = '50000000'
end

#check_log_level ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 229

# Applies the machine's log_level to the logging configurator whenever it
# differs from the level seen on the previous call. The new value is always
# remembered, but a blank level is not passed to the configurator.
def check_log_level
  requested_level = machine.log_level
  return if requested_level == @last_machine_log_level

  @last_machine_log_level = requested_level
  return if requested_level.blank?

  logging_configurator.parse_and_set_logger_levels(requested_level)
end

#check_schedules ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 238

# Periodic schedule check. It does nothing unless ::Naf::Machine reports that
# @check_schedules_period minutes have elapsed and the schedule lock can be
# taken. Otherwise it:
#   1. marks this machine as having checked and releases the lock,
#   2. enqueues every schedule returned by should_be_queued, disabling any
#      schedule whose enqueue raises a prerequisite loop, and
#   3. marks down every enabled, up machine that has been stale for
#      @runner_stale_period minutes.
def check_schedules
  return unless ::Naf::Machine.is_it_time_to_check_schedules?(@check_schedules_period.minutes)

  logger.debug "it's time to check schedules"
  return unless ::Naf::ApplicationSchedule.try_lock_schedules

  logger.debug_gross "checking schedules"
  machine.mark_checked_schedule
  ::Naf::ApplicationSchedule.unlock_schedules

  # Enqueue the scheduled applications that are due.
  should_be_queued.each do |schedule|
    logger.info escape_html("scheduled application: #{schedule}")
    begin
      boss = ::Logical::Naf::ConstructionZone::Boss.new
      # One enqueue per slot in the run group limit (a single one when the
      # limit is unset). This doesn't work very well for limits in the
      # thousands.
      slots = Range.new(0, schedule.application_run_group_limit || 1, true)
      slots.each { boss.enqueue_application_schedule(schedule) }
    rescue ::Naf::HistoricalJob::JobPrerequisiteLoop => loop_error
      logger.error escape_html("#{machine} couldn't queue schedule because of prerequisite loop: #{loop_error.message}")
      logger.warn loop_error
      schedule.enabled = false
      schedule.save!
      logger.alarm escape_html("Application Schedule disabled due to loop: #{schedule}")
    end
  end

  # Check the other runner machines for staleness.
  ::Naf::Machine.enabled.up.each do |other_machine|
    next unless other_machine.is_stale?(@runner_stale_period.minutes)

    logger.alarm escape_html("runner is stale for #{@runner_stale_period} minutes, #{other_machine}")
    other_machine.mark_machine_down(machine)
  end
end

#cleanup_dead_child(pid, status) ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 335

# Finalizes bookkeeping for a child process that waitpid2 reported.
#
# pid    - process id of the reaped child.
# status - the Process::Status from waitpid2, or nil when the caller has no
#          status (the ECHILD fallback in cleanup_dead_children).
#
# The job is removed from @children. If the child exited or was signaled, or
# no status is available, the job is finished with its exit status and
# termination signal, and its log-forwarding thread is joined. A child that
# merely stopped is only logged.
def cleanup_dead_child(pid, status)
  child_job = @children.delete(pid)

  if child_job.present?
    # Update job tags
    child_job.remove_tags([::Naf::HistoricalJob::SYSTEM_TAGS[:work]])

    if status.nil? || status.exited? || status.signaled?
      logger.info { escape_html("cleaning up dead child: #{child_job.reload}") }
      finish_job(child_job,
                 { exit_status: (status && status.exitstatus), termination_signal: (status && status.termsig) })

      thread = @threads.delete(pid)
      logger.detail escape_html("cleaning up threads: #{thread.inspect}")
      logger.detail escape_html("thread list: #{Thread.list}")
      # A job can be tracked in @children without a logging thread (e.g. when
      # saving the job failed before the thread was registered), so only join
      # when one exists instead of raising NoMethodError on nil.
      thread.join if thread
    else
      # this can happen if the child is sigstopped
      logger.warn escape_html("child waited for did not exit: #{child_job}, status: #{status.inspect}")
    end
  else
    # XXX ERROR no child for returned pid -- this can't happen
    logger.warn "child pid: #{pid}, status: #{status.inspect}, not managed by this runner"
  end
end

#cleanup_dead_children ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 275

# Reaps exited child processes, or sleeps for @loop_sleep_time seconds when
# there are no children to wait for.
#
# While children remain, waits for any child with waitpid2(-1), bounded by a
# @loop_sleep_time timeout:
#   * on timeout, runs check_dead_children_not_exited_properly and stops
#     reaping for this round;
#   * on ECHILD (the OS reports no children although @children is non-empty),
#     takes the first tracked pid and cleans it up with a nil status;
#   * otherwise passes the reaped pid and status to cleanup_dead_child.
# Errors raised during cleanup are logged and do not abort the loop.
def cleanup_dead_children
  # clean up children that have exited
  logger.detail "cleaning up dead children: #{@children.length}"

  if @children.length > 0
    while @children.length > 0
      pid = nil
      status = nil
      begin
        # Bound the blocking wait so the caller's loop keeps running.
        Timeout::timeout(@loop_sleep_time) do
          pid, status = Process.waitpid2(-1)
        end
      rescue Timeout::Error
        # Nothing exited within the window; look for children that died
        # without being reaped, then give control back to the caller.
        check_dead_children_not_exited_properly
        break
      rescue Errno::ECHILD => e
        logger.error escape_html("#{machine} No child when we thought we had children #{@children.inspect}")
        logger.warn e
        # Fall back to the first tracked pid so the list can drain; no exit
        # status is available for it.
        pid = @children.first.try(:first)
        status = nil
        logger.warn "pulling first child off list to clean it up: pid=#{pid}"
      end

      if pid
        begin
          cleanup_dead_child(pid, status)
        rescue ActiveRecord::ActiveRecordError => are
          logger.error escape_html("Failure during cleaning up of dead child with pid: #{pid}, status: #{status}")
          logger.error escape_html("#{are.message}")
        rescue StandardError => e
          # XXX just in case of a job control failure -- more code here
          logger.error "some failure during child clean up"
          logger.warn e
        end
      end
    end
  else
    # No children: this sleep is what paces the caller's loop.
    logger.detail "sleeping in loop: #{@loop_sleep_time} seconds"
    sleep(@loop_sleep_time)
  end
end

#cleanup_old_processes ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 113

# For every runner of this machine, terminates the processes that belong to
# invocations marked dead within the last 24 hours.
def cleanup_old_processes
  machine.machine_runners.each do |runner|
    recently_dead = runner.machine_runner_invocations.recently_marked_dead(24.hours)
    recently_dead.each { |invocation| terminate_old_processes(invocation) }
  end
end

#emergency_teardown ⇒ `Object`

kill(0, pid) seems to fail inside an at_exit block, so this shoots from the hip: it signals every child with TERM and then KILL without first probing whether it is alive.

# File 'app/models/process/naf/runner.rb', line 494

# Last-resort shutdown of all tracked children: sends TERM to every child,
# waits two seconds, then sends KILL and force-finishes each job. Does
# nothing when no children are tracked.
def emergency_teardown
  return if @children.empty?

  logger.warn "emergency teardown of #{@children.length} job(s)"
  # Iterate over copies so the walk is independent of the live table.
  @children.clone.each_value { |job| send_signal_and_maybe_clean_up(job, "TERM") }
  sleep(2)
  @children.clone.each_value do |job|
    send_signal_and_maybe_clean_up(job, "KILL")

    # force job down
    finish_job(job)
  end
end

#escape_html(str) ⇒ `Object`



659
660
661

# File 'app/models/process/naf/runner.rb', line 659

# HTML-escapes +str+ via CGI so it can be written safely to the logs.
def escape_html(str)
  CGI.escapeHTML(str)
end

#finish_job(running_job, updates = {}) ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 480

# Marks a running job as finished.
#
# running_job - the ::Naf::RunningJob being finished; a nil or blank value
#               makes this a no-op.
# updates     - extra column values to write to the historical job (for
#               example exit_status or failed_to_start).
#
# Replaces the job's tags with the cleanup system tag. Then, in one
# transaction, writes +updates+ plus finished_at to the historical job row and
# deletes the running job row.
def finish_job(running_job, updates = {})
  # The original guarded only the tag calls and then dereferenced running_job
  # unconditionally, raising NoMethodError for nil. There is nothing to finish
  # without a job, so bail out up front.
  return unless running_job.present?

  running_job.remove_all_tags
  running_job.add_tags([::Naf::HistoricalJob::SYSTEM_TAGS[:cleanup]])

  ::Naf::HistoricalJob.transaction do
    update_historical_job(updates.merge({ finished_at: Time.zone.now }), running_job.id)
    running_job.delete
  end
end

#is_job_process_alive?(job) ⇒ `Boolean`

Returns:

(Boolean)



586
587
588

# File 'app/models/process/naf/runner.rb', line 586

# Probes the job's process by sending it signal 0 through
# send_signal_and_maybe_clean_up and returns that call's result. As a side
# effect of that helper, a job whose process is gone is finished.
def is_job_process_alive?(job)
  send_signal_and_maybe_clean_up(job, 0)
end

#log_output_until_job_finishes(job_id, stdout, stderr) ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 422

# Copies a job's output into its log file until both pipes reach end of file.
#
# job_id - id used to build the log file path under
#          "#{::Naf::PREFIX_PATH}/#{::Naf.schema_name}/jobs/".
# stdout - readable IO for the job's standard output, or nil.
# stderr - readable IO for the job's standard error, or nil.
#
# Waits with select (one-second timeout) for readable pipes, reads up to
# 10240 bytes at a time, and appends each line, right-stripped, to the log
# file. A pipe stops being polled once it raises EOFError. The log file is
# written after every read round and closed when the method exits for any
# reason.
def log_output_until_job_finishes(job_id, stdout, stderr)
  log_file = ::Logical::Naf::LogFile.new("#{::Naf::PREFIX_PATH}/#{::Naf.schema_name}/jobs/#{job_id}")
  log_file.open

  begin
    # Continue reading logs from stdout/stderr until both reach end of file
    while true
      read_pipes = []
      read_pipes << stdout if stdout
      read_pipes << stderr if stderr
      break if read_pipes.empty?

      error_pipes = read_pipes.clone
      # select returns nil on timeout, which leaves all three arrays nil.
      read_array, _write_array, error_array = Kernel.select(read_pipes, nil, error_pipes, 1)

      unless error_array.blank?
        logger.error escape_html("job(#{job_id}): select returned error for #{error_pipes.inspect} (read_pipes: #{read_pipes.inspect})")
        # XXX we should probably close the errored FDs
      end

      next if read_array.blank?

      begin
        read_array.each do |pipe|
          begin
            pipe.read_nonblock(10240).split("\n").each do |log|
              log_file << log.rstrip
            end
          rescue Errno::EAGAIN, Errno::EINTR
            # Nothing to read right now; poll again on the next round.
          rescue EOFError
            # This pipe is finished; stop polling it.
            stdout = nil if pipe == stdout
            stderr = nil if pipe == stderr
          end
        end
      ensure
        log_file.write
      end
    end
  ensure
    # The original left the loop with `return`, so its trailing close was
    # unreachable and the log file was never closed.
    log_file.close
  end
end

#memory_available_to_spawn? ⇒ `Boolean`

Returns:

(Boolean)

# File 'app/models/process/naf/runner.rb', line 644

# Reports whether the share of free memory, as given by Facter, is at least
# @minimum_memory_free percent. Logs a detail line when it is and raises an
# alarm, tagged with the host's name and domain, when it is not.
def memory_available_to_spawn?
  # Drop Facter's cached facts so the figures are current.
  Facter.clear
  total_mb = Facter.memorysize_mb.to_f
  free_mb = Facter.memoryfree_mb.to_f
  free_percent = (free_mb / total_mb) * 100.0

  enough = free_percent >= @minimum_memory_free
  if enough
    logger.detail "memory available: #{free_percent}% (free) >= #{@minimum_memory_free}% (min percent)"
  else
    logger.alarm "#{Facter.hostname}.#{Facter.domain}: not enough memory to spawn: #{free_percent}% (free) < #{@minimum_memory_free}% (min percent)"
  end

  enough
end

#retrieve_invocation_information ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 141

# Collects git metadata about the code this runner was started from by
# shelling out to git in the current directory.
#
# Returns a Hash with :repository_name, :branch_name, :commit_information and
# :deployment_tag. A value is nil when the command output contains git's
# "fatal" marker. The repository name is also nil when it cannot be parsed.
def retrieve_invocation_information
  repository_name =
    begin
      # Take the ":path" part of the first remote URL, drop ".git" and the
      # leading colon.
      remote_path = (`git remote -v`).slice(/:\S+/).sub('.git','')[1..-1]
      remote_path.match(/fatal/) ? nil : remote_path
    rescue
      nil
    end

  branch_name = (`git rev-parse --abbrev-ref HEAD`).strip
  branch_name = nil if branch_name.match(/fatal/)

  commit_information = (`git log --pretty="%H" -n 1`).strip
  commit_information = nil if commit_information.match(/fatal/)

  # stderr is folded into stdout here, so the "no tags" message can be seen.
  deployment_tag = (`git describe --abbrev=0 --tag 2>&1`).strip
  deployment_tag = nil if deployment_tag.match(/fatal: No names found, cannot describe anything/)

  {
    repository_name: repository_name,
    branch_name: branch_name,
    commit_information: commit_information,
    deployment_tag: deployment_tag
  }
end

#send_signal_and_maybe_clean_up(job, signal) ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 565

# Sends +signal+ to the job's process.
#
# Returns true when the signal was delivered. When the job has no pid, or the
# kernel reports that no such process exists (ESRCH), the job is marked
# finished and false is returned.
def send_signal_and_maybe_clean_up(job, signal)
  process_gone = job.pid.nil?

  unless process_gone
    begin
      kill_result = Process.kill(signal, job.pid)
      logger.detail "#{kill_result} = kill(#{signal}, #{job.pid})"
    rescue Errno::ESRCH
      logger.detail "ESRCH = kill(#{signal}, #{job.pid})"
      process_gone = true
    end
  end

  return true unless process_gone

  # job does not exist -- mark it finished
  finish_job(job)
  false
end

#should_be_queued ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 602

# Returns the enabled application schedules that are due to be enqueued now.
#
# A schedule qualifies when its application has no unfinished job that was
# queued within the last JOB_STALE_TIME and was not asked to terminate, and
# either:
#   * it is a relative schedule and the application has no recorded last run,
#     or that run finished more than run_interval minutes ago; or
#   * it is an exact schedule whose start time today has passed by no more
#     than the fudge window and has not been served by a later run.
# Qualifying schedules are dropped if they are limited by their run group,
# unless the schedule has enqueue_backlogs set.
def should_be_queued
  # Unfinished, not-terminating jobs queued within the stale window, indexed
  # by application_id (jobs without an application are ignored).
  not_finished_applications = ::Naf::HistoricalJob.
    queued_between(Time.zone.now - Naf::HistoricalJob::JOB_STALE_TIME, Time.zone.now).
    where("finished_at IS NULL AND request_to_terminate = false").
    find_all{ |job| job.application_id.present? }.
    index_by{ |job| job.application_id }

  # Last-run records as provided by HistoricalJob.application_last_runs,
  # indexed by application_id.
  application_last_runs = ::Naf::HistoricalJob.application_last_runs.
    index_by{ |job| job.application_id }

  # find the run_interval based schedules that should be queued
  # select anything that isn't currently running and completed
  # running more than run_interval minutes ago
  relative_schedules_what_need_queuin = ::Naf::ApplicationSchedule.where(enabled: true).relative_schedules.select do |schedule|
    (not_finished_applications[schedule.application_id].nil? &&
      (application_last_runs[schedule.application_id].nil? ||
        (Time.zone.now - application_last_runs[schedule.application_id].finished_at) > (schedule.run_interval.minutes)))
  end

  # find the run_start_minute based schedules
  # (today's start time is midnight of the current date plus run_start_minute
  # minutes); select anything that
  #  isn't currently running (or queued) AND
  #  has never run, or whose last run finished no later than today's start time AND
  #  should have been run by now AND
  #  is no more than (@check_schedules_period * @schedule_fudge_scale) minutes late
  exact_schedules_what_need_queuin = ::Naf::ApplicationSchedule.where(enabled: true).exact_schedules.select do |schedule|
    (not_finished_applications[schedule.application_id].nil? &&
      (application_last_runs[schedule.application_id].nil? ||
        ((Time.zone.now.to_date + schedule.run_start_minute.minutes) >= application_last_runs[schedule.application_id].finished_at)) &&
      (Time.zone.now - (Time.zone.now.to_date + schedule.run_start_minute.minutes)) >= 0.seconds &&
      ((Time.zone.now - (Time.zone.now.to_date + schedule.run_start_minute.minutes)) <= (@check_schedules_period * @schedule_fudge_scale).minutes)
    )
  end

  # Keep schedules that allow backlogs or are not limited by their run group.
  foreman = ::Logical::Naf::ConstructionZone::Foreman.new()
  return (relative_schedules_what_need_queuin + exact_schedules_what_need_queuin).select do |schedule|
    schedule.enqueue_backlogs || !foreman.limited_by_run_group?(schedule.application_run_group_restriction,
                                                                schedule.application_run_group_name,
                                                                schedule.application_run_group_limit)
  end
end

#start_new_jobs ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 361

# Fetches and spawns queued jobs until the machine is at capacity.
#
# Keeps going while the number of RunningJob rows started on this machine is
# below machine.thread_pool_size, memory_available_to_spawn? is true, and the
# current invocation has not been asked to wind down. Stops early when the job
# fetcher has nothing more to run.
#
# Each spawned job is recorded in @children under its pid, gets its pid and
# invocation id saved on both the running and the historical job, and has a
# thread started (kept in @threads) that copies its output to a log file.
#
# ActiveRecord errors are re-raised. Any other StandardError is logged and the
# loop continues.
def start_new_jobs
  # start new jobs
  logger.detail "starting new jobs, num children: #{@children.length}/#{machine.thread_pool_size}"
  # XXX while @children.length < machine.thread_pool_size && memory_available_to_spawn? && current_invocation.wind_down_at.blank?
  while ::Naf::RunningJob.where(started_on_machine_id: machine.id).count < machine.thread_pool_size &&
    memory_available_to_spawn? && current_invocation.wind_down_at.blank?

    logger.debug_gross "fetching jobs because: children: #{@children.length} < #{machine.thread_pool_size} (poolsize)"
    begin
      running_job = @job_fetcher.fetch_next_job

      unless running_job.present?
        logger.debug_gross "no more jobs to run"
        break
      end

      logger.info escape_html("starting new job : #{running_job.inspect}")

      # fork and run
      pid, stdin, stdout, stderr = running_job.historical_job.spawn
      # The runner never writes to the child's stdin.
      stdin.close

      # Reset NAF_JOB_ID
      ENV.delete('NAF_JOB_ID')
      if pid
        @children[pid] = running_job
        running_job.pid = pid
        running_job.historical_job.pid = pid
        running_job.historical_job.failed_to_start = false
        running_job.historical_job.machine_runner_invocation_id = current_invocation.id
        logger.info escape_html("job started : #{running_job}")
        running_job.save!
        running_job.historical_job.save!

        # Spawn a thread to output the log of each job to files.
        #
        # Make sure not to execute any database calls inside this
        # block, as it will start an ActiveRecord connection for each
        # thread and eventually raise a ConnectionTimeoutError, causing
        # the runner to exit.
        thread = Thread.new do
          log_output_until_job_finishes(running_job.id, stdout, stderr)
        end
        @threads[pid] = thread
      else
        # should never get here (well, hopefully)
        logger.error escape_html("#{machine}: failed to execute #{running_job}")

        finish_job(running_job, { failed_to_start: true })
      end
    rescue ActiveRecord::ActiveRecordError => are
      # Database failures are not recoverable here; let the caller see them.
      raise
    rescue StandardError => e
      # XXX rescue for various issues
      logger.error escape_html("#{machine}: failure during job start")
      logger.warn e
    end
  end
  logger.debug_gross "done starting jobs"
end

#terminate_old_processes(record) ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 509

# Shuts down the live jobs tied to +record+, escalating in three phases and
# returning as soon as no live job remains:
#   1. set request_to_terminate on each job and wait up to
#      @wait_time_for_processes_to_terminate seconds;
#   2. send TERM to the survivors and wait up to 5 seconds;
#   3. send KILL to whatever is left and force-finish those jobs.
#
# record - whatever assigned_jobs accepts; callers in this class pass a
#          machine or a machine runner invocation.
def terminate_old_processes(record)
  # check if any processes are hanging around and ask them
  # politely if they will please terminate
  jobs = assigned_jobs(record)
  if jobs.length == 0
    logger.detail "no jobs to remove"
    return
  end
  logger.info "number of old jobs to sift through: #{jobs.length}"
  jobs.each do |job|
    logger.detail escape_html("job still around: #{job}")
    if job.request_to_terminate == false
      logger.warn "politely asking process: #{job.pid} to terminate itself"
      job.request_to_terminate = true
      job.save!
    end
  end

  # wait, polling once a second for the jobs to go away
  (1..@wait_time_for_processes_to_terminate).each do |i|
    num_assigned_jobs = assigned_jobs(record).length
    return if num_assigned_jobs == 0
    logger.debug_medium "#{i}/#{@wait_time_for_processes_to_terminate}: sleeping 1 second while we wait for " +
      "#{num_assigned_jobs} assigned job(s) to terminate as requested"
    sleep(1)
  end

  # nudge them to terminate
  jobs = assigned_jobs(record)
  if jobs.length == 0
    logger.debug_gross "assigned jobs have exited after asking to terminate nicely"
    return
  end
  jobs.each do |job|
    logger.warn escape_html("sending SIG_TERM to process: #{job}")
    send_signal_and_maybe_clean_up(job, "TERM")
  end

  # wait, polling once a second for up to five seconds
  (1..5).each do |i|
    num_assigned_jobs = assigned_jobs(record).length
    return if num_assigned_jobs == 0
    logger.debug_medium "#{i}/5: sleeping 1 second while we wait for #{num_assigned_jobs} assigned job(s) to terminate from SIG_TERM"
    sleep(1)
  end

  # kill with fire
  assigned_jobs(record).each do |job|
    logger.alarm escape_html("sending SIG_KILL to process: #{job}")
    send_signal_and_maybe_clean_up(job, "KILL")

    # force the job down
    finish_job(job)
  end
end

#update_historical_job(updates, historical_job_id) ⇒ `Object`

XXX update_all doesn’t support “from_partition” so we have this helper

# File 'app/models/process/naf/runner.rb', line 466

# Writes column updates to a historical job row in its partition table.
# (update_all doesn't support "from_partition", hence this helper.)
#
# updates           - Hash of column name => new value. It is not modified;
#                     updated_at is added to a copy.
# historical_job_id - id of the row to update; also used to pick the
#                     partition table name.
#
# Values are bound through "?" placeholders. Column names are interpolated
# into the SQL, so they must come from trusted code.
#
# Returns whatever ::Naf::HistoricalJob.find_by_sql returns for the statement.
def update_historical_job(updates, historical_job_id)
  # Work on a copy: the original inserted :updated_at into the caller's hash.
  columns = updates.merge(updated_at: Time.zone.now)
  set_clause = columns.keys.map { |column| "#{column} = ?" }.join(", ")
  update_sql = <<-SQL
    UPDATE
      #{::Naf::HistoricalJob.partition_table_name(historical_job_id)}
    SET
      #{set_clause}
    WHERE
      id = ?
  SQL
  ::Naf::HistoricalJob.find_by_sql([update_sql] + columns.values + [historical_job_id])
end

#wind_down_runners ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 121

# Walks every invocation of every runner on this machine that is not yet
# marked dead and probes its pid with signal 0. A live invocation is asked to
# wind down. One whose process no longer exists is marked dead and has its
# leftover job processes terminated.
def wind_down_runners
  machine.machine_runners.each do |runner|
    runner.machine_runner_invocations.each do |invocation|
      next unless invocation.dead_at.blank?

      begin
        probe = Process.kill(0, invocation.pid)
        logger.detail "#{probe} = kill(0, #{invocation.pid}) -- process alive, marking runner invocation as winding down"
        invocation.wind_down_at = Time.zone.now
        invocation.save!
      rescue Errno::ESRCH
        logger.detail "ESRCH = kill(0, #{invocation.pid}) -- marking runner invocation as not running"
        invocation.dead_at = Time.zone.now
        invocation.save!
        terminate_old_processes(invocation)
      end
    end
  end
end

#work ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 64

# Entry point of the runner.
#
# Looks up this machine by @server_address and exits with status 1 when it is
# not registered. While holding the machine's runner lock, it cleans up
# processes of recently dead invocations, winds down other invocations, and
# records a new MachineRunnerInvocation for this process (pid, uuid and git
# information). It then runs work_machine. When that returns or raises, the
# invocation is marked dead and old processes are cleaned up again.
def work
  check_gc_configurations

  @machine = ::Naf::Machine.find_by_server_address(@server_address)

  unless machine.present?
    logger.fatal "This machine is not configued correctly (ipaddress: #{@server_address})."
    logger.fatal "Please update #{::Naf::Machine.table_name} with an entry for this machine."
    logger.fatal "Exiting..."
    exit 1
  end

  # The lock is released in the ensure below even if setup fails.
  machine.lock_for_runner_use
  begin
    cleanup_old_processes
    wind_down_runners

    # Create a machine runner, if it doesn't exist
    machine_runner = ::Naf::MachineRunner.
      find_or_create_by_machine_id_and_runner_cwd(machine_id: machine.id,
                                                  runner_cwd: Dir.pwd)
    # Create an invocation for this runner
    @current_invocation = ::Naf::MachineRunnerInvocation.
      create!({ machine_runner_id: machine_runner.id,
                pid: Process.pid,
                uuid: @invocation_uuid }.merge!(retrieve_invocation_information))
  ensure
    machine.unlock_for_runner_use
  end

  begin
    work_machine
  ensure
    # Always record that this invocation has ended, then clean up after it.
    @current_invocation.dead_at = Time.zone.now
    @current_invocation.save!
    cleanup_old_processes
  end
end

#work_machine ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 171

# Runs the main loop for this machine: marks it alive and up, optionally
# terminates every process still assigned to it, resets the child and thread
# tables, registers an at_exit emergency teardown, and then repeats
# work_machine_loop (with a GC pass after each round) until it returns a
# falsy value.
def work_machine
  machine.mark_alive
  machine.mark_up

  # Make sure no processes are thought to be running on this machine
  if @kill_all_runners
    terminate_old_processes(machine)
  end

  logger.info escape_html("working: #{machine}")

  @children = {}
  @threads = {}

  at_exit do
    ::Af::Application.singleton.emergency_teardown
  end

  @job_fetcher = ::Logical::Naf::JobFetcher.new(machine)

  GC.start while work_machine_loop

  logger.info "runner quitting"
end

#work_machine_loop ⇒ `Object`

# File 'app/models/process/naf/runner.rb', line 197

# One round of the runner's main loop.
#
# Returns false when the runner should stop: the machine is disabled or
# marked down, or the invocation was asked to wind down and no children
# remain. Otherwise it refreshes the machine's alive mark and log level,
# checks schedules and starts new jobs (unless winding down), reaps dead
# children, and returns true.
def work_machine_loop
  machine.reload

  # Check machine status
  unless machine.enabled
    logger.warn escape_html("this machine is disabled #{machine}")
    return false
  end
  if machine.marked_down
    logger.warn escape_html("this machine is marked down #{machine}")
    return false
  end

  machine.mark_alive

  check_log_level

  @current_invocation.reload
  if current_invocation.wind_down_at.present?
    logger.warn "invocation asked to wind down"
    # While winding down, keep looping only to reap remaining children.
    return false if @children.length == 0
  else
    check_schedules
    start_new_jobs
  end

  cleanup_dead_children

  true
end

Class: Process::Naf::Runner

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Runner

Instance Attribute Details

#current_invocation ⇒ Object

#machine ⇒ Object

Instance Method Details

#assigned_jobs(record) ⇒ Object

#check_dead_children_not_exited_properly ⇒ Object

#check_gc_configurations ⇒ Object

#check_log_level ⇒ Object

#check_schedules ⇒ Object

#cleanup_dead_child(pid, status) ⇒ Object

#cleanup_dead_children ⇒ Object

#cleanup_old_processes ⇒ Object

#emergency_teardown ⇒ Object

#escape_html(str) ⇒ Object

#finish_job(running_job, updates = {}) ⇒ Object

#is_job_process_alive?(job) ⇒ Boolean

#log_output_until_job_finishes(job_id, stdout, stderr) ⇒ Object

#memory_available_to_spawn? ⇒ Boolean

#retrieve_invocation_information ⇒ Object

#send_signal_and_maybe_clean_up(job, signal) ⇒ Object

#should_be_queued ⇒ Object

#start_new_jobs ⇒ Object

#terminate_old_processes(record) ⇒ Object

#update_historical_job(updates, historical_job_id) ⇒ Object

#wind_down_runners ⇒ Object

#work ⇒ Object

#work_machine ⇒ Object

#work_machine_loop ⇒ Object