Module: CodeRunner::Slurm

Includes:: Launcher

Included in:: Helios, Stampede

Defined in:: lib/coderunner/system_modules/slurm.rb

Overview

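A system module for the SLURM batch scheduler (jobs are submitted with `sbatch`, queried with `squeue` and cancelled with `scancel`), used on certain HPC systems.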

Instance Method Summary collapse

Methods included from Launcher

#cancel_job_launcher, #error_file_launcher, #execute_launcher, #launcher_prefix, #output_file_launcher, #queue_status_launcher, #use_launcher

Instance Method Details

#batch_script ⇒ `Object`

# File 'lib/coderunner/system_modules/slurm.rb', line 49

# Builds the header of the SLURM batch script for this run.
#
# Reads @nprocs in the form "NODESxPPN" (e.g. "4x16"). When the "xPPN" part
# is omitted, the cores per node default to max_ppn. Reads @wall_mins
# (minutes, may be fractional) for the wall time limit, and optionally
# @project and @queue.
#
# Returns the script text as a String.
# Raises RuntimeError if the cores per node exceed max_ppn or if @wall_mins
# is not set.
def batch_script
  nodes, ppn = @nprocs.split(/x/)
  # Apply the default before validating, so that a bare node count is not
  # reported as an underuse of nodes with zero cores per node.
  ppn ||= max_ppn
  eputs "Warning: Underuse of nodes (#{ppn} cores per node instead of #{max_ppn})" if ppn.to_i < max_ppn
  raise "Error: cores per node cannot excede #{max_ppn}" if ppn.to_i > max_ppn
  # raise "Error: project (i.e. budget) not specified" unless @project
  raise "Please specify wall minutes" unless @wall_mins
  ep @wall_mins
  # Split the (possibly fractional) minutes into HH:MM:SS components.
  hours = (@wall_mins / 60).floor
  mins = @wall_mins.to_i % 60
  secs = ((@wall_mins - @wall_mins.to_i) * 60).to_i
  wall_time = sprintf("%02d:%02d:%02d", hours, mins, secs)
  eputs "Allotted wall time is " + wall_time
  nprocstot = nodes.to_i * ppn.to_i
<<EOF
#!/bin/bash
#SBATCH -J #{executable_name}.#{job_identifier} # jobname
#SBATCH -N #{nodes.to_i}        # number of nodes
#SBATCH -n #{nprocstot}         # number of tasks
#SBATCH -o #{executable_name}.#{job_identifier}.o%j              # strout filename (%j is jobid)
#SBATCH -e #{executable_name}.#{job_identifier}.e%j               # stderr filename (%j is jobid)
#{@project ? "#SBATCH -A #@project # project to charge" : ""}
#{@queue ? "#SBATCH -p #@queue # submission queue" : ""}
#SBATCH -t #{wall_time} # walltime

#{code_run_environment}
echo "Submitting #{nodes}x#{ppn} job on #{CodeRunner::SYS} for project #@project..."



EOF

end

#batch_script_file ⇒ `Object`



41
42
43

# File 'lib/coderunner/system_modules/slurm.rb', line 41

# Name of the batch script file for this run:
# "<executable_name>.<job_identifier>.sh".
def batch_script_file
  format('%s.%s.sh', executable_name, job_identifier)
end

#cancel_job ⇒ `Object`



85
86
87

# File 'lib/coderunner/system_modules/slurm.rb', line 85

# Cancels this run's job: delegates to the launcher when one is in use,
# otherwise runs `scancel` on @job_no and returns the command's output.
def cancel_job
  return cancel_job_launcher if use_launcher

  `scancel #{@job_no}`
end

#error_file ⇒ `Object`

# File 'lib/coderunner/system_modules/slurm.rb', line 89

# Name of the file holding the job's standard error: the launcher's error
# file when a launcher is in use, otherwise
# "<executable_name>.<job_identifier>.e<job_no>".
def error_file
  return error_file_launcher if use_launcher

  "#{executable_name}.#{job_identifier}.e#{@job_no}"
end

#execute ⇒ `Object`

# File 'lib/coderunner/system_modules/slurm.rb', line 31

# Starts the run. With a launcher in use, returns the result of
# execute_launcher. Otherwise writes the batch script followed by the run
# command to batch_script_file, submits that file with `sbatch`, and
# returns nil.
def execute
  return execute_launcher if use_launcher

  File.open(batch_script_file, 'w') do |script|
    script.puts(batch_script + run_command + "\n")
  end
  # The integer parsed from sbatch's output is not used.
  _pid = `sbatch #{batch_script_file}`.to_i
  nil
end

#get_run_status(job_no, current_status) ⇒ `Object`

# File 'lib/coderunner/system_modules/slurm.rb', line 99

# Determines the status of a job from queue status output.
#
# job_no         - the job number to look for.
# current_status - multi-line String of queue status output; the first line
#                  containing job_no is inspected.
#
# Returns :Queueing when the line has state PD, :Running for state R, and
# :Unknown when a launcher is in use, when no line mentions the job, or when
# the state is C. Sets @running to true when the job's line is found and to
# false when its state is C.
# Raises RuntimeError if the job's line shows none of these states.
def get_run_status(job_no, current_status)
  return :Unknown if use_launcher

  # Match the job number as a whole run of digits, so that looking up job 123
  # does not pick up the line belonging to job 1234.
  job_pattern = /(?<!\d)#{Regexp.escape(job_no.to_s)}(?!\d)/
  line = current_status.split(/\n/).grep(job_pattern).first
  return :Unknown unless line

  @running = true
  case line
  when /\sPD\s/
    :Queueing
  when /\sR\s/
    :Running
  when /\sC\s/
    @running = false
    :Unknown
  else
    ep 'line', line
    raise 'Could not get run status'
  end
end

#max_ppn ⇒ `Object`



45
46
47

# File 'lib/coderunner/system_modules/slurm.rb', line 45

# Maximum number of cores per node. This default always raises; judging by
# its message, each system is expected to provide its own definition.
#
# Raises RuntimeError.
def max_ppn
  raise RuntimeError, "Please define max_ppn for your system"
end

#mpi_prog ⇒ `Object`

# File 'lib/coderunner/system_modules/slurm.rb', line 25

# Builds the MPI launch prefix, "mpirun -np <total tasks>".
#
# Reads @nprocs in the form "NODESxPPN". When the "xPPN" part is omitted,
# the cores per node default to max_ppn, as batch_script does, rather than
# producing a task count of zero.
def mpi_prog
  nodes, ppn = @nprocs.split(/x/)
  ppn ||= max_ppn
  nprocstot = nodes.to_i * ppn.to_i
  "mpirun -np #{nprocstot}"
end

#output_file ⇒ `Object`

# File 'lib/coderunner/system_modules/slurm.rb', line 94

# Name of the file holding the job's standard output: the launcher's output
# file when a launcher is in use, otherwise
# "<executable_name>.<job_identifier>.o<job_no>".
def output_file
  return output_file_launcher if use_launcher

  "#{executable_name}.#{job_identifier}.o#{@job_no}"
end

#queue_status ⇒ `Object`

# File 'lib/coderunner/system_modules/slurm.rb', line 8

# Returns the current queue status text: the launcher's status when a
# launcher is in use, otherwise the output of `squeue` restricted to the
# current user ($USER is expanded by the shell).
def queue_status
  return queue_status_launcher if use_launcher

  # Previously: %x[squeue | grep #{ENV['USER'][0..7]}]
  `squeue -u $USER`
end

#run_command ⇒ `Object`

# File 'lib/coderunner/system_modules/slurm.rb', line 17

# Builds the shell command that runs the executable.
#
# With a launcher in use this is an `mpiexec` line using @nprocs, with
# stdout and stderr redirected to the launcher's output and error files.
# Otherwise it is @preamble, the mpi_prog prefix, the executable path and
# the parameter string.
def run_command
  program = "#{executable_location}/#{executable_name}"
  if use_launcher
    return "mpiexec -np #{@nprocs} #{program} #{parameter_string} > #{output_file_launcher} 2> #{error_file_launcher}"
  end

  "#{@preamble} #{mpi_prog}  #{program} #{parameter_string}"
end

Module: CodeRunner::Slurm

Overview

Instance Method Summary collapse

Methods included from Launcher

Instance Method Details

#batch_script ⇒ Object

#batch_script_file ⇒ Object

#cancel_job ⇒ Object

#error_file ⇒ Object

#execute ⇒ Object

#get_run_status(job_no, current_status) ⇒ Object

#max_ppn ⇒ Object

#mpi_prog ⇒ Object

#output_file ⇒ Object

#queue_status ⇒ Object

#run_command ⇒ Object

#batch_script ⇒ `Object`

#batch_script_file ⇒ `Object`

#cancel_job ⇒ `Object`

#error_file ⇒ `Object`

#execute ⇒ `Object`

#get_run_status(job_no, current_status) ⇒ `Object`

#max_ppn ⇒ `Object`

#mpi_prog ⇒ `Object`

#output_file ⇒ `Object`

#queue_status ⇒ `Object`

#run_command ⇒ `Object`