Module: CodeRunner::Slurm
Overview
Provides an interface to the Slurm workload manager used on certain HPC systems.
Instance Method Summary
collapse
Methods included from Launcher
#cancel_job_launcher, #error_file_launcher, #execute_launcher, #launcher_prefix, #output_file_launcher, #queue_status_launcher, #use_launcher
Instance Method Details
#batch_script ⇒ Object
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
# File 'lib/coderunner/system_modules/slurm.rb', line 49
# Build the contents of the Slurm batch script (the #SBATCH header plus
# environment setup) for this run. The actual run command is appended by
# the caller (see #execute).
#
# Reads @nprocs ("NODESxPPN", e.g. "2x16"; PPN may be omitted), @wall_mins,
# and optionally @project and @queue.
# Raises if PPN exceeds max_ppn or if @wall_mins is not set.
def batch_script
  nodes, ppn = @nprocs.split(/x/)
  # Default to fully-packed nodes BEFORE validating, so an @nprocs spec
  # with no "xPPN" part does not trigger a spurious underuse warning.
  ppn ||= max_ppn
  eputs "Warning: Underuse of nodes (#{ppn} cores per node instead of #{max_ppn})" if ppn.to_i < max_ppn
  raise "Error: cores per node cannot exceed #{max_ppn}" if ppn.to_i > max_ppn
  raise "Please specify wall minutes" unless @wall_mins
  ep @wall_mins
  # Convert (possibly fractional) wall minutes into HH:MM:SS components.
  hours = (@wall_mins / 60).floor
  mins = @wall_mins.to_i % 60
  secs = ((@wall_mins - @wall_mins.to_i) * 60).to_i
  eputs "Allotted wall time is " + sprintf("%02d:%02d:%02d", hours, mins, secs)
  nprocstot = nodes.to_i * ppn.to_i
  <<EOF
#!/bin/bash
#SBATCH -J #{executable_name}.#{job_identifier} # jobname
#SBATCH -N #{nodes.to_i} # number of nodes
#SBATCH -n #{nprocstot} # number of tasks
#SBATCH -o #{executable_name}.#{job_identifier}.o%j # strout filename (%j is jobid)
#SBATCH -e #{executable_name}.#{job_identifier}.e%j # stderr filename (%j is jobid)
#{@project ? "#SBATCH -A #@project # project to charge" : ""}
#{@queue ? "#SBATCH -p #@queue # submission queue" : ""}
#{@wall_mins ? "#SBATCH -t #{sprintf("%02d:%02d:%02d", hours, mins, secs)} # walltime" : ""}
#{code_run_environment}
echo "Submitting #{nodes}x#{ppn} job on #{CodeRunner::SYS} for project #@project..."
EOF
end
|
#batch_script_file ⇒ Object
41
42
43
|
# File 'lib/coderunner/system_modules/slurm.rb', line 41
# Name of the shell script that is written out and handed to sbatch.
def batch_script_file
  [executable_name, job_identifier, 'sh'].join('.')
end
|
#cancel_job ⇒ Object
85
86
87
|
# File 'lib/coderunner/system_modules/slurm.rb', line 85
# Cancel this run, either through the launcher or directly via scancel.
def cancel_job
  if use_launcher
    cancel_job_launcher
  else
    `scancel #{@job_no}`
  end
end
|
#error_file ⇒ Object
89
90
91
92
|
# File 'lib/coderunner/system_modules/slurm.rb', line 89
# Path of the stderr file for this run, matching the #SBATCH -e pattern.
def error_file
  return error_file_launcher if use_launcher
  "#{executable_name}.#{job_identifier}.e#{@job_no}"
end
|
31
32
33
34
35
36
37
38
39
|
# File 'lib/coderunner/system_modules/slurm.rb', line 31
# Submit the run: either delegate to the launcher, or write the batch
# script (header + run command) and pass it to sbatch. Returns nil in the
# sbatch case — the job is tracked through the queue, not a local pid.
def execute
  return execute_launcher if use_launcher
  File.open(batch_script_file, 'w') do |file|
    file.puts batch_script + run_command + "\n"
  end
  # NOTE(review): sbatch prints "Submitted batch job N", so to_i yields 0
  # here; the value is deliberately unused — confirm before relying on it.
  _pid = %x[sbatch #{batch_script_file}].to_i
  nil
end
|
#get_run_status(job_no, current_status) ⇒ Object
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
# File 'lib/coderunner/system_modules/slurm.rb', line 99
# Determine the state of job +job_no+ from +current_status+ (squeue
# output). Returns :Queueing, :Running or :Unknown, sets @running as a
# side effect, and raises if the squeue state code is unrecognised.
def get_run_status(job_no, current_status)
  # A launcher-managed job cannot be interrogated here.
  return :Unknown if use_launcher
  # Pick out the first squeue line mentioning this job id.
  line = current_status.split(/\n/).grep(Regexp.new(job_no.to_s)).first
  return :Unknown if line.nil?
  @running = true
  case line
  when /\sPD\s/
    :Queueing
  when /\sR\s/
    :Running
  when /\sC\s/
    # Completed: no longer running, but final outcome unknown from squeue.
    @running = false
    :Unknown
  else
    ep 'line', line
    raise 'Could not get run status'
  end
end
|
45
46
47
|
# File 'lib/coderunner/system_modules/slurm.rb', line 45
# Maximum number of cores per node. This is an abstract hook: concrete
# system modules must override it, so the base version always raises.
def max_ppn
  fail "Please define max_ppn for your system"
end
|
25
26
27
28
29
|
# File 'lib/coderunner/system_modules/slurm.rb', line 25
# Build the mpirun prefix for the run command from the "NODESxPPN"
# process specification held in @nprocs.
def mpi_prog
  node_count, per_node = @nprocs.split(/x/)
  total_procs = node_count.to_i * per_node.to_i
  "mpirun -np #{total_procs}"
end
|
#output_file ⇒ Object
94
95
96
97
|
# File 'lib/coderunner/system_modules/slurm.rb', line 94
# Path of the stdout file for this run, matching the #SBATCH -o pattern.
def output_file
  return output_file_launcher if use_launcher
  "#{executable_name}.#{job_identifier}.o#{@job_no}"
end
|
#queue_status ⇒ Object
8
9
10
11
12
13
14
15
|
# File 'lib/coderunner/system_modules/slurm.rb', line 8
# Report the current queue state: via the launcher when in use,
# otherwise by querying squeue for the current user's jobs.
def queue_status
  use_launcher ? queue_status_launcher : %x[squeue -u $USER]
end
|
#run_command ⇒ Object
17
18
19
20
21
22
23
24
|
# File 'lib/coderunner/system_modules/slurm.rb', line 17
# The shell command that actually starts the executable: a redirected
# mpiexec invocation under the launcher, otherwise the preamble followed
# by the mpirun prefix and the program with its parameters.
def run_command
  return %[mpiexec -np #{@nprocs} #{executable_location}/#{executable_name} #{parameter_string} > #{output_file_launcher} 2> #{error_file_launcher}] if use_launcher
  "#{@preamble} #{mpi_prog} #{executable_location}/#{executable_name} #{parameter_string}"
end
|