Module: CodeRunner::Slurm
Overview
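Support for submitting, monitoring and cancelling jobs through the SLURM batch system (sbatch, squeue, scancel), which is used on certain HPC systems.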
Instance Method Summary
collapse
Methods included from Launcher
#cancel_job_launcher, #error_file_launcher, #execute_launcher, #launcher_prefix, #output_file_launcher, #queue_status_launcher, #use_launcher
Instance Method Details
#batch_script ⇒ Object
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
# File 'lib/coderunner/system_modules/slurm.rb', line 57
# Builds the header of the SLURM batch script for this run.
#
# @nprocs is parsed as "<nodes>x<cores per node>"; when the cores-per-node
# part is absent it defaults to max_ppn. @wall_mins (minutes, possibly
# fractional) is converted into an HH:MM:SS wall-time limit.
#
# Returns a String containing the shebang, the #SBATCH directives,
# code_run_environment and an informational echo line.
# Raises RuntimeError when the cores per node exceed max_ppn or when
# @wall_mins is not set.
def batch_script
  nodes, ppn = @nprocs.split(/x/)
  # Apply the default before validating: otherwise a node-only @nprocs such
  # as "4" is checked as 0 cores per node and triggers a spurious
  # "Underuse of nodes" warning even though full nodes are requested.
  ppn ||= max_ppn
  eputs "Warning: Underuse of nodes (#{ppn} cores per node instead of #{max_ppn})" if ppn.to_i < max_ppn
  raise "Error: cores per node cannot excede #{max_ppn}" if ppn.to_i > max_ppn
  raise "Please specify wall minutes" unless @wall_mins

  ep @wall_mins
  hours = (@wall_mins / 60).floor
  mins = @wall_mins.to_i % 60
  # The fractional part of a minute becomes whole seconds.
  secs = ((@wall_mins - @wall_mins.to_i) * 60).to_i
  walltime = sprintf("%02d:%02d:%02d", hours, mins, secs)
  eputs "Allotted wall time is " + walltime
  nprocstot = nodes.to_i * ppn.to_i
  # The project and queue lines are left blank when those settings are unset.
  <<EOF
#!/bin/bash
#SBATCH -J #{executable_name}.#{job_identifier} # jobname
#SBATCH -N #{nodes.to_i} # number of nodes
#SBATCH -n #{nprocstot} # number of tasks
#SBATCH -o #{executable_name}.#{job_identifier}.o%j # strout filename (%j is jobid)
#SBATCH -e #{executable_name}.#{job_identifier}.e%j # stderr filename (%j is jobid)
#{@project ? "#SBATCH -A #@project # project to charge" : ""}
#{@queue ? "#SBATCH -p #@queue # submission queue" : ""}
#SBATCH -t #{walltime} # walltime
#{code_run_environment}
echo "Submitting #{nodes}x#{ppn} job on #{CodeRunner::SYS} for project #@project..."
EOF
end
|
#batch_script_file ⇒ Object
41
42
43
|
# File 'lib/coderunner/system_modules/slurm.rb', line 41
# Name of the batch script file written for this run:
# "<executable_name>.<job_identifier>.sh".
def batch_script_file
  format('%s.%s.sh', executable_name, job_identifier)
end
|
#cancel_job ⇒ Object
93
94
95
|
# File 'lib/coderunner/system_modules/slurm.rb', line 93
# Cancels this run's job: delegates to the launcher when one is in use,
# otherwise shells out to `scancel` with @job_no and returns its output.
def cancel_job
  return cancel_job_launcher if use_launcher

  %x[scancel #{@job_no}]
end
|
#error_file ⇒ Object
97
98
99
100
|
# File 'lib/coderunner/system_modules/slurm.rb', line 97
# Name of the file receiving the job's standard error: the launcher's error
# file when a launcher is in use, otherwise
# "<executable_name>.<job_identifier>.e<@job_no>".
def error_file
  return error_file_launcher if use_launcher

  format('%s.%s.e%s', executable_name, job_identifier, @job_no)
end
|
31
32
33
34
35
36
37
38
39
|
# File 'lib/coderunner/system_modules/slurm.rb', line 31
# Submits the run. With a launcher, returns whatever execute_launcher
# returns. Otherwise writes batch_script followed by run_command to
# batch_script_file, submits that file with `sbatch`, and returns nil
# (the sbatch output is not used).
def execute
  return execute_launcher if use_launcher

  File.open(batch_script_file, 'w') do |script|
    # The script text is built after the file is opened, as before.
    script.puts batch_script + run_command + "\n"
  end
  `sbatch #{batch_script_file}`
  nil
end
|
#get_run_status(job_no, current_status) ⇒ Object
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
# File 'lib/coderunner/system_modules/slurm.rb', line 107
# Determines the state of job +job_no+ from +current_status+, the text of a
# queue listing (one job per line).
#
# Returns :Unknown when a launcher is in use or when no line belongs to the
# job; :Queueing for state PD; :Running for state R; :Unknown (and sets
# @running to false) for state C.
# Raises RuntimeError when the job's line carries none of those states.
#
# The job's line is the one whose first whitespace-separated field equals
# the job number. The previous unanchored regex match let job 123 pick up
# the line of job 12345, or any line that merely contained "123".
# NOTE(review): this assumes the job ID is the first column, as in squeue's
# default output format - confirm if a custom format is in use.
# NOTE(review): only the PD, R and C state codes are recognised; any other
# code raises. Check squeue's state codes to see whether more need handling.
def get_run_status(job_no, current_status)
  return :Unknown if use_launcher

  job_id = job_no.to_s
  line = current_status.split(/\n/).find { |entry| entry.split.first == job_id }
  return :Unknown unless line

  @running = true
  case line
  when /\sPD\s/
    :Queueing
  when /\sR\s/
    :Running
  when /\sC\s/
    @running = false
    :Unknown
  else
    ep 'line', line
    raise 'Could not get run status'
  end
end
|
45
46
47
|
# File 'lib/coderunner/system_modules/slurm.rb', line 45
# Placeholder for the maximum number of cores per node. This default
# always raises RuntimeError; a system is expected to supply its own
# definition.
def max_ppn
  raise(RuntimeError, "Please define max_ppn for your system")
end
|
25
26
27
28
29
|
# File 'lib/coderunner/system_modules/slurm.rb', line 25
# Builds the MPI launch prefix "mpirun -np <total processes>", where the
# total is nodes * cores per node parsed from @nprocs
# ("<nodes>x<cores per node>").
#
# When the cores-per-node part is absent it defaults to max_ppn, matching
# what batch_script does for its "#SBATCH -n" line. Previously such an
# @nprocs produced "mpirun -np 0".
def mpi_prog
  nodes, ppn = @nprocs.split(/x/)
  ppn ||= max_ppn
  nprocstot = nodes.to_i * ppn.to_i
  "mpirun -np #{nprocstot}"
end
|
50
51
52
|
# File 'lib/coderunner/system_modules/slurm.rb', line 50
# Number of nodes: the part of @nprocs before the first "x", as an Integer
# (0 when @nprocs is empty).
def nodes
  node_field = @nprocs.split(/x/).first
  node_field.to_i
end
|
#output_file ⇒ Object
102
103
104
105
|
# File 'lib/coderunner/system_modules/slurm.rb', line 102
# Name of the file receiving the job's standard output: the launcher's
# output file when a launcher is in use, otherwise
# "<executable_name>.<job_identifier>.o<@job_no>".
def output_file
  return output_file_launcher if use_launcher

  format('%s.%s.o%s', executable_name, job_identifier, @job_no)
end
|
53
54
55
|
# File 'lib/coderunner/system_modules/slurm.rb', line 53
# Cores per node: the part of @nprocs after the first "x", as an Integer
# (0 when that part is absent).
def ppn
  _node_field, core_field = @nprocs.split(/x/)
  core_field.to_i
end
|
#queue_status ⇒ Object
8
9
10
11
12
13
14
15
|
# File 'lib/coderunner/system_modules/slurm.rb', line 8
# Returns the current queue listing: the launcher's status when a launcher
# is in use, otherwise the output of `squeue -u $USER` ($USER is expanded
# by the shell, not by Ruby).
def queue_status
  return queue_status_launcher if use_launcher

  `squeue -u $USER`
end
|
#run_command ⇒ Object
17
18
19
20
21
22
23
24
|
# File 'lib/coderunner/system_modules/slurm.rb', line 17
# Builds the command line that starts the executable.
#
# With a launcher: an mpiexec invocation using @nprocs verbatim as the
# process count, with stdout and stderr redirected to the launcher's output
# and error files. Otherwise: @preamble, the mpi_prog prefix, the
# executable path and the parameter string, separated by single spaces.
def run_command
  unless use_launcher
    return format('%s %s %s/%s %s',
                  @preamble, mpi_prog, executable_location, executable_name, parameter_string)
  end

  format('mpiexec -np %s %s/%s %s > %s 2> %s',
         @nprocs, executable_location, executable_name, parameter_string,
         output_file_launcher, error_file_launcher)
end
|