Class: WorkflowManager::FGCZDebian10CourseCluster
- Defined in:
- lib/workflow_manager/cluster.rb
Instance Attribute Summary
Attributes inherited from Cluster
Instance Method Summary
- #cluster_nodes ⇒ Object
- #copy_commands(org_dir, dest_parent_dir, now = nil) ⇒ Object
- #delete_command(target) ⇒ Object
- #job_ends?(log_file) ⇒ Boolean
- #job_pending?(job_id) ⇒ Boolean
- #job_running?(job_id) ⇒ Boolean
- #kill_command(job_id) ⇒ Object
- #submit_job(script_file, script_content, option = '') ⇒ Object
Methods inherited from Cluster
#default_node, #generate_new_job_script, #initialize, #node_list
Constructor Details
This class inherits a constructor from WorkflowManager::Cluster
Instance Method Details
#cluster_nodes ⇒ Object
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 |
# File 'lib/workflow_manager/cluster.rb', line 504
#
# Lists the nodes of this cluster as a label-to-hostname mapping.
#
# Every host from fgcz-h-900 through fgcz-h-917 gets one entry whose key is
# the descriptive label "<host>: cpu 8,mem 30 GB,scr 500G" and whose value is
# the bare host name. A fresh Hash is built on every call.
#
# @return [Hash{String => String}] label => host name, in ascending host order
def cluster_nodes
  (900..917).each_with_object({}) do |number, nodes|
    host = "fgcz-h-#{number}"
    nodes["#{host}: cpu 8,mem 30 GB,scr 500G"] = host
  end
end
#copy_commands(org_dir, dest_parent_dir, now = nil) ⇒ Object
495 496 497 |
# File 'lib/workflow_manager/cluster.rb', line 495
#
# Builds the shell command(s) that recursively copy a directory.
#
# The command text is only assembled here; nothing is executed. Both paths
# are interpolated verbatim (no quoting or escaping is applied).
#
# @param org_dir [String] directory to copy
# @param dest_parent_dir [String] destination handed to `cp -r`
# @param now [Object, nil] accepted for interface compatibility; not used by
#   this implementation
# @return [Array<String>] a single-element list holding the `cp -r` command
def copy_commands(org_dir, dest_parent_dir, now=nil)
  ["cp -r #{org_dir} #{dest_parent_dir}"]
end
#delete_command(target) ⇒ Object
501 502 503 |
# File 'lib/workflow_manager/cluster.rb', line 501
#
# Builds the shell command that recursively force-deletes a path.
#
# The command text is only assembled here; nothing is executed. The target
# is interpolated verbatim (no quoting or escaping is applied).
#
# @param target [String] path to remove
# @return [String] the `rm -rf` command line
def delete_command(target)
  "rm -rf #{target}"
end
#job_ends?(log_file) ⇒ Boolean
470 471 472 473 474 475 476 477 478 479 480 481 |
# File 'lib/workflow_manager/cluster.rb', line 470
#
# Tells whether a job has finished by looking for the end-of-script marker
# ("__SCRIPT END__") in the last ten lines of its log file.
#
# `tail` is started with an argument vector rather than a shell string, so a
# log path containing spaces or shell metacharacters is passed through
# unchanged instead of being re-parsed by a shell. Anything `tail` writes to
# stderr (e.g. for a missing file) is discarded, in which case no line is
# read and the result is false.
#
# @param log_file [String, #to_s] path of the job's stdout log
# @return [Boolean] true when the marker appears in the tail of the log
def job_ends?(log_file)
  IO.popen(['tail', '-n', '10', log_file.to_s], err: File::NULL) do |io|
    # any? stops reading as soon as the marker is seen.
    io.each_line.any? { |line| line.include?('__SCRIPT END__') }
  end
end
#job_pending?(job_id) ⇒ Boolean
482 483 484 485 486 487 488 489 490 491 492 493 494 |
# File 'lib/workflow_manager/cluster.rb', line 482
#
# Tells whether the given job is listed by `squeue` in a pending state.
#
# Each output line is split on whitespace; the first field is taken as the
# job id and the fifth as the state column. A line counts as a hit when the
# job id is equal to +job_id+ and the state contains "PD".
#
# Lines with fewer fields (including blank lines) are simply non-matching,
# and +job_id+ is compared by its string form so an Integer id works too.
#
# @param job_id [String, Integer] id of the job to look up
# @return [Boolean] true when a matching line with a "PD" state is found
def job_pending?(job_id)
  wanted = job_id.to_s.strip
  IO.popen('squeue') do |io|
    io.each_line.any? do |line|
      # Expected columns: JOBID PARTITION NAME USER ST ...
      jobid, _partition, _name, _user, state = line.split
      jobid == wanted && state.to_s.include?('PD')
    end
  end
end
#job_running?(job_id) ⇒ Boolean
455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 |
# File 'lib/workflow_manager/cluster.rb', line 455
#
# Tells whether the given job is listed by `squeue` in the running state.
#
# Each output line is split on whitespace; the first field is taken as the
# job id and the fifth as the state column, e.g.
#
#   ["JOBID", "PARTITION", "NAME", "USER", "ST", "TIME", "NODES", "NODELIST(REASON)"]
#   ["206", "employee", "test.sh", "masaomi", "R", "0:03", "1", "fgcz-h-030"]
#
# A line counts as a hit when the job id is equal to +job_id+ and the state
# is exactly "R". Lines with fewer fields (including blank lines) are simply
# non-matching, and +job_id+ is compared by its string form so an Integer id
# works too.
#
# @param job_id [String, Integer] id of the job to look up
# @return [Boolean] true when a matching line with state "R" is found
def job_running?(job_id)
  wanted = job_id.to_s.strip
  IO.popen('squeue') do |io|
    io.each_line.any? do |line|
      jobid, _partition, _name, _user, state = line.split
      jobid == wanted && state == 'R'
    end
  end
end
#kill_command(job_id) ⇒ Object
498 499 500 |
# File 'lib/workflow_manager/cluster.rb', line 498
#
# Builds the shell command that cancels a job via `scancel`.
#
# The command text is only assembled here; nothing is executed.
#
# @param job_id [String, Integer] id of the job to cancel
# @return [String] the `scancel` command line
def kill_command(job_id)
  "scancel #{job_id}"
end
#submit_job(script_file, script_content, option = '') ⇒ Object
438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 |
# File 'lib/workflow_manager/cluster.rb', line 438
#
# Submits a shell script through `g-sub` (always with `-q course`).
#
# The base name of +script_file+ must contain ".sh"; everything after the
# first ".sh" is dropped before the name is handed to
# +generate_new_job_script+ together with +script_content+. Stdout and stderr
# log paths are derived from the generated script's base name inside
# +@log_dir+ ("<name>_o.log" / "<name>_e.log").
#
# The job id is the last whitespace-separated token printed by `g-sub`
# (nil when the command prints nothing).
#
# @param script_file [String] path or name of the script to submit
# @param script_content [String] script body passed to +generate_new_job_script+
# @param option [String] extra text inserted verbatim into the `g-sub` call
# @return [Array(String, String, String)] job id, stdout log path, and the
#   command line that was run
# @raise [RuntimeError] when the script's base name does not contain ".sh"
#   (the same message is also written via +warn+)
def submit_job(script_file, script_content, option='')
  script_name = File.basename(script_file)
  unless script_name =~ /\.sh/
    err_msg = "FGCZDebian10CourseCluster#submit_job, ERROR: script_name is not *.sh: #{script_name}"
    warn err_msg
    raise err_msg
  end

  # Keep only the part up to and including the first ".sh".
  script_name = script_name.split(/\.sh/).first + ".sh"
  new_job_script = generate_new_job_script(script_name, script_content)
  job_script_base = File.basename(new_job_script)
  log_file = File.join(@log_dir, "#{job_script_base}_o.log")
  err_file = File.join(@log_dir, "#{job_script_base}_e.log")
  command = "g-sub -o #{log_file} -e #{err_file} -q course #{option} #{new_job_script}"
  job_id = `#{command}`.chomp.split.last
  [job_id, log_file, command]
end