Class: Workflow::LocalExecutor
- Inherits:
-
Object
- Object
- Workflow::LocalExecutor
- Defined in:
- lib/scout/workflow/deployment/local.rb
Defined Under Namespace
Classes: NoWork
Instance Attribute Summary collapse
-
#available_resources ⇒ Object
Returns the value of attribute available_resources.
-
#resources_requested ⇒ Object
Returns the value of attribute resources_requested.
-
#resources_used ⇒ Object
Returns the value of attribute resources_used.
-
#timer ⇒ Object
Returns the value of attribute timer.
Class Method Summary collapse
- .candidates(batches) ⇒ Object
- .process(*args) ⇒ Object
- .produce(jobs, rules = {}, produce_cpus: Etc.nprocessors, produce_timer: 1) ⇒ Object
- .produce_dependencies(jobs, tasks, rules = {}, produce_cpus: Etc.nprocessors, produce_timer: 1) ⇒ Object
-
.purge_duplicates(batches) ⇒ Object
{{{ HELPER.
- .sort_candidates(batches) ⇒ Object
Instance Method Summary collapse
- #check_resources(batch) ⇒ Object
- #clear_batch(batches, batch) ⇒ Object
- #erase_job_dependencies(job, batches) ⇒ Object
-
#initialize(timer = 5, available_resources = nil) ⇒ LocalExecutor
constructor
A new instance of LocalExecutor.
- #process(rules, jobs = nil) ⇒ Object
- #process_batches(batches) ⇒ Object
- #release_resources(job) ⇒ Object
- #run_batch(batch) ⇒ Object
Constructor Details
#initialize(timer = 5, available_resources = nil) ⇒ LocalExecutor
Returns a new instance of LocalExecutor.
39 40 41 42 43 44 45 |
# File 'lib/scout/workflow/deployment/local.rb', line 39

# Builds an executor with a polling interval and a resource budget.
#
# @param timer [Numeric] value later handed to +sleep+ between scheduling
#   passes in #process_batches
# @param available_resources [Hash, nil] resource name => limit; when nil, a
#   single :cpus entry set to Etc.nprocessors is used
def initialize(timer = 5, available_resources = nil)
  if available_resources.nil?
    available_resources = { :cpus => Etc.nprocessors }
  end

  @timer = timer
  @available_resources = IndiferentHash.setup(available_resources)

  # Both trackers start out empty: totals reserved per resource, and the
  # resources held by each job.
  @resources_requested = IndiferentHash.setup({})
  @resources_used = IndiferentHash.setup({})
end
Instance Attribute Details
#available_resources ⇒ Object
Returns the value of attribute available_resources.
37 38 39 |
# File 'lib/scout/workflow/deployment/local.rb', line 37

# Reader for the resource limits stored by #initialize.
#
# @return [Object] the value of @available_resources
def available_resources
  @available_resources
end
#resources_requested ⇒ Object
Returns the value of attribute resources_requested.
37 38 39 |
# File 'lib/scout/workflow/deployment/local.rb', line 37

# Reader for the per-resource totals that #check_resources increments and
# #release_resources decrements.
#
# @return [Object] the value of @resources_requested
def resources_requested
  @resources_requested
end
#resources_used ⇒ Object
Returns the value of attribute resources_used.
37 38 39 |
# File 'lib/scout/workflow/deployment/local.rb', line 37

# Reader for the job => resources map filled in by #check_resources and
# emptied by #release_resources.
#
# @return [Object] the value of @resources_used
def resources_used
  @resources_used
end
#timer ⇒ Object
Returns the value of attribute timer.
37 38 39 |
# File 'lib/scout/workflow/deployment/local.rb', line 37

# Reader for the interval passed to +sleep+ at the end of every scheduling
# pass in #process_batches.
#
# @return [Object] the value of @timer
def timer
  @timer
end
Class Method Details
.candidates(batches) ⇒ Object
291 292 293 294 295 296 297 298 299 300 301 |
# File 'lib/scout/workflow/deployment/local.rb', line 291

# Selects the batches that can be considered for execution right now.
#
# A batch qualifies when its :deps list is empty and
# Workflow::Orchestrator.done_batch? does not report it as done. The survivors
# are de-duplicated with .purge_duplicates and ordered with .sort_candidates.
#
# @param batches [Array<Hash>] batch hashes exposing at least :deps
# @return [Array<Hash>] runnable batches, in the order given by .sort_candidates
def self.candidates(batches)
  pending_leaves = batches.select do |batch|
    batch[:deps].empty? && !Workflow::Orchestrator.done_batch?(batch)
  end

  sort_candidates(purge_duplicates(pending_leaves))
end
.process(*args) ⇒ Object
5 6 7 |
# File 'lib/scout/workflow/deployment/local.rb', line 5

# Convenience entry point: builds an executor with the constructor defaults
# and forwards every argument to the instance-level #process.
#
# @param args [Array] arguments passed through unchanged to #process
# @return [Object] whatever #process returns
def self.process(*args)
  executor = new
  executor.process(*args)
end
.produce(jobs, rules = {}, produce_cpus: Etc.nprocessors, produce_timer: 1) ⇒ Object
9 10 11 12 13 14 15 16 |
# File 'lib/scout/workflow/deployment/local.rb', line 9

# Runs the given jobs through a fresh executor limited to +produce_cpus+ CPUs.
#
# @param jobs [Object, Array] a single job or an array of jobs
# @param rules [Hash] rules forwarded to #process
# @param produce_cpus [#to_i] CPU budget for the executor
# @param produce_timer [#to_f] polling interval for the executor
# @return [Object, nil] the result of #process, or nil when it raises NoWork
def self.produce(jobs, rules = {}, produce_cpus: Etc.nprocessors, produce_timer: 1)
  job_list = Array === jobs ? jobs : [jobs]
  orchestrator = new(produce_timer.to_f, cpus: produce_cpus.to_i)

  begin
    orchestrator.process(rules, job_list)
  rescue self::NoWork
    # Nothing left to run is not an error for this entry point.
    nil
  end
end
.produce_dependencies(jobs, tasks, rules = {}, produce_cpus: Etc.nprocessors, produce_timer: 1) ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/scout/workflow/deployment/local.rb', line 18

# Collects the recursive dependencies of +jobs+ that belong to the requested
# tasks and hands them to .produce.
#
# Jobs that are already done or running are skipped entirely.
#
# @param jobs [Object, Array] a single job or an array of jobs
# @param tasks [Array<String, Symbol>] task names; String entries are
#   converted to Symbols before matching
# @param rules [Hash] rules forwarded to .produce
# @param produce_cpus [#to_i] forwarded to .produce
# @param produce_timer [#to_f] forwarded to .produce
# @return [Object, nil] whatever .produce returns
def self.produce_dependencies(jobs, tasks, rules = {}, produce_cpus: Etc.nprocessors, produce_timer: 1)
  jobs = [jobs] unless Array === jobs
  tasks = tasks.collect { |task| String === task ? task.to_sym : task }

  produce_list = []
  jobs.each do |job|
    next if job.done? || job.running?

    job.rec_dependencies.each do |dep|
      # NOTE(review): the two job-level checks below compare Strings against a
      # list whose String entries were just converted to Symbols, so they look
      # like they can never match; they are kept as-is pending confirmation
      # of the intended behaviour.
      selected = tasks.include?(dep.task_name.to_sym) ||
        tasks.include?(job.task_name.to_s) ||
        tasks.include?(job.full_task_name)

      produce_list << dep if selected
    end
  end

  produce(produce_list, rules, produce_cpus: produce_cpus, produce_timer: produce_timer)
end
.purge_duplicates(batches) ⇒ Object
{{{ HELPER
271 272 273 274 275 276 277 278 279 280 281 282 |
# File 'lib/scout/workflow/deployment/local.rb', line 271

# Drops batches whose top-level job path has already been seen, keeping the
# first batch for each path and preserving the input order.
#
# @param batches [Array<Hash>] batch hashes exposing :top_level (responding to #path)
# @return [Array<Hash>] the batches with later duplicates removed
def self.purge_duplicates(batches)
  seen_paths = Set.new

  # Set#add? returns nil when the path is already present, which rejects the batch.
  batches.select { |batch| seen_paths.add?(batch[:top_level].path) }
end
.sort_candidates(batches) ⇒ Object
284 285 286 287 288 289 |
# File 'lib/scout/workflow/deployment/local.rb', line 284

# Orders batches by the sum of their numeric resource values, largest first.
#
# Non-numeric resource values (including nil) are ignored when summing.
#
# @param batches [Array<Hash>] batch hashes exposing a :resources Hash
# @return [Array<Hash>] a new array sorted by descending numeric resource total
def self.sort_candidates(batches)
  batches.sort_by do |batch|
    numeric_total = batch[:resources].values.grep(Numeric).inject(0.0) { |total, amount| total + amount }
    # Negated so that sort_by's ascending order yields the largest total first.
    -numeric_total
  end
end
Instance Method Details
#check_resources(batch) ⇒ Object
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/scout/workflow/deployment/local.rb', line 170

# Reserves the batch's resources and yields, unless doing so would exceed a
# configured limit.
#
# A resource blocks the batch when it has a value, a limit exists for it in
# #available_resources, and the already-requested amount plus this value is
# above that limit. When nothing blocks, the resources are recorded for the
# job in #resources_used, added to #resources_requested, and the block is run.
#
# @param batch [Hash] batch hash exposing :resources and :top_level
# @yield runs the batch once its resources are reserved
# @return [Object] the block's value, or the result of Log.debug when waiting
def check_resources(batch)
  job = batch[:top_level]
  resources = batch[:resources]

  over_limit = resources.select do |name, amount|
    amount && available_resources[name] && ((resources_requested[name] || 0) + amount) > available_resources[name]
  end.keys

  if over_limit.any?
    return Log.debug("Orchestrator waiting on #{job.path} due to #{over_limit.join(", ")}")
  end

  resources_used[job] = resources
  resources.each do |name, amount|
    resources_requested[name] = (resources_requested[name] || 0) + amount.to_i
  end
  Log.low "Orchestrator producing #{job.path} with resources #{resources}"

  yield
end
#clear_batch(batches, batch) ⇒ Object
259 260 261 262 263 264 265 266 267 |
# File 'lib/scout/workflow/deployment/local.rb', line 259

# Removes +batch+ from the :deps list of every batch that depends on it.
#
# @param batches [Array<Hash>] all batches, each exposing a :deps array
# @param batch [Hash] the batch to detach from its dependants
# @return [Array<Hash>] the batches that listed +batch+ as a dependency
def clear_batch(batches, batch)
  dependants = batches.select { |candidate| candidate[:deps].include?(batch) }
  dependants.each { |dependant| dependant[:deps].delete(batch) }
end
#erase_job_dependencies(job, batches) ⇒ Object
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 |
# File 'lib/scout/workflow/deployment/local.rb', line 231

# Cleans the direct dependencies of +job+ whose batch rules ask for erasure.
#
# A dependency is erased only when the batch containing it has an :erase rule
# whose string form is 'true' and every job (across all batches) that
# recursively depends on it is done. Each such parent archives its
# dependencies, copies its linked files dir, and drops the dependency from its
# list before the dependency itself is cleaned.
#
# @param job [Object] job whose #dependencies are examined
# @param batches [Array<Hash>] all batches, each exposing :jobs and :rules
# @return [Object] the result of iterating job.dependencies
def erase_job_dependencies(job, batches)
  all_jobs = batches.flat_map { |b| b[:jobs] }

  job.dependencies.each do |dep|
    batch = batches.find { |b| b[:jobs].include?(dep) }
    next unless batch
    next unless batch[:rules][:erase].to_s == 'true'

    parents = all_jobs.select { |parent| parent.rec_dependencies.include?(dep) }

    # Keep the dependency while any job that needs it is still unfinished.
    next unless parents.all?(&:done?)

    parents.each do |parent|
      Log.high "Erasing #{dep.path} from #{parent.path}"
      parent.archive_deps
      parent.copy_linked_files_dir
      parent.dependencies = parent.dependencies - [dep]
    end

    dep.clean
  end
end
#process(rules, jobs = nil) ⇒ Object
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/scout/workflow/deployment/local.rb', line 142

# Splits the jobs into batches, attaches rules and resources to each batch,
# and runs them through #process_batches.
#
# May be called as process(jobs): when +jobs+ is nil the first argument is
# taken as the jobs and the rules default to an empty Hash.
#
# @param rules [Hash, Object] orchestration rules (or the jobs, in the
#   single-argument form)
# @param jobs [Object, Array, nil] a Step or a collection of jobs
# @return [Object] whatever #process_batches returns
def process(rules, jobs = nil)
  if jobs.nil?
    jobs = rules
    rules = {}
  end
  jobs = [jobs] if Step === jobs

  batches = Workflow::Orchestrator.job_batches(rules, jobs)
  batches.each do |batch|
    batch_rules = IndiferentHash.setup(batch[:rules])
    # The :erase rule is dropped for batches whose top-level job was requested directly.
    batch_rules.delete(:erase) if jobs.include?(batch[:top_level])

    batch_resources = Workflow::Orchestrator.normalize_resources_from_rules(batch_rules)
    defaults = batch_rules[:default_resources]
    batch_resources = IndiferentHash.add_defaults(batch_resources, defaults) if defaults

    batch[:resources] = batch_resources
    batch[:rules] = batch_rules
  end

  process_batches(batches)
end
#process_batches(batches) ⇒ Object
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/scout/workflow/deployment/local.rb', line 47 def process_batches(batches) retry_jobs = [] failed_jobs = [] while batches.reject{|b| Workflow::Orchestrator.done_batch?(b) }.any? candidates = Workflow::LocalExecutor.candidates(batches) top_level_jobs = candidates.collect{|batch| batch[:top_level] } raise NoWork, "No candidates and no running jobs #{Log.fingerprint batches}" if resources_used.empty? && top_level_jobs.empty? if candidates.reject{|batch| failed_jobs.include? batch[:top_level] }.empty? && resources_used.empty? && top_level_jobs.empty? exception = failed_jobs.collect(&:get_exception).compact.first if exception Log.warn 'Some work failed' raise exception else raise 'Some work failed' end end candidates.each do |batch| begin job = batch[:top_level] case when (job.error? || job.aborted?) begin if job.recoverable_error? if retry_jobs.include?(job) Log.warn "Failed twice #{job.path} with recoverable error" retry_jobs.delete job failed_jobs << job next else retry_jobs << job job.clean raise TryAgain end else failed_jobs << job Log.warn "Non-recoverable error in #{job.path}" next end ensure Log.warn "Releases resources from failed job: #{job.path}" release_resources(job) end when job.done? Log.debug "Orchestrator done #{job.path}" release_resources(job) clear_batch(batches, batch) erase_job_dependencies(job, batches) when job.running? next else check_resources(batch) do run_batch(batch) end end rescue TryAgain retry end end batches.each do |batch| job = batch[:top_level] if job.done? || job.aborted? || job.error? job.join if job.done? clear_batch(batches, batch) release_resources(job) erase_job_dependencies(job, batches) end end sleep timer end batches.each{|batch| job = batch[:top_level] begin job.join rescue Log.warn "Job #{job.short_path} ended with exception #{$!.class.to_s}: #{$!.message}" end } batches.each{|batch| job = batch[:top_level] erase_job_dependencies(job, batches) if job.done? } end |
#release_resources(job) ⇒ Object
159 160 161 162 163 164 165 166 167 168 |
# File 'lib/scout/workflow/deployment/local.rb', line 159

# Gives back the resources recorded for +job+, if any.
#
# Every recorded resource except the one keyed 'size' is subtracted from
# #resources_requested, then the job's entry is removed from #resources_used.
#
# @param job [Object] job used as the key in #resources_used (responds to #path)
# @return [Object, nil] the removed entry, or nil when the job held nothing
def release_resources(job)
  held = resources_used[job]
  return unless held

  Log.debug "Orchestrator releasing resouces from #{job.path}"
  held.each do |name, amount|
    next if name == 'size'
    resources_requested[name] -= amount.to_i
  end

  resources_used.delete(job)
end
#run_batch(batch) ⇒ Object
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 |
# File 'lib/scout/workflow/deployment/local.rb', line 195

# Starts the batch's top-level job using the strategy named by the :deploy rule.
#
# - nil, 'local', :local, 'serial', :serial: forks the job inside
#   Scout::Config.with_config, after processing each comma-separated entry of
#   the :config_keys rule and under the :log rule's severity (Log.severity
#   when absent).
# - 'batch', 'sched', 'slurm', 'pbs', 'lsf': hands the batch to
#   Workflow::Scheduler.process_batches and joins the job.
# - anything else: treated as an offsite server name; a trailing '-batch'
#   is stripped and turns on the batch: true option of OffsiteStep.setup.
#
# @param batch [Hash] batch hash exposing :top_level and :rules
# @return [Object] the value of the selected branch
def run_batch(batch)
  job, rules = batch.values_at(:top_level, :rules)
  deploy = rules[:deploy] if rules

  case deploy
  when nil, 'local', :local, :serial, 'serial'
    Scout::Config.with_config do
      if rules && rules[:config_keys]
        rules[:config_keys].split(/,\s*/).each do |config|
          Scout::Config.process_config config
        end
      end

      log = rules[:log] if rules
      log = Log.severity if log.nil?
      Log.with_severity log do
        job.fork
      end
    end
  when 'batch', 'sched', 'slurm', 'pbs', 'lsf'
    job.init_info
    Workflow::Scheduler.process_batches([batch])
    job.join
  else
    # Loaded lazily: only offsite deployments need it.
    require 'scout/offsite'
    if deploy.end_with?('-batch')
      # Anchored to the end so only the suffix tested above is removed, even
      # when '-batch' also occurs earlier in the server name.
      server = deploy.sub(/-batch\z/, '')
      OffsiteStep.setup(job, server: server, batch: true)
    else
      OffsiteStep.setup(job, server: deploy)
    end

    job.produce
    job.join
  end
end