Class: Origen::Application::LSFManager

Inherits:
Object
  • Object
show all
Includes:
Callbacks
Defined in:
lib/origen/application/lsf_manager.rb

Overview

This class is responsible for co-ordinating and monitoring all submissions to the LSF. This is in contrast to Origen::Application::LSF which is an API for talking to the LSF.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Callbacks

#register_callback_listener

Constructor Details

#initialize ⇒ LSFManager

Returns a new instance of LSFManager.



14
15
16
17
18
# File 'lib/origen/application/lsf_manager.rb', line 14

# Creates the remote log directory (see log_file_directory) if nothing
# exists at that path yet.
def initialize
  logs_dir = log_file_directory
  FileUtils.mkdir_p(logs_dir) unless File.exist?(logs_dir)
end

Instance Attribute Details

#current_command ⇒ Object

This will be set by the command dispatcher to reflect the currently executing command. If LSF jobs are spawned with the same command then any options passed to the parent command will automatically be forwarded to the children.



12
13
14
# File 'lib/origen/application/lsf_manager.rb', line 12

# Returns whatever is stored in @current_command, or nil when it has not
# been assigned.
def current_command
  defined?(@current_command) ? @current_command : nil
end

Instance Method Details

#add_command_option(*opts) ⇒ Object



481
482
483
484
# File 'lib/origen/application/lsf_manager.rb', line 481

# Appends the given option(s) to the list held in @command_options,
# starting a new list if none exists yet. Returns the updated list.
def add_command_option(*opts)
  existing = @command_options || []
  @command_options = existing + opts
end

#build_log(options = {}) ⇒ Object

Build the log file from the completed jobs



255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# File 'lib/origen/application/lsf_manager.rb', line 255

# Replays the log file of every completed job through Origen.log, framed by
# two banner lines, then prints the stats summary.
#
# While replaying, the per job summary lines are not echoed but are added to
# the application stats instead. They look like this:
#   Total patterns:   1              1347      0.003674
#   New patterns:     0
#   Changed patterns: 1
#   FAILED patterns:  1
#   Total files:      1
#   New files:        0
#   Changed files:    0
#   FAILED files:     1
#
# options[:log_file], when given, names the Origen.log method used for the
# banner and blank lines (defaults to :info). The full options hash is also
# handed to Origen.log's relog method with every replayed line.
#
# Returns the result of stats.print_summary.
def build_log(options = {})
  log_method = options[:log_file] || :info
  banner = '*' * 70
  # Ordered [pattern, stats attribute, conversion] triples, the first pattern
  # to match a line wins
  counters = [
    [/Total patterns:\s+(\d+)/,      :completed_patterns, :to_i],
    [/Total vectors:\s+(\d+)/,       :total_vectors,      :to_i],
    [/Total duration:\s+(\d+\.\d+)/, :total_duration,     :to_f],
    [/Total files:\s+(\d+)/,         :completed_files,    :to_i],
    [/Changed patterns:\s+(\d+)/,    :changed_patterns,   :to_i],
    [/Changed files:\s+(\d+)/,       :changed_files,      :to_i],
    [/New patterns:\s+(\d+)/,        :new_patterns,       :to_i],
    [/New files:\s+(\d+)/,           :new_files,          :to_i],
    [/FAILED patterns:\s+(\d+)/,     :failed_patterns,    :to_i],
    [/FAILED files:\s+(\d+)/,        :failed_files,       :to_i]
  ]
  # Standard Origen output that is dropped rather than replayed
  screened = [/  origen save/, /Insecure world writable dir/, /To save all of/]

  Origen.log.send(log_method, banner)
  completed_jobs.each do |job|
    File.open(log_file(job[:id])) do |io|
      previous_blank = false
      io.readlines.each do |text|
        begin
          text.gsub!(/\e\[\d+m/, '')  # Remove any coloring
          tally = nil
          counters.each do |regex, attribute, conversion|
            found = regex.match(text)
            next unless found
            tally = [attribute, found[1].send(conversion)]
            break
          end
          if tally
            totals = stats
            totals.send("#{tally[0]}=", totals.send(tally[0]) + tally[1])
          elsif text =~ /ERROR!/
            stats.errors += 1
            Origen.log.send :relog, text, options
          elsif text =~ /^\s*$/ || text =~ /.*\|\|\s*$/
            # Compress multiple blank lines
            unless previous_blank
              Origen.log.send(log_method, nil)
              previous_blank = true
            end
          elsif screened.none? { |noise| text =~ noise }
            text.strip!
            Origen.log.send :relog, text, options
            previous_blank = false
          end
        rescue
          # Sometimes illegal UTF-8 characters can get into crash dumps, if this
          # happens just print the line out rather than die
          Origen.log.error text
        end
      end
    end
  end
  Origen.log.send(log_method, banner)
  stats.print_summary
end

#classify_jobs ⇒ Object



490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
# File 'lib/origen/application/lsf_manager.rb', line 490

# Sorts every job in remote_jobs into one of the status buckets
# (queuing_jobs, running_jobs, passed_jobs, failed_jobs, lost_jobs).
#
# The buckets are emptied first via clear_caches. Jobs whose :status has
# already been recorded go straight to the matching bucket; the rest are
# classified from the passed/failed/started marker files and from the job
# ids that the LSF reports as queuing or running.
def classify_jobs
  clear_caches
  queuing_job_ids = lsf.queuing_job_ids
  running_job_ids = lsf.running_job_ids
  remote_jobs.each do |_id, job|
    if job[:status]
      # If the status has already been determined send it straight to the bucket
      send("#{job[:status]}_jobs") << job
    elsif job[:lsf_id] == :error
      job[:status] = :lost
      lost_jobs << job
    elsif job_completed?(job[:id])
      if job_passed?(job[:id])
        job[:status] = :passed
        passed_jobs << job
      elsif job_failed?(job[:id])
        job[:status] = :failed
        failed_jobs << job
      end
    elsif running_job_ids.include?(job[:lsf_id])
      running_jobs << job
      # Once we have assigned a job as running make sure the job is marked as started
      # It can flicker back to queued if the started file takes a long time to arrive
      # from the remote host.
      # The marker files are keyed by the job's own :id (that is what job_started?
      # is asked about below), not by its LSF id.
      job_started(job[:id])
    elsif queuing_job_ids.include?(job[:lsf_id])
      queuing_jobs << job
    elsif job_started?(job[:id])
      # There can be considerable latency between the job writing the passed/failed
      # file remotely and it showing up on the local machine.
      # Give some buffer to that before declaring the file lost.
      if job[:completed_at]
        if (Time.now - job[:completed_at]) < 60
          running_jobs << job
        else
          lost_jobs << job
        end
      else
        job[:completed_at] = Time.now
        running_jobs << job
      end
    elsif (Time.now - job[:submitted_at]) < 60
      # Give jobs submitted less than a minute ago the benefit of the
      # doubt, they may not have shown up in bjobs yet
      queuing_jobs << job
    else
      lost_jobs << job
    end
  end
end

#clear(options) ⇒ Object

Clear jobs from memory



207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/origen/application/lsf_manager.rb', line 207

# Forget jobs.
#
# With options[:type] == :all the saved jobs file is deleted (when present)
# and the in-memory job table is replaced by an empty one. With any other
# :type, every job in the "<type>_jobs" bucket is removed from remote_jobs.
# Without a :type, the single job stored under options[:id] is removed.
def clear(options)
  kind = options[:type]
  return remote_jobs.delete(options[:id]) unless kind

  if kind == :all
    File.delete(remote_jobs_file) if File.exist?(remote_jobs_file)
    @remote_jobs = {}
    return
  end

  send("#{kind}_jobs").each { |job| remote_jobs.delete(job[:id]) }
end

#clear_all ⇒ Object



223
224
225
226
227
228
229
230
231
# File 'lib/origen/application/lsf_manager.rb', line 223

# Full reset: deletes the saved jobs file, wipes and re-creates the remote
# log directory, empties the in-memory job table and clears the status
# buckets.
def clear_all
  jobs_file = remote_jobs_file
  File.delete(jobs_file) if File.exist?(jobs_file)
  logs_dir = log_file_directory
  FileUtils.rm_rf(logs_dir) if File.exist?(logs_dir)
  FileUtils.mkdir_p(logs_dir)
  @remote_jobs = {}
  clear_caches
end

#clear_caches ⇒ Object



545
546
547
548
549
550
551
# File 'lib/origen/application/lsf_manager.rb', line 545

# Drops all five memoized status buckets so that they are rebuilt empty on
# next access. Returns nil.
def clear_caches
  @running_jobs = @queuing_jobs = @passed_jobs = @failed_jobs = @lost_jobs = nil
end

#command_options(command_str) ⇒ Object



436
437
438
439
440
441
442
443
444
445
# File 'lib/origen/application/lsf_manager.rb', line 436

# Returns the recorded command options if command_str invokes the same
# command as current_command, otherwise an empty string.
#
# The command name is the first word of command_str once a leading
# "origen" has been stripped; it is expanded through ORIGEN_COMMAND_ALIASES
# before being compared.
def command_options(command_str)
  first_word = command_str.sub(/origen\s*/, '').match(/(\w+)/)[1]
  resolved = ORIGEN_COMMAND_ALIASES[first_word] || first_word
  return '' unless resolved == current_command
  @command_options
end

#command_options=(opts) ⇒ Object

This will be called by the command dispatcher to record any options that were passed in when launching the current command. These will be automatically appended if the current command spawns any LSF jobs that will invoke the same command.



451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
# File 'lib/origen/application/lsf_manager.rb', line 451

# Records the options that were passed to the current command so that they
# can be appended to LSF jobs that invoke the same command.
#
# Options that are either incompatible with the LSF, or that will already
# have been added elsewhere, are filtered out first. The given array itself
# is left untouched, the filtering is done on a copy.
#
# opts - Array of command line argument strings
def command_options=(opts)
  remaining = opts.dup
  # Maps each switch (or its aliases) to what follows it:
  #   false      - the switch takes no argument
  #   '*'        - the switch is followed by one argument, whatever it is
  #   [a, b...]  - the switch may be followed by any of these literal values
  {
    ['-h', '--help']        => false,
    ['-w', '--wait']        => false,
    ['-d', '--debug']       => false,
    ['-c', '--continue']    => false,
    '--exec_remote'         => false,
    ['-t', '--target']      => '*',
    ['-e', '--environment'] => '*',
    '--id'                  => '*',
    ['-l', '--lsf']         => %w(add clear)
  }.each do |names, values|
    [names].flatten.each do |name|
      ix = remaining.index(name)
      next unless ix
      remaining.delete_at(ix)
      # After the switch has gone its argument (if any) sits at the same index
      [values].flatten.each do |value|
        if value && (value == '*' || remaining[ix] == value)
          remaining.delete_at(ix)
        end
      end
    end
  end
  @command_options ||= []
  @command_options += remaining
end

#command_prefix(id, dependents) ⇒ Object



422
423
424
425
426
427
428
429
430
431
432
433
434
# File 'lib/origen/application/lsf_manager.rb', line 422

# Builds the text that is put in front of a job's command when it is
# submitted: the site-configured lsf_command_prefix (blank when that is
# not defined), a cd into Origen.root, and the "origen l --execute" call
# carrying the job id and, when there are any, the comma separated ids of
# its dependents.
def command_prefix(id, dependents)
  site_prefix = Origen.site_config.lsf_command_prefix || ''
  pieces = [site_prefix, "cd #{Origen.root}; origen l --execute --id #{id} "]
  pieces << "--dependents #{dependents.join(',')} " unless dependents.empty?
  pieces.join
end

#completed_jobs ⇒ Object



561
562
563
# File 'lib/origen/application/lsf_manager.rb', line 561

# Returns a new array holding the passed jobs followed by the failed jobs.
def completed_jobs
  finished = []
  finished.concat(passed_jobs)
  finished.concat(failed_jobs)
end

#execute_remotely(options = {}) ⇒ Object



630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
# File 'lib/origen/application/lsf_manager.rb', line 630

# Runs a job's command and records the outcome through the marker files.
#
# The job is marked as started, then (when options[:dependents] is given)
# the dependents are waited on and must all have passed. The command's
# combined stdout/stderr is written to the job's log file and the job is
# marked passed or failed according to the command's exit status. Any
# exception raised along the way results in the job being marked as failed.
#
# options[:id]         - id of the job being executed
# options[:cmd]        - command to run, either a String or an Array of
#                        words that will be joined with spaces
# options[:dependents] - optional array of job ids that must pass first
def execute_remotely(options = {})
  job_started(options[:id])
  begin
    # Normalise the command up front, it is also needed for the error log
    # written when the dependents have failed
    cmd = options[:cmd].is_a?(Array) ? options[:cmd].join(' ') : options[:cmd]
    if options[:dependents]
      wait_for_completion(ids:                      options[:dependents],
                          poll_duration_in_seconds: 1,
                          # Don't wait long by the time this runs the LSF
                          # should have guaranteed the job has run
                          timeout_in_seconds:       120
                         )
      unless options[:dependents].all? { |id| job_passed?(id) }
        File.open(log_file(options[:id]), 'w') do |f|
          f.puts "*** ERROR! *** #{cmd} ***"
          f.puts 'Dependents failed!'
        end
        fail 'Dependents failed!'
      end
    end
    output = `#{cmd} 2>&1`
    File.open(log_file(options[:id]), 'w') do |f|
      f.write output
    end
    # NOTE(review): $CHILD_STATUS comes from the 'English' library, which is
    # presumably required elsewhere in the project - confirm
    if $CHILD_STATUS.success?
      job_passed(options[:id])
    else
      job_failed(options[:id])
    end
  rescue
    job_failed(options[:id])
  end
end

#extract_ids(jobs_or_ids) ⇒ Object



400
401
402
# File 'lib/origen/application/lsf_manager.rb', line 400

# Normalises a list that may mix job hashes and plain ids into a list of
# ids: hashes are replaced by their :id entry, anything else is kept as is.
def extract_ids(jobs_or_ids)
  jobs_or_ids.map do |entry|
    case entry
    when Hash then entry[:id]
    else entry
    end
  end
end

#failed_file(id) ⇒ Object



340
341
342
# File 'lib/origen/application/lsf_manager.rb', line 340

# Path of the marker file whose presence records that the given job failed.
def failed_file(id)
  [log_file_directory, "#{log_file_name(id)}.failed"].join('/')
end

#failed_jobs ⇒ Object

Failed jobs are those that started to produce a log file but did not complete



570
571
572
# File 'lib/origen/application/lsf_manager.rb', line 570

# Memoized bucket of jobs classified as failed. A fresh empty array is
# created on first access (and again after the bucket has been reset to nil).
def failed_jobs
  @failed_jobs = [] unless @failed_jobs
  @failed_jobs
end

#generate_job_id ⇒ Object



604
605
606
# File 'lib/origen/application/lsf_manager.rb', line 604

# Returns a new job id: the current time as a float number of seconds,
# rendered as a string with the decimal point removed.
def generate_job_id
  Time.now.to_f.to_s.delete('.')
end

#job_completed?(id) ⇒ Boolean

Returns true if the given job ID generated a complete file when run on the LSF. The complete file is created at the end of a job run and its presence indicates that the job ran and got past the generation/compile stage without crashing.

Returns:

  • (Boolean)


583
584
585
586
# File 'lib/origen/application/lsf_manager.rb', line 583

# A job counts as completed once its started marker exists together with
# either its passed or its failed marker.
def job_completed?(id)
  started = job_started?(id)
  return started unless started
  job_passed?(id) || job_failed?(id)
end

#job_failed(id) ⇒ Object

Register that the given job ID has failed on the LSF



359
360
361
# File 'lib/origen/application/lsf_manager.rb', line 359

# Register that the given job ID has failed on the LSF by creating (or
# updating the timestamp of) its failed marker file.
#
# FileUtils is used rather than shelling out to `touch` so that the path is
# never interpreted by a shell (spaces and the like are safe). The return
# value is not meaningful.
def job_failed(id)
  FileUtils.touch(failed_file(id))
end

#job_failed?(id) ⇒ Boolean

Returns:

  • (Boolean)


600
601
602
# File 'lib/origen/application/lsf_manager.rb', line 600

# True when the failed marker file of the given job exists.
def job_failed?(id)
  marker = failed_file(id)
  File.exist?(marker)
end

#job_passed(id) ⇒ Object

Register that the given job ID has completed successfully on the LSF



354
355
356
# File 'lib/origen/application/lsf_manager.rb', line 354

# Register that the given job ID has completed successfully on the LSF by
# creating (or updating the timestamp of) its passed marker file.
#
# FileUtils is used rather than shelling out to `touch` so that the path is
# never interpreted by a shell (spaces and the like are safe). The return
# value is not meaningful.
def job_passed(id)
  FileUtils.touch(passed_file(id))
end

#job_passed?(id) ⇒ Boolean

Returns:

  • (Boolean)


596
597
598
# File 'lib/origen/application/lsf_manager.rb', line 596

# True when the passed marker file of the given job exists.
def job_passed?(id)
  marker = passed_file(id)
  File.exist?(marker)
end

#job_running?(id) ⇒ Boolean

Returns:

  • (Boolean)


588
589
590
# File 'lib/origen/application/lsf_manager.rb', line 588

# The opposite of job_completed?: true for any job that has not (yet)
# completed.
def job_running?(id)
  job_completed?(id) ? false : true
end

#job_started(id) ⇒ Object



363
364
365
# File 'lib/origen/application/lsf_manager.rb', line 363

# Register that the given job ID has started by creating (or updating the
# timestamp of) its started marker file.
#
# FileUtils is used rather than shelling out to `touch` so that the path is
# never interpreted by a shell (spaces and the like are safe). The return
# value is not meaningful.
def job_started(id)
  FileUtils.touch(started_file(id))
end

#job_started?(id) ⇒ Boolean

Returns:

  • (Boolean)


592
593
594
# File 'lib/origen/application/lsf_manager.rb', line 592

# True when the started marker file of the given job exists.
def job_started?(id)
  marker = started_file(id)
  File.exist?(marker)
end

#log_file(id) ⇒ Object

Returns the logfile that should be used by a given process on the LSF, this should be guaranteed to be unique



328
329
330
# File 'lib/origen/application/lsf_manager.rb', line 328

# Full path of the log file for the given job: log_file_name(id) inside
# log_file_directory.
def log_file(id)
  [log_file_directory, log_file_name(id)].join('/')
end

#log_file_directory ⇒ Object



349
350
351
# File 'lib/origen/application/lsf_manager.rb', line 349

# Directory holding the remote job logs and marker files:
# .lsf/remote_logs under Origen.root.
def log_file_directory
  format('%s/.lsf/remote_logs', Origen.root)
end

#log_file_name(id) ⇒ Object



344
345
346
347
# File 'lib/origen/application/lsf_manager.rb', line 344

# Base name of a job's log file: the job id with a .txt extension. Only the
# id goes into the name (the host name is not part of it).
def log_file_name(id)
  id.to_s + '.txt'
end

#lost_jobs ⇒ Object

Lost jobs are ones that for whatever reason did not start, or at least get far enough to log that they started



576
577
578
# File 'lib/origen/application/lsf_manager.rb', line 576

# Memoized bucket of jobs classified as lost. A fresh empty array is
# created on first access (and again after the bucket has been reset to nil).
def lost_jobs
  @lost_jobs = [] unless @lost_jobs
  @lost_jobs
end

#lsf ⇒ Object

Picks and returns either the application’s LSF instance or the global LSF instance



21
22
23
24
25
26
27
# File 'lib/origen/application/lsf_manager.rb', line 21

# Returns the LSF API object to talk to: Origen.lsf! when Origen is running
# globally, otherwise the application's own instance (Origen.app.lsf).
def lsf
  return Origen.lsf! if Origen.running_globally?
  Origen.app.lsf
end

#on_origen_shutdown(_options = {}) ⇒ Object



620
621
622
# File 'lib/origen/application/lsf_manager.rb', line 620

# Shutdown hook: writes the job table to disk, but only if one has been
# loaded or created in this session (i.e. @remote_jobs is set).
def on_origen_shutdown(_options = {})
  return unless @remote_jobs
  save_remote_jobs
end

#outstanding_jobs? ⇒ Boolean

Returns:

  • (Boolean)


202
203
204
# File 'lib/origen/application/lsf_manager.rb', line 202

# True while at least one job sits in the running or queuing bucket.
def outstanding_jobs?
  !running_jobs.empty? || !queuing_jobs.empty?
end

#passed_file(id) ⇒ Object



332
333
334
# File 'lib/origen/application/lsf_manager.rb', line 332

# Path of the marker file whose presence records that the given job passed.
def passed_file(id)
  [log_file_directory, "#{log_file_name(id)}.passed"].join('/')
end

#passed_jobs ⇒ Object



565
566
567
# File 'lib/origen/application/lsf_manager.rb', line 565

# Memoized bucket of jobs classified as passed. A fresh empty array is
# created on first access (and again after the bucket has been reset to nil).
def passed_jobs
  @passed_jobs = [] unless @passed_jobs
  @passed_jobs
end


137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/origen/application/lsf_manager.rb', line 137

# Logs the details of jobs.
#
# With options[:id] only that job is printed, under a "Job: <id>" heading.
# Otherwise the status buckets are printed in turn (queuing, running, lost,
# passed, failed), each under its own heading; options[:type] restricts the
# output to a single bucket and defaults to :all. Note that the default is
# written back into the options hash that was passed in.
def print_details(options = {})
  if options[:id]
    Origen.log.info "Job: #{options[:id]}"
    Origen.log.info '----' + '-' * options[:id].length
    return print_details_of(remote_jobs[options[:id]])
  end

  options[:type] ||= :all
  sections = [
    [:queuing, 'Queuing'],
    [:running, 'Running'],
    [:lost,    'Lost'],
    [:passed,  'Passed'],
    [:failed,  'Failed']
  ]
  outcome = nil
  sections.each do |type, title|
    outcome = nil
    next unless options[:type] == :all || options[:type] == type
    Origen.log.info ''
    Origen.log.info title
    # Underline the heading with one dash per character
    Origen.log.info '-' * title.length
    outcome = send("#{type}_jobs").each { |job| print_details_of(job) }
  end
  outcome
end


177
178
179
180
181
182
# File 'lib/origen/application/lsf_manager.rb', line 177

# Logs a short description of one job: its command line (with the
# ' --exec_remote' switch hidden), its id, how long ago it was submitted,
# and a trailing blank line.
def print_details_of(job)
  say = ->(text) { Origen.log.info text }
  command_line = "#{job[:command]} #{job[:switches]}"
  say.call command_line.gsub(' --exec_remote', '')
  say.call "ID: #{job[:id]}"
  say.call "Submitted: #{time_ago(job[:submitted_at])}"
  say.call ''
end


87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/origen/application/lsf_manager.rb', line 87

# Logs a summary of how many jobs are in each status bucket.
#
# options[:verbose]           - when set, print_details is called first with
#                               the same options
# options[:print_insructions] - (sic, the key is spelled this way) when true,
#                               which is the default, the summary is followed
#                               by the commands for common follow-up tasks
#                               for every bucket that is not empty
def print_status(options = {})
  options = { print_insructions: true }.merge(options)
  print_details(options) if options[:verbose]

  say = ->(text) { Origen.log.info text }
  say.call ''
  say.call 'LSF Status'
  say.call '----------'
  say.call "Queuing:    #{queuing_jobs.size}"
  say.call "Running:    #{running_jobs.size}"
  say.call "Lost:       #{lost_jobs.size}"
  say.call ''
  say.call "Passed:     #{passed_jobs.size}"
  say.call "Failed:     #{failed_jobs.size}"
  say.call ''
  return unless options[:print_insructions]

  # Hints shared by every bucket whose jobs can be inspected and re-submitted
  hints = lambda do |title, type|
    say.call title
    say.call " Show details: origen l -v -t #{type}"
    say.call " Re-submit:    origen l -r -t #{type}"
  end
  say.call 'Common tasks'
  say.call '------------'
  hints.call('Queuing', 'queuing') if queuing_jobs.size > 0
  hints.call('Running', 'running') if running_jobs.size > 0
  hints.call('Lost', 'lost') if lost_jobs.size > 0
  if passed_jobs.size > 0
    say.call 'Passed'
    say.call ' Build log:    origen l -l'
  end
  hints.call('Failed', 'failed') if failed_jobs.size > 0
  say.call ''
  say.call 'Reset the LSF manager (clear all jobs): origen lsf -c -t all'
  say.call ''
end

#queuing_jobs ⇒ Object



557
558
559
# File 'lib/origen/application/lsf_manager.rb', line 557

# Memoized bucket of jobs classified as queuing. A fresh empty array is
# created on first access (and again after the bucket has been reset to nil).
def queuing_jobs
  @queuing_jobs = [] unless @queuing_jobs
  @queuing_jobs
end

#remote_jobs ⇒ Object



486
487
488
# File 'lib/origen/application/lsf_manager.rb', line 486

# The table of known jobs, keyed by job id. On first access it is restored
# from disk via restore_remote_jobs, falling back to an empty hash when
# that returns nothing.
def remote_jobs
  return @remote_jobs if @remote_jobs
  @remote_jobs = restore_remote_jobs || {}
end

#remote_jobs_file ⇒ Object



29
30
31
# File 'lib/origen/application/lsf_manager.rb', line 29

# Path of the file the job table is saved to: .lsf/remote_jobs under
# Origen.root.
def remote_jobs_file
  format('%s/.lsf/remote_jobs', Origen.root)
end

#restore_remote_jobs ⇒ Object



608
609
610
611
612
613
614
615
616
617
618
# File 'lib/origen/application/lsf_manager.rb', line 608

# Loads the job table previously written to remote_jobs_file.
#
# Returns nil when the file does not exist or when its content cannot be
# un-marshalled.
#
# NOTE(review): Marshal.load must only ever be fed trusted data; this
# assumes the file is only written by save_remote_jobs - confirm.
def restore_remote_jobs
  path = remote_jobs_file
  return unless File.exist?(path)
  File.open(path) do |io|
    begin
      Marshal.load(io)
    rescue
      nil
    end
  end
end

#resubmit(options) ⇒ Object

Resubmit jobs



234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/origen/application/lsf_manager.rb', line 234

# Resubmit jobs.
#
# options[:type] == :all resubmits every known job, any other :type
# resubmits the jobs in the "<type>_jobs" bucket. Without a :type, the
# single job stored under options[:id] is resubmitted.
def resubmit(options)
  kind = options[:type]
  return resubmit_job(remote_jobs[options[:id]]) unless kind

  if kind == :all
    remote_jobs.each_value { |job| resubmit_job(job) }
  else
    send("#{kind}_jobs").each { |job| resubmit_job(job) }
  end
end

#resubmit_job(job) ⇒ Object



367
368
369
370
371
372
373
374
375
376
# File 'lib/origen/application/lsf_manager.rb', line 367

# Submits an existing job to the LSF again.
#
# The job's log file and its passed/failed/started markers are removed
# first, then the job record is updated in place: new :lsf_id, :status and
# :completed_at reset to nil, :submitted_at set to now and :submissions
# incremented.
def resubmit_job(job)
  id = job[:id]
  leftovers = [log_file(id), passed_file(id), failed_file(id), started_file(id)]
  leftovers.each { |path| FileUtils.rm_f(path) if File.exist?(path) }

  full_command = command_prefix(id, job[:dependents_ids]) + job[:command] + job[:switches]
  job[:lsf_id] = lsf.submit(full_command, dependents: job[:dependents_lsf_ids])
  job[:status] = nil
  job[:completed_at] = nil
  job[:submitted_at] = Time.now
  job[:submissions] += 1
end

#running_jobs ⇒ Object



553
554
555
# File 'lib/origen/application/lsf_manager.rb', line 553

# Memoized bucket of jobs classified as running. A fresh empty array is
# created on first access (and again after the bucket has been reset to nil).
def running_jobs
  @running_jobs = [] unless @running_jobs
  @running_jobs
end

#save_remote_jobs ⇒ Object



624
625
626
627
628
# File 'lib/origen/application/lsf_manager.rb', line 624

# Writes the in-memory job table (@remote_jobs) to remote_jobs_file in
# Marshal format, replacing any previous content.
def save_remote_jobs
  File.open(remote_jobs_file, 'w') { |io| Marshal.dump(@remote_jobs, io) }
end

#started_file(id) ⇒ Object



336
337
338
# File 'lib/origen/application/lsf_manager.rb', line 336

# Path of the marker file whose presence records that the given job started.
def started_file(id)
  [log_file_directory, "#{log_file_name(id)}.started"].join('/')
end

#stats ⇒ Object



250
251
252
# File 'lib/origen/application/lsf_manager.rb', line 250

# Shortcut to the current application's stats object.
def stats
  application = Origen.app
  application.stats
end

#submit_job(command, options = {}) ⇒ Object



378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
# File 'lib/origen/application/lsf_manager.rb', line 378

# Submits the given command to the LSF as a new job and records it in
# remote_jobs. Returns the new job record.
#
# options[:lsf_option_string] - extra text appended to the command
#                               (defaults to '')
# options[:depend], [:depends], [:dependent], [:dependents]
#                             - job(s) or job id(s) that the new job depends
#                               on; all four keys are honoured and combined
#
# The options recorded for the current command are appended too when the
# submitted command is the same command (see command_options).
def submit_job(command, options = {})
  options = { lsf_option_string: '' }.merge(options)
  switches = [' ', options[:lsf_option_string], command_options(command)].flatten.compact.join(' ')
  id = generate_job_id
  dependencies = options.values_at(:depend, :depends, :dependent, :dependents)
  dependents_ids = extract_ids(dependencies.flatten.compact)
  dependents_lsf_ids = dependents_ids.map { |dep_id| remote_jobs[dep_id][:lsf_id] }
  full_command = command_prefix(id, dependents_ids) + command + switches
  lsf_id = lsf.submit(full_command, dependents: dependents_lsf_ids)
  remote_jobs[id] = {
    id:                 id,
    lsf_id:             lsf_id,
    command:            command,
    submitted_at:       Time.now,
    submissions:        1,
    switches:           switches,
    dependents_ids:     dependents_ids,
    dependents_lsf_ids: dependents_lsf_ids
  }
end

#submit_origen_job(cmd, options = {}) ⇒ Object



404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
# File 'lib/origen/application/lsf_manager.rb', line 404

# Submits an Origen command to the LSF via submit_job.
#
# cmd may be given with or without a leading "origen ". When
# options[:action] is supplied it is placed in front of cmd, with :pattern
# being translated to "generate". The options are passed on to submit_job
# unchanged.
def submit_origen_job(cmd, options = {})
  action = case options[:action]
           when nil, false then ''
           when :pattern   then ' generate'
           else " #{options[:action]}"
           end

  str = "#{action} #{cmd}".strip
  str = str.sub('origen ', '') if str =~ /^origen /

  # The --exec_remote switch is appended to all Origen commands, this allows
  # command processing to be altered based on whether it is running locally
  # or remotely by testing Origen.running_remotely?
  submit_job("origen #{str} --exec_remote", options)
end

#time_ago(time) ⇒ Object



184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# File 'lib/origen/application/lsf_manager.rb', line 184

# Describes how long ago the given Time was, in whole units of the largest
# fitting unit among seconds, minutes, hours and days, for example
# "5 seconds ago", "1 minute ago" or "3 days ago".
#
# The unit is singular only when the number is exactly 1, so a time in the
# current second reads "0 seconds ago".
def time_ago(time)
  seconds = (Time.now - time).to_i
  unit, number =
    if seconds < 60
      ['second', seconds]
    elsif seconds < 3600
      ['minute', seconds / 60]
    elsif seconds < 86_400
      ['hour', seconds / 3600]
    else
      ['day', seconds / 86_400]
    end
  "#{number} #{unit}#{number == 1 ? '' : 's'} ago"
end

#wait_for_completion(options = {}) ⇒ Object

Waits for all jobs to complete, will retry lost jobs (optionally failed jobs).

Alternatively supply an :id or an array of :ids to wait only for specific job(s) to complete.



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/origen/application/lsf_manager.rb', line 38

# Blocks until the jobs have completed or the timeout has expired.
#
# With options[:id] and/or options[:ids] only those jobs are waited for.
# Otherwise all jobs are monitored, with lost and failed jobs being
# resubmitted until they have been submitted more than the allowed number
# of retries.
#
# options[:max_lost_retries]         - resubmissions allowed for a lost job
#                                      (default 10)
# options[:max_fail_retries]         - resubmissions allowed for a failed job
#                                      (default 0)
# options[:poll_duration_in_seconds] - sleep between checks (default 10)
# options[:timeout_in_seconds]       - give up after this long (default 3600)
# options[:start_time]               - when the wait began; set on the first
#                                      call and carried through the recursion
def wait_for_completion(options = {})
  options = {
    max_lost_retries:         10,
    max_fail_retries:         0,
    poll_duration_in_seconds: 10,
    timeout_in_seconds:       3600
  }.merge(options)
  options[:start_time] ||= Time.now
  return unless Time.now - options[:start_time] < options[:timeout_in_seconds]

  # When waiting for ids we will hold by monitoring for the result
  # files directly, rather than using the generic classify routine.
  # This is because the most common use case for this is when jobs
  # are idling remotely on the LSF and don't want to run into contention
  # issues when multiple processes try to classify/save the status.
  if options[:id] || options[:ids]
    ids = extract_ids([options[:id], options[:ids]].flatten.compact)
    return unless ids.any? { |id| job_running?(id) }
    sleep options[:poll_duration_in_seconds]
    return wait_for_completion(options)
  end

  classify_jobs
  print_status(print_insructions: false)
  sleep options[:poll_duration_in_seconds]
  classify_jobs

  resubmitted = false
  # Resubmits every job of the bucket that still has submissions left
  retry_jobs = lambda do |jobs, max_retries|
    jobs.each do |job|
      next unless job[:submissions] < max_retries + 1
      resubmit_job(job)
      resubmitted = true
    end
  end
  retry_jobs.call(lost_jobs, options[:max_lost_retries])
  retry_jobs.call(failed_jobs, options[:max_fail_retries])

  classify_jobs
  if outstanding_jobs? || resubmitted
    wait_for_completion(options)
  else
    print_status
  end
end