Module: Bio::Pipengine
- Defined in:
- lib/bio/pipengine.rb,
lib/bio/pipengine/job.rb,
lib/bio/pipengine/step.rb,
lib/bio/pipengine/sample.rb
Defined Under Namespace
Constant Summary collapse
- @@logger_error =
Logger.new(STDERR)
Class Method Summary collapse
-
.add_job(job, pipeline, step_name, sample) ⇒ Object
def self.check_config unless File.exists?("#{Dir.home}/.torque_rm.yaml") ARGV.clear current_user = Etc.getlogin puts "\nIt seems you are running PipEngine for the first time.
-
.check_and_run_multi(samples_file, pipeline, samples_list, options) ⇒ Object
handle steps that run on multiple samples (i.e. sample groups job).
-
.check_samples(passed_samples, samples) ⇒ Object
check if sample exists.
-
.check_steps(passed_steps, pipeline) ⇒ Object
check if step exists.
- .create_job(samples_file, pipeline, samples_list, options, sample) ⇒ Object
-
.create_samples(dir) ⇒ Object
create the samples.yml file.
- .include(name, filename) ⇒ Object
-
.inspect_steps(pipeline_file) ⇒ Object
load the pipeline file and show a list of available steps.
-
.load_samples_file(file) ⇒ Object
add_job.
- .parse_tag_option(option_tag) ⇒ Object
- .run(options) ⇒ Object
Class Method Details
.add_job(job, pipeline, step_name, sample) ⇒ Object
def self.check_config unless File.exists?("#{Dir.home}/.torque_rm.yaml") ARGV.clear current_user = Etc.getlogin puts "\nIt seems you are running PipEngine for the first time. Please fill in the following information:" print "\nHostname or IP address of authorized server from where jobs will be submitted: ".light_blue server = gets.chomp print "\n" print "Specify the username you will be using to connect and submit jobs [#{current_user}]: ".light_blue username = gets.chomp username = (username == "") ? current_user : username puts "Attempting connection to the server...".green path = `ssh #{username}@#{server} -t "which qsub"`.split("/qsub").first unless path =~ /\/\S+\/\S+/ warn "Connection problems detected! Please check that you are able to connect to '#{server}' as '#{username}' via ssh.".red else file = File.open("#{Dir.home}/.torque_rm.yaml","w") file.write({:hostname => server, :path => path, :user => username}.to_yaml) file.close puts "First time configuration completed!".green puts "It is strongly recommended to setup a password-less SSH connection to use PipEngine.".green exit end end end #check_config
253 254 255 256 257 |
# File 'lib/bio/pipengine.rb', line 253

# Append the command lines for +step_name+ to +job+, inserting any
# prerequisite step first.
#
# job       - the Bio::Pipengine::Job being built
# pipeline  - parsed pipeline YAML Hash (step definitions under "steps")
# step_name - name of the step to add
# sample    - Bio::Pipengine::Sample (or a Hash of samples for multi jobs)
def self.add_job(job, pipeline, step_name, sample)
  # Build the Step object from its definition in the pipeline file
  step = Bio::Pipengine::Step.new(step_name, pipeline["steps"][step_name])
  # Recurse so the prerequisite's commands land in the job before this step's
  add_job(job, pipeline, step.pre, sample) if step.has_prerequisite?
  # Finally append this step's command lines to the job
  job.add_step(step, sample)
end
.check_and_run_multi(samples_file, pipeline, samples_list, options) ⇒ Object
handle steps that run on multiple samples (i.e. sample groups job)
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
# File 'lib/bio/pipengine.rb', line 84

# Handle steps that run on multiple samples (i.e. sample-group jobs).
#
# Returns true when all requested steps are multi-samples (a single job
# covering every sample is created), false when all are per-sample.
# Aborts the process if the request mixes multi-samples and
# single-sample steps in the same job.
#
# samples_file - parsed samples YAML Hash
# pipeline     - parsed pipeline YAML Hash
# samples_list - Hash of sample_name => path(s) the job works on
# options      - command-line options Hash (:steps, :group, ...)
def self.check_and_run_multi(samples_file, pipeline, samples_list, options)
  # one boolean per requested step: does it operate on sample groups?
  step_multi = options[:steps].map { |s| Bio::Pipengine::Step.new(s, pipeline["steps"][s]).is_multi? }
  if step_multi.include? false
    if step_multi.uniq.size > 1
      # mixed multi and single steps cannot share one job
      @@logger_error.error "\nAbort! You are trying to run both multi-samples and single sample steps in the same job".red
      exit
    else
      return false
    end
  else
    # build a Sample object per sample and submit one job for the whole group
    samples_obj = {}
    samples_list.each_key { |sample_name| samples_obj[sample_name] = Bio::Pipengine::Sample.new(sample_name, samples_list[sample_name], options[:group]) }
    create_job(samples_file, pipeline, samples_list, options, samples_obj)
    return true
  end
end
.check_samples(passed_samples, samples) ⇒ Object
check if sample exists
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/bio/pipengine.rb', line 145

# Check that every requested sample exists in the samples file; exits
# the process with an error on the first unknown sample.
#
# passed_samples - Array of sample names given on the command line
# samples        - parsed samples YAML Hash ("samples" maps a name to a
#                  path, or a group name to a nested name => path Hash)
def self.check_samples(passed_samples, samples)
  # Collect all known sample names once (the original rebuilt this list
  # inside the loop for every passed sample); groups nest one level deep.
  samples_names = []
  samples["samples"].each_key do |k|
    if samples["samples"][k].kind_of? Hash
      samples["samples"][k].each_key { |s| samples_names << s }
    else
      samples_names << k
    end
  end
  passed_samples.each do |sample|
    unless samples_names.include? sample
      @@logger_error.error "Sample \"#{sample}\" does not exist in sample file!".red
      exit
    end
  end
end
.check_steps(passed_steps, pipeline) ⇒ Object
check if step exists
163 164 165 166 167 168 169 170 |
# File 'lib/bio/pipengine.rb', line 163

# Check that every requested step is defined in the pipeline file;
# exits the process with an error on the first unknown step.
#
# passed_steps - Array of step names given on the command line
# pipeline     - parsed pipeline YAML Hash (definitions under "steps")
def self.check_steps(passed_steps, pipeline)
  defined_steps = pipeline["steps"].keys
  passed_steps.each do |step|
    next if defined_steps.include?(step)
    @@logger_error.error "Step \"#{step}\" does not exist in pipeline file!".red
    exit
  end
end
.create_job(samples_file, pipeline, samples_list, options, sample) ⇒ Object
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/bio/pipengine.rb', line 102

# Build a Job for the requested steps, fill in its resources and
# command lines, then write the job script (and submit it unless this
# is a dry run).
#
# samples_file - parsed samples YAML Hash (provides "resources")
# pipeline     - parsed pipeline YAML Hash
# samples_list - Hash of sample_name => path(s)
# options      - command-line options Hash (:name, :steps, :tmp, :dry, ...)
# sample       - Bio::Pipengine::Sample, or a Hash of samples for a
#                multi-samples job
def self.create_job(samples_file, pipeline, samples_list, options, sample)
  # getting the sample name (only if this is not a multi samples job)
  sample_name = (sample.kind_of? Hash) ? nil : sample.name + "-"
  # setting the job name: explicit --name wins, else derive it from the steps
  job_name = nil
  if options[:name]
    job_name = options[:name]
  elsif options[:steps].size > 1
    job_name = "#{sample_name}#{options[:steps].join("-")}"
  else
    job_name = "#{sample_name}#{options[:steps].first}"
  end
  # creating the Job object
  job = Bio::Pipengine::Job.new(job_name)
  job.local = options[:tmp]
  job.custom_output = options[:output_dir]
  job.custom_name = (options[:name]) ? options[:name] : nil
  # Adding pipeline and samples resources
  job.add_resources pipeline["resources"]
  job.add_resources samples_file["resources"]
  # Resource tags from the command line overwrite resources defined in
  # the pipeline and samples files
  job.add_resources parse_tag_option(options[:tag])
  # setting the logging system
  job.log = options[:log]
  job.log_adapter = options[:log_adapter]
  # sample groups: from the cli option if present, else all available samples
  job.multi_samples = (options[:multi]) ? options[:multi] : samples_list.keys
  job.samples_obj = sample if sample.kind_of? Hash
  # cycling through steps and adding command lines to the job
  options[:steps].each do |step_name|
    # TODO WARNING this can add multiple times the same step if there are multi dependencies
    self.add_job(job, pipeline, step_name, sample)
  end
  if options[:dry]
    job.to_script(options)
  else
    job.to_script(options)
    job.submit
  end
end
.create_samples(dir) ⇒ Object
create the samples.yml file
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
# File 'lib/bio/pipengine.rb', line 186

# Scan one or more run directories and write a samples.yml template in
# the current directory, mapping each sample name to its data path(s).
#
# dir - Array of directory paths to scan; Illumina "Project_*" layouts
#       are detected and expanded one level deeper.
def self.create_samples(dir)
  File.open("samples.yml", "w") do |file|
    # `pwd -L` keeps the logical (symlink-preserving) working directory
    file.write "resources:\n\soutput: #{`pwd -L`}\n\nsamples:\n"
    # per-key default block so each new sample gets its own Array
    samples = Hash.new { |hash, key| hash[key] = [] }
    dir.each do |path|
      projects = Dir.glob(path + "/*").sort.select { |folders| folders.split("/")[-1] =~ /Project_/ }
      unless projects.empty?
        projects.each do |project_folder|
          Dir.glob(project_folder + "/*").sort.each { |s| samples[s.split("/")[-1]] << s }
        end
      else
        # Dir.exist? replaces Dir.exists?, which was removed in Ruby 3.2
        Dir.glob(path + "/*").sort.each { |s| samples[s.split("/")[-1]] << s if Dir.exist? s }
      end
    end
    samples.each_key do |sample|
      file.write "\s" + sample + ":\s" + samples[sample].join(",") + "\n"
    end
  end
end
.include(name, filename) ⇒ Object
4 5 6 |
# File 'lib/bio/pipengine.rb', line 4

# Read +filename+ and return its contents with every line indented by
# one space. The +name+ argument is accepted but not used here.
# NOTE(review): readlines keeps each trailing "\n" and join("\n") adds
# another, so blank lines appear between original lines — confirm this
# is the intended output.
def self.include(name, filename)
  indented = File.readlines(filename).map { |line| " " + line }
  indented.join("\n")
end
.inspect_steps(pipeline_file) ⇒ Object
load the pipeline file and show a list of available steps
173 174 175 176 177 178 179 180 181 182 183 |
# File 'lib/bio/pipengine.rb', line 173

# Load the pipeline file and print the pipeline name followed by each
# available step with its description.
#
# pipeline_file - path to the pipeline YAML file
def self.inspect_steps(pipeline_file)
  pipeline = YAML.load_file pipeline_file
  print "\nPipeline: ".blue
  print "#{pipeline["pipeline"]}\n\n".green
  puts "List of available steps:".light_blue
  # one "  name:  description" line per step
  pipeline["steps"].each_key do |step|
    print "\s\s#{step}:\s\s".blue
    print "#{pipeline["steps"][step]["desc"]}\n".green
  end
  puts "\n"
end
.load_samples_file(file) ⇒ Object
add_job
259 260 261 262 263 264 265 266 267 268 269 270 271 272 |
# File 'lib/bio/pipengine.rb', line 259

# Load the samples YAML file and normalize it: every sample value (and
# every key/value inside a sample group) plus every resource key/value
# is converted to a String.
#
# file - path to the samples YAML file
# Returns the normalized Hash.
def self.load_samples_file(file)
  samples_file = YAML.load_file file
  samples_file["samples"].each do |name, value|
    samples_file["samples"][name] =
      if value.kind_of? Hash
        # a group: stringify both sample names and paths
        value.map { |k, v| [k.to_s, v.to_s] }.to_h
      else
        value.to_s
      end
  end
  # make sure everything in Resources is converted to string as well
  samples_file["resources"] = samples_file["resources"].map { |k, v| [k.to_s, v.to_s] }.to_h
  samples_file
end
.parse_tag_option(option_tag) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/bio/pipengine.rb', line 65

# Parse command-line tag options ("tag1=value1 tag2=value2") into a
# Hash of {"tag1" => "value1", ...}. Returns {} when option_tag is nil.
# Aborts the process if a tag cannot be split into key=value tokens.
def self.parse_tag_option(option_tag)
  return {} unless option_tag
  options = {}
  option_tag.each do |tag|
    values = tag.split("=")
    if values.empty?
      @@logger_error.error "\nAbort! Unrecognized values for tag option, please provide the tags as follows: tag1=value1 tag2=value2".red
      exit
    else
      # Hash[*["k", "v"]] => {"k" => "v"}
      options.merge! Hash[*values.flatten]
    end
  end
  options
end
.run(options) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/bio/pipengine.rb', line 9

# Entry point: load the pipeline and samples files, normalize sample
# names, run pre-flight checks, then create one multi-samples job or
# one job per sample.
#
# options - command-line options Hash (:pipeline, :samples_file,
#           :steps, :samples, :group, :multi, :dry, ...)
def self.run(options)
  # reading the yaml files (pipeline file may contain ERB)
  pipeline = YAML.load ERB.new(File.read(options[:pipeline])).result(binding)
  samples_file = load_samples_file options[:samples_file]
  # make sure all sample names are always Strings
  converted_samples_list = {}
  samples_file["samples"].each_key do |sample|
    if samples_file["samples"][sample].kind_of? Hash # it's a group of samples
      converted_samples_list[sample.to_s] = Hash[samples_file["samples"][sample].map { |k, v| [k.to_s, v] }]
    else
      converted_samples_list[sample.to_s] = samples_file["samples"][sample]
    end
  end
  # replacing original samples hash with the converted one
  samples_file["samples"] = converted_samples_list
  # pre-running checks
  check_steps(options[:steps], pipeline)
  check_samples(options[:samples], samples_file) if options[:samples]
  # list of samples the jobs will work on
  samples_list = nil
  # check if a group is specified
  if options[:group]
    samples_list = options[:samples] ? samples_file["samples"][options[:group]].select { |k, v| options[:samples].include? k } : samples_file["samples"][options[:group]]
    options[:multi] = samples_list.keys
    samples_file["resources"]["output"] << "/#{options[:group]}"
  else
    # if not, normalize the sample list to remove groups and get all samples
    full_list_samples = {}
    samples_file["samples"].each_key do |k|
      if samples_file["samples"][k].kind_of? Hash
        full_list_samples.merge! samples_file["samples"][k]
      else
        full_list_samples[k] = samples_file["samples"][k]
      end
    end
    samples_list = options[:samples] ? full_list_samples.select { |k, v| options[:samples].include? k } : full_list_samples
  end
  ########### START ###########
  # create output directory (jobs scripts will be saved there)
  FileUtils.mkdir_p samples_file["resources"]["output"] #unless options[:dry] #&& options[:spooler]!="pbs"
  # check if the requested steps are multi-samples
  run_multi = check_and_run_multi(samples_file, pipeline, samples_list, options)
  unless run_multi
    # no multi-samples steps: iterate on samples and create one job per sample
    samples_list.each_key do |sample_name|
      sample = Bio::Pipengine::Sample.new(sample_name.to_s, samples_list[sample_name], options[:group])
      create_job(samples_file, pipeline, samples_list, options, sample)
    end
  end
end