Class: MiGA::Project
- Inherits:
-
Object
- Object
- MiGA::Project
- Defined in:
- lib/miga/project.rb
Constant Summary collapse
- @@FOLDERS =
Class
# Top-level folders that #create makes directly inside the project path.
%w[data metadata daemon]
- @@DATA_FOLDERS =
# Folders that #create makes under "<project>/data/". Parent folders are
# listed before their children because #create makes them one at a time
# with Dir.mkdir, in this order.
%w[
  01.raw_reads
  02.trimmed_reads
  03.read_quality
  04.trimmed_fasta
  05.assembly
  06.cds
  07.annotation
  07.annotation/01.function
  07.annotation/02.taxonomy
  07.annotation/01.function/01.essential
  07.annotation/01.function/02.ssu
  07.annotation/02.taxonomy/01.mytaxa
  07.annotation/03.qa
  07.annotation/03.qa/01.checkm
  07.annotation/03.qa/02.mytaxa_scan
  08.mapping
  08.mapping/01.read-ctg
  08.mapping/02.read-gene
  09.distances
  09.distances/01.haai
  09.distances/02.aai
  09.distances/03.ani
  09.distances/04.ssu
  10.clades
  10.clades/01.find
  10.clades/02.ani
  10.clades/03.ogs
  10.clades/04.phylogeny
  10.clades/04.phylogeny/01.essential
  10.clades/04.phylogeny/02.core
  10.clades/05.metadata
]
- @@RESULT_DIRS =
# Maps each project-level result type to its folder under "<project>/data/".
# Written one entry per line: collapsed onto a single line, the inline "#"
# comments would swallow every entry that follows them.
{
  # Distances
  haai_distances: "09.distances/01.haai",
  aai_distances: "09.distances/02.aai",
  ani_distances: "09.distances/03.ani",
  #ssu_distances: "09.distances/04.ssu",
  # Clade identification
  clade_finding: "10.clades/01.find",
  # Clade analysis
  subclades: "10.clades/02.ani",
  ogs: "10.clades/03.ogs",
  ess_phylogeny: "10.clades/04.phylogeny/01.essential",
  core_phylogeny: "10.clades/04.phylogeny/02.core",
  clade_metadata: "10.clades/05.metadata"
}
- @@KNOWN_TYPES =
# Known project types, keyed by type name. Each entry carries a
# human-readable description plus +single+ and +multi+ flags.
# NOTE(review): the flags presumably say whether single-organism and
# multi-organism datasets are accepted by the type; confirm against callers.
{
  mixed: {
    description: "Mixed collection of genomes, metagenomes, and viromes.",
    single: true,
    multi: true
  },
  genomes: {
    description: "Collection of genomes.",
    single: true,
    multi: false
  },
  clade: {
    # Closely related genomes share a HIGH ANI, so the bound is ">=".
    description: "Collection of closely-related genomes (ANI >= 90%).",
    single: true,
    multi: false
  },
  metagenomes: {
    description: "Collection of metagenomes and/or viromes.",
    single: false,
    multi: true
  }
}
- @@DISTANCE_TASKS =
# Project-level distance tasks, in the order #next_distances checks them.
[
  :haai_distances,
  :aai_distances,
  :ani_distances,
  :clade_finding
]
- @@INCLADE_TASKS =
# Clade-level tasks, in the order #next_inclade checks them (that method
# only considers them for projects whose type is :clade).
[
  :subclades,
  :ogs,
  :ess_phylogeny,
  :core_phylogeny,
  :clade_metadata
]
Instance Attribute Summary collapse
-
#metadata ⇒ Object
readonly
Metadata object of the project, loaded from miga.project.json.
-
#path ⇒ Object
readonly
Absolute path to the project folder.
Class Method Summary collapse
- .DISTANCE_TASKS ⇒ Object
- .exist?(path) ⇒ Boolean
- .INCLADE_TASKS ⇒ Object
- .KNOWN_TYPES ⇒ Object
- .load(path) ⇒ Object
- .RESULT_DIRS ⇒ Object
Instance Method Summary collapse
- #add_dataset(name) ⇒ Object
- #add_result(result_type) ⇒ Object
- #create ⇒ Object
- #dataset(name) ⇒ Object
- #datasets ⇒ Object
- #done_preprocessing? ⇒ Boolean
- #each_dataset(&blk) ⇒ Object
- #each_dataset_profile_advance(&blk) ⇒ Object
- #import_dataset(ds, method = :hardlink) ⇒ Object
-
#initialize(path, update = false) ⇒ Project
constructor
A new instance of Project.
- #load ⇒ Object
- #name ⇒ Object
- #next_distances ⇒ Object
- #next_inclade ⇒ Object
-
#profile_datasets_advance ⇒ Object
Generates a two-dimensional matrix (array of arrays) where the first index corresponds to the dataset, the second index corresponds to the dataset task, and the value is a status code: 0 (before execution), 1 (done or not required), or 2 (to do).
- #result(name) ⇒ Object
- #results ⇒ Object
- #save ⇒ Object
- #unlink_dataset(name) ⇒ Object
- #unregistered_datasets ⇒ Object
Constructor Details
#initialize(path, update = false) ⇒ Project
Returns a new instance of Project.
74 75 76 77 78 79 80 81 |
# File 'lib/miga/project.rb', line 74
#
# Opens the project located at +path+, creating it first when +update+ is
# true or when no project exists there yet.
#
# @param path [String] project folder; stored as an absolute path.
# @param update [Boolean] when true, #create runs even for an existing project.
# @raise [RuntimeError] if ~/.miga_rc or ~/.miga_daemon.json is missing.
def initialize(path, update=false)
  home = ENV["HOME"]
  unless File.exist?("#{home}/.miga_rc") &&
         File.exist?("#{home}/.miga_daemon.json")
    raise "Impossible to create project in uninitialized MiGA."
  end
  @path = File.absolute_path(path)
  self.create if update || !Project.exist?(self.path)
  # #create finishes by calling #load, so only load here when it did not run.
  self.load if self.metadata.nil?
end
Instance Attribute Details
#metadata ⇒ Object (readonly)
Metadata object of the project, loaded from miga.project.json.
73 74 75 |
# File 'lib/miga/project.rb', line 73
#
# Read-only accessor for the project's metadata object (assigned by #create
# and #load).
def metadata
  @metadata
end
#path ⇒ Object (readonly)
Absolute path to the project folder.
73 74 75 |
# File 'lib/miga/project.rb', line 73
#
# Read-only accessor for the project's path (assigned in #initialize).
def path
  @path
end
Class Method Details
.DISTANCE_TASKS ⇒ Object
61 |
# File 'lib/miga/project.rb', line 61
#
# Class-level reader for the @@DISTANCE_TASKS list.
def self.DISTANCE_TASKS
  @@DISTANCE_TASKS
end
.exist?(path) ⇒ Boolean
65 66 67 |
# File 'lib/miga/project.rb', line 65
#
# Tells whether +path+ holds a project: it must be an existing directory
# that contains a "miga.project.json" file.
#
# @param path [String] candidate project folder.
# @return [Boolean]
def self.exist?(path)
  Dir.exist?(path) && File.exist?(File.join(path, "miga.project.json"))
end
.INCLADE_TASKS ⇒ Object
62 |
# File 'lib/miga/project.rb', line 62
#
# Class-level reader for the @@INCLADE_TASKS list.
def self.INCLADE_TASKS
  @@INCLADE_TASKS
end
.KNOWN_TYPES ⇒ Object
64 |
# File 'lib/miga/project.rb', line 64
#
# Class-level reader for the @@KNOWN_TYPES hash.
def self.KNOWN_TYPES
  @@KNOWN_TYPES
end
.load(path) ⇒ Object
68 69 70 71 |
# File 'lib/miga/project.rb', line 68
#
# Opens the project at +path+ without ever creating one.
#
# @param path [String] project folder.
# @return [Project, nil] the project, or nil when Project.exist? is false.
def self.load(path)
  Project.exist?(path) ? Project.new(path) : nil
end
.RESULT_DIRS ⇒ Object
63 |
# File 'lib/miga/project.rb', line 63
#
# Class-level reader for the @@RESULT_DIRS hash.
def self.RESULT_DIRS
  @@RESULT_DIRS
end
Instance Method Details
#add_dataset(name) ⇒ Object
121 122 123 124 125 126 |
# File 'lib/miga/project.rb', line 121
#
# Registers +name+ in the project's dataset list (no duplicate is added),
# saves the project, and returns the corresponding dataset.
#
# @param name [String] dataset name.
# @return [Object] whatever #dataset returns for +name+.
def add_dataset(name)
  registered = self.metadata[:datasets]
  registered << name unless registered.include?(name)
  self.save
  self.dataset(name)
end
#add_result(result_type) ⇒ Object
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 |
# File 'lib/miga/project.rb', line 169
#
# Looks for the files of a finished project-level task and, when they are
# all present, builds the Result at "<dir>/miga-project.json", attaches the
# files to it, and saves it.
#
# @param result_type [Symbol] a key of @@RESULT_DIRS.
# @return [Result, nil] the saved result; nil when the type is unknown, the
#   ".done" marker or a required file is missing, or the type has no
#   registration branch below.
def add_result(result_type)
  return nil if @@RESULT_DIRS[result_type].nil?
  base = self.path + "/data/" + @@RESULT_DIRS[result_type] + "/miga-project"
  return nil unless File.exist?(base + ".done")
  r = nil
  case result_type
  when :haai_distances, :aai_distances, :ani_distances, :ssu_distances
    return nil unless File.exist?(base + ".Rdata") &&
                      File.exist?(base + ".log") &&
                      (File.exist?(base + ".txt") ||
                       File.exist?(base + ".txt.gz"))
    r = Result.new base + ".json"
    r.add_file :rdata, "miga-project.Rdata"
    r.add_file :matrix, "miga-project.txt"
    r.add_file :log, "miga-project.log"
    r.add_file :hist, "miga-project.hist"
    # Record whether the gzip-compressed matrix is the one on disk.
    r.data[:gz] = File.exist?(base + ".txt.gz")
  when :clade_finding
    return nil unless File.exist?(base + ".proposed-clades")
    r = Result.new base + ".json"
    r.add_file :proposal, "miga-project.proposed-clades"
    r.add_file :rbm_aai90, "genome-genome.aai90.rbm"
    # NOTE(review): by analogy with "miga-project.ani95-clades" below this
    # looks like it should be "miga-project.aai90-clades"; confirm against
    # the script that writes the file before changing it.
    r.add_file :clades_aai90, "miga-project.ani-clades"
    r.add_file :rbm_ani95, "genome-genome.ani95.rbm"
    r.add_file :clades_ani95, "miga-project.ani95-clades"
  when :subclades
    return nil unless File.exist?(base + ".pdf") &&
                      File.exist?(base + ".1.classif") &&
                      File.exist?(base + ".1.medoids") &&
                      File.exist?(base + ".class.tsv") &&
                      File.exist?(base + ".class.nwk")
    r = Result.new base + ".json"
    r.add_file :report, "miga-project.pdf"
    (1..6).each do |i|
      %w{classif medoids}.each do |m|
        r.add_file "#{m}_#{i}".to_sym, "miga-project.#{i}.#{m}"
      end
    end
    r.add_file :class_table, "miga-project.class.tsv"
    r.add_file :class_tree, "miga-project.class.nwk"
    r.add_file :ani_tree, "miga-project.ani.nwk"
  when :ogs
    return nil unless File.exist?(base + ".ogs") &&
                      File.exist?(base + ".stats")
    r = Result.new base + ".json"
    r.add_file :ogs, "miga-project.ogs"
    r.add_file :stats, "miga-project.stats"
    r.add_file :rbm, "miga-project.rbm"
  end
  # Types listed in @@RESULT_DIRS without a branch above (:ess_phylogeny,
  # :core_phylogeny, :clade_metadata) leave r as nil; return nil for them
  # instead of failing with NoMethodError on nil.save.
  r.save unless r.nil?
  r
end
#create ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/miga/project.rb', line 82
#
# Builds the on-disk layout of the project: the project folder, every folder
# in @@FOLDERS, every folder in @@DATA_FOLDERS (under "data/"), a fresh
# metadata object, and a copy of ~/.miga_daemon.json as daemon/daemon.json
# when that file is not there yet. Finishes by calling #load.
def create
  root = self.path
  Dir.mkdir(root) unless Dir.exist?(root)
  wanted = @@FOLDERS.map { |dir| root + "/" + dir } +
           @@DATA_FOLDERS.map { |dir| root + "/data/" + dir }
  wanted.each do |folder|
    Dir.mkdir(folder) unless Dir.exist?(folder)
  end
  @metadata = Metadata.new(
    root + "/miga.project.json",
    {datasets: [], name: File.basename(root)}
  )
  daemon_conf = root + "/daemon/daemon.json"
  unless File.exist?(daemon_conf)
    FileUtils.cp(ENV["HOME"] + "/.miga_daemon.json", daemon_conf)
  end
  self.load
end
#dataset(name) ⇒ Object
112 113 114 115 116 117 |
# File 'lib/miga/project.rb', line 112
#
# Returns the Dataset for +name+, building it on first request and reusing
# the cached instance afterwards. The name is normalized with #miga_name
# before it is used as the cache key.
#
# @param name [String] dataset name.
# @return [Dataset]
def dataset(name)
  key = name.miga_name
  @datasets ||= {}
  @datasets[key] ||= Dataset.new(self, key)
end
#datasets ⇒ Object
109 110 111 |
# File 'lib/miga/project.rb', line 109
#
# Returns one dataset object (via #dataset) per name registered in the
# project metadata, in registration order.
#
# @return [Array]
def datasets
  self.metadata[:datasets].map { |name| self.dataset(name) }
end
#done_preprocessing? ⇒ Boolean
245 246 247 |
# File 'lib/miga/project.rb', line 245
#
# True when every dataset either is not a reference dataset or reports that
# its own preprocessing is done. Every dataset is evaluated (no
# short-circuit across datasets).
#
# @return [Boolean]
def done_preprocessing?
  flags = self.datasets.map do |ds|
    !ds.is_ref? || ds.done_preprocessing?
  end
  flags.all?
end
#each_dataset(&blk) ⇒ Object
118 119 120 |
# File 'lib/miga/project.rb', line 118
#
# Calls +blk+ once per registered dataset, passing the object returned by
# #dataset for each name in the project metadata.
#
# @return [Array] the list of registered dataset names.
def each_dataset(&blk)
  self.metadata[:datasets].each { |name| blk.call(self.dataset(name)) }
end
#each_dataset_profile_advance(&blk) ⇒ Object
261 262 263 264 265 |
# File 'lib/miga/project.rb', line 261
#
# Calls +blk+ once per dataset with the value of that dataset's
# #profile_advance.
def each_dataset_profile_advance(&blk)
  self.each_dataset { |ds| blk.call(ds.profile_advance) }
end
#import_dataset(ds, method = :hardlink) ⇒ Object
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/miga/project.rb', line 134
#
# Brings the dataset +ds+ (from another project) into this one: transfers
# the files of each of its results, the per-result "<name>.json", ".start"
# and ".done" files that exist, and the dataset's metadata file; then
# registers the dataset with #add_dataset.
#
# @param ds [Dataset] dataset to import.
# @param method [Symbol] transfer method handed to File.generic_transfer.
# @raise [RuntimeError] if this project already has a dataset with that name.
def import_dataset(ds, method=:hardlink)
  if Dataset.exist?(self, ds.name)
    raise "Impossible to import dataset, it already exists: #{ds.name}."
  end
  ds.each_result do |task, result|
    # Files that belong to the result itself.
    result.each_file do |file|
      File.generic_transfer(
        "#{result.dir}/#{file}",
        "#{self.path}/data/#{Dataset.RESULT_DIRS[task]}/#{file}",
        method
      )
    end
    # Per-result bookkeeping files; only those present are transferred.
    %w(json start done).each do |suffix|
      next unless File.exist? "#{result.dir}/#{ds.name}.#{suffix}"
      File.generic_transfer(
        "#{result.dir}/#{ds.name}.#{suffix}",
        "#{self.path}/data/#{Dataset.RESULT_DIRS[task]}/" +
          "#{ds.name}.#{suffix}",
        method
      )
    end
  end
  # Dataset-level metadata file.
  File.generic_transfer(
    "#{ds.project.path}/metadata/#{ds.name}.json",
    "#{self.path}/metadata/#{ds.name}.json",
    method
  )
  # Register the dataset in this project.
  self.add_dataset ds.name
end
#load ⇒ Object
103 104 105 106 107 |
# File 'lib/miga/project.rb', line 103
#
# (Re)loads the project metadata from "<path>/miga.project.json".
#
# @return [nil]
# @raise [RuntimeError] if Metadata.load returns nil for that file.
def load
  @metadata = Metadata.load(self.path + "/miga.project.json")
  raise "Couldn't find project metadata at #{self.path}" if self.metadata.nil?
end
#name ⇒ Object
108 |
# File 'lib/miga/project.rb', line 108
#
# Name of the project, as stored under :name in its metadata.
def name
  self.metadata[:name]
end
#next_distances ⇒ Object
224 225 226 |
# File 'lib/miga/project.rb', line 224
#
# First task of @@DISTANCE_TASKS for which #add_result returns nil, or nil
# when #add_result succeeds for all of them.
#
# @return [Symbol, nil]
def next_distances
  @@DISTANCE_TASKS.find do |task|
    self.add_result(task).nil?
  end
end
#next_inclade ⇒ Object
227 228 229 230 |
# File 'lib/miga/project.rb', line 227
#
# First task of @@INCLADE_TASKS for which #add_result returns nil. Only
# projects whose metadata :type is :clade are considered.
#
# @return [Symbol, nil] nil for non-clade projects or when #add_result
#   succeeds for every task.
def next_inclade
  return nil unless self.metadata[:type] == :clade
  @@INCLADE_TASKS.find { |t| self.add_result(t).nil? }
end
#profile_datasets_advance ⇒ Object
Generates a two-dimensional matrix (array of arrays) where the first index corresponds to the dataset, the second index corresponds to the dataset task, and the value corresponds to:
0: Before execution.
1: Done (or not required).
2: To do.
254 255 256 257 258 259 260 |
# File 'lib/miga/project.rb', line 254
#
# Collects, in dataset order, every value yielded by
# #each_dataset_profile_advance into one array (one entry per dataset).
#
# @return [Array]
def profile_datasets_advance
  rows = []
  self.each_dataset_profile_advance { |row| rows << row }
  rows
end
#result(name) ⇒ Object
161 162 163 164 165 |
# File 'lib/miga/project.rb', line 161
#
# Loads the stored project-level result called +name+ from
# "<path>/data/<result dir>/miga-project.json".
#
# @param name [#to_sym] a key of @@RESULT_DIRS.
# @return [Object, nil] what Result.load returns; nil for unknown names.
def result(name)
  dir = @@RESULT_DIRS[name.to_sym]
  return nil if dir.nil?
  Result.load(self.path + "/data/" + dir + "/miga-project.json")
end
#results ⇒ Object
166 167 168 |
# File 'lib/miga/project.rb', line 166
#
# Every project-level result that #result can load, in @@RESULT_DIRS key
# order; result types for which #result returns nil are left out.
#
# @return [Array]
def results
  loaded = @@RESULT_DIRS.keys.map { |key| self.result(key) }
  loaded.compact
end
#save ⇒ Object
99 100 101 102 |
# File 'lib/miga/project.rb', line 99
#
# Saves the project metadata and then reloads it with #load.
def save
  self.metadata.save
  self.load
end
#unlink_dataset(name) ⇒ Object
127 128 129 130 131 132 133 |
# File 'lib/miga/project.rb', line 127
#
# Removes +name+ from the project's registered dataset list and saves the
# project. No file is touched by this method itself.
#
# @param name [String] dataset name.
# @return [Object, nil] the dataset returned by #dataset, or nil when
#   #dataset returns nil.
def unlink_dataset(name)
  d = self.dataset(name)
  return nil if d.nil?
  self.metadata[:datasets].delete(name)
  self.save
  d
end
#unregistered_datasets ⇒ Object
231 232 233 234 235 236 237 238 239 240 241 242 243 244 |
# File 'lib/miga/project.rb', line 231
#
# Scans every folder listed in Dataset.RESULT_DIRS (under "<path>/data/")
# for files with a recognized extension and returns the dataset names they
# imply that are not registered in the project metadata. The name is the
# part of the file name before the first dot; "miga-project" is ignored.
# Result folders that do not exist are skipped.
#
# @return [Array<String>]
def unregistered_datasets
  found = []
  Dataset.RESULT_DIRS.each do |_res, dir|
    folder = self.path + "/data/" + dir
    # Dir.entries raises on a missing folder; an absent folder simply
    # contributes no datasets.
    next unless Dir.exist?(folder)
    Dir.entries(folder).each do |file|
      next unless file =~ %r{
        \.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
      }x
      m = /([^\.]+)/.match(file)
      found << m[1] unless m.nil? || m[1] == "miga-project"
    end
  end
  found.uniq - self.metadata[:datasets]
end