Class: MiGA::Project

Inherits:
Object
  • Object
show all
Defined in:
lib/miga/project.rb

Constant Summary collapse

# Top-level folders expected inside every project directory.
@@FOLDERS = %w(data metadata daemon)
# Folders expected under data/. Every parent folder is listed before its
# children.
@@DATA_FOLDERS = %w[
  01.raw_reads
  02.trimmed_reads
  03.read_quality
  04.trimmed_fasta
  05.assembly
  06.cds
  07.annotation
  07.annotation/01.function
  07.annotation/02.taxonomy
  07.annotation/01.function/01.essential
  07.annotation/01.function/02.ssu
  07.annotation/02.taxonomy/01.mytaxa
  07.annotation/03.qa
  07.annotation/03.qa/01.checkm
  07.annotation/03.qa/02.mytaxa_scan
  08.mapping
  08.mapping/01.read-ctg
  08.mapping/02.read-gene
  09.distances
  09.distances/01.haai
  09.distances/02.aai
  09.distances/03.ani
  09.distances/04.ssu
  10.clades
  10.clades/01.find
  10.clades/02.ani
  10.clades/03.ogs
  10.clades/04.phylogeny
  10.clades/04.phylogeny/01.essential
  10.clades/04.phylogeny/02.core
  10.clades/05.metadata
]
# Maps each project-wide result (task) to its folder, relative to data/.
@@RESULT_DIRS = {
  # Distances
  haai_distances: "09.distances/01.haai",
  aai_distances:  "09.distances/02.aai",
  ani_distances:  "09.distances/03.ani",
  #ssu_distances: "09.distances/04.ssu",

  # Clade identification
  clade_finding:  "10.clades/01.find",

  # Clade analysis
  subclades:      "10.clades/02.ani",
  ogs:            "10.clades/03.ogs",
  ess_phylogeny:  "10.clades/04.phylogeny/01.essential",
  core_phylogeny: "10.clades/04.phylogeny/02.core",
  clade_metadata: "10.clades/05.metadata"
}
# Project types, each with a human-readable description and two flags.
# NOTE(review): +single+ / +multi+ presumably state whether single-genome and
# multi-genome datasets are accepted by the type -- confirm against Dataset.
@@KNOWN_TYPES = {
  mixed: {
    description: "Mixed collection of genomes, metagenomes, " +
      "and viromes.",
    single: true, multi: true
  },
  genomes: {
    description: "Collection of genomes.",
    single: true, multi: false
  },
  clade: {
    # Closely-related genomes share HIGH identity, so the bound is a minimum
    # ANI (>=), not a maximum.
    description: "Collection of closely-related genomes " +
      "(ANI >= 90%).",
    single: true, multi: false
  },
  metagenomes: {
    description: "Collection of metagenomes and/or " +
      "viromes.",
    single: false, multi: true
  }
}
# Project-wide distance tasks, in the order #next_distances checks them.
@@DISTANCE_TASKS = [
  :haai_distances,
  :aai_distances,
  :ani_distances,
  :clade_finding
]
# Tasks checked by #next_inclade (only for projects of type :clade), in order.
@@INCLADE_TASKS = [
  :subclades,
  :ogs,
  :ess_phylogeny,
  :core_phylogeny,
  :clade_metadata
]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path, update = false) ⇒ Project

Returns a new instance of Project.



74
75
76
77
78
79
80
81
# File 'lib/miga/project.rb', line 74

# Opens the project located at +path+, creating it first when needed.
#
# path   - Project folder; stored as an absolute path in @path.
# update - When true, #create is run even if the project already exists.
#
# Raises RuntimeError if ~/.miga_rc or ~/.miga_daemon.json does not exist.
def initialize(path, update=false)
  home = ENV["HOME"]
  unless File.exist?("#{home}/.miga_rc") &&
         File.exist?("#{home}/.miga_daemon.json")
    raise "Impossible to create project in uninitialized MiGA."
  end
  @path = File.absolute_path(path)
  create if update || !Project.exist?(self.path)
  # #create already finishes with #load; only load here when it was skipped.
  load if metadata.nil?
end

Instance Attribute Details

#metadata ⇒ Object (readonly)

Instance



73
74
75
# File 'lib/miga/project.rb', line 73

# Reader for the project metadata object held in @metadata.
def metadata
  @metadata
end

#path ⇒ Object (readonly)

Instance



73
74
75
# File 'lib/miga/project.rb', line 73

# Reader for the project folder stored in @path.
def path ; @path ; end

Class Method Details

.DISTANCE_TASKS ⇒ Object



61
# File 'lib/miga/project.rb', line 61

# Class-level reader for @@DISTANCE_TASKS.
def self.DISTANCE_TASKS
  @@DISTANCE_TASKS
end

.exist?(path) ⇒ Boolean

Returns:

  • (Boolean)


65
66
67
# File 'lib/miga/project.rb', line 65

# Tells whether +path+ holds a project: it must be an existing directory
# that contains a miga.project.json file.
def self.exist?(path)
  return false unless Dir.exist?(path)
  File.exist?(path + "/miga.project.json")
end

.INCLADE_TASKS ⇒ Object



62
# File 'lib/miga/project.rb', line 62

# Class-level reader for @@INCLADE_TASKS.
def self.INCLADE_TASKS
  @@INCLADE_TASKS
end

.KNOWN_TYPES ⇒ Object



64
# File 'lib/miga/project.rb', line 64

# Class-level reader for @@KNOWN_TYPES.
def self.KNOWN_TYPES
  @@KNOWN_TYPES
end

.load(path) ⇒ Object



68
69
70
71
# File 'lib/miga/project.rb', line 68

# Returns a Project for +path+ when a project exists there, or nil otherwise.
# Unlike Project.new, this never creates a project.
def self.load(path)
  Project.exist?(path) ? Project.new(path) : nil
end

.RESULT_DIRS ⇒ Object



63
# File 'lib/miga/project.rb', line 63

# Class-level reader for @@RESULT_DIRS.
def self.RESULT_DIRS
  @@RESULT_DIRS
end

Instance Method Details

#add_dataset(name) ⇒ Object



121
122
123
124
125
126
# File 'lib/miga/project.rb', line 121

# Registers the dataset +name+ in the project metadata (no duplicate entry is
# added if it is already listed), saves the project, and returns the result
# of #dataset for that name.
def add_dataset(name)
  registered = metadata[:datasets]
  registered << name unless registered.include?(name)
  save
  dataset(name)
end

#add_result(result_type) ⇒ Object



169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/miga/project.rb', line 169

# Registers the project-wide result +result_type+ when its files are in place.
#
# Returns nil when +result_type+ has no folder in @@RESULT_DIRS, when the
# miga-project.done marker is absent, when a required file is missing, or
# when no registration rule exists for the type. Otherwise builds a Result on
# miga-project.json, attaches the files, saves it, and returns it.
def add_result(result_type)
  dir = @@RESULT_DIRS[result_type]
  return nil if dir.nil?
  base = "#{self.path}/data/#{dir}/miga-project"
  return nil unless File.exist?("#{base}.done")
  r = nil
  case result_type
  when :haai_distances, :aai_distances, :ani_distances, :ssu_distances
    # The matrix may be plain or gzipped; either satisfies the requirement.
    return nil unless File.exist?("#{base}.Rdata") &&
                      File.exist?("#{base}.log") &&
                      (File.exist?("#{base}.txt") ||
                       File.exist?("#{base}.txt.gz"))
    r = Result.new("#{base}.json")
    r.add_file :rdata, "miga-project.Rdata"
    r.add_file :matrix, "miga-project.txt"
    r.add_file :log, "miga-project.log"
    r.add_file :hist, "miga-project.hist"
    r.data[:gz] = File.exist?("#{base}.txt.gz")
  when :clade_finding
    return nil unless File.exist?("#{base}.proposed-clades")
    r = Result.new("#{base}.json")
    r.add_file :proposal, "miga-project.proposed-clades"
    r.add_file :rbm_aai90, "genome-genome.aai90.rbm"
    # NOTE(review): the :clades_aai90 key points at "ani-clades" while its
    # sibling uses "ani95-clades"; confirm the file name the pipeline writes.
    r.add_file :clades_aai90, "miga-project.ani-clades"
    r.add_file :rbm_ani95, "genome-genome.ani95.rbm"
    r.add_file :clades_ani95, "miga-project.ani95-clades"
  when :subclades
    return nil unless File.exist?("#{base}.pdf") &&
                      File.exist?("#{base}.1.classif") &&
                      File.exist?("#{base}.1.medoids") &&
                      File.exist?("#{base}.class.tsv") &&
                      File.exist?("#{base}.class.nwk")
    r = Result.new("#{base}.json")
    r.add_file :report, "miga-project.pdf"
    (1..6).each do |i|
      %w{classif medoids}.each do |m|
        r.add_file "#{m}_#{i}".to_sym, "miga-project.#{i}.#{m}"
      end
    end
    r.add_file :class_table, "miga-project.class.tsv"
    r.add_file :class_tree,  "miga-project.class.nwk"
    r.add_file :ani_tree,    "miga-project.ani.nwk"
  when :ogs
    return nil unless File.exist?("#{base}.ogs") &&
                      File.exist?("#{base}.stats")
    r = Result.new("#{base}.json")
    r.add_file :ogs, "miga-project.ogs"
    r.add_file :stats, "miga-project.stats"
    r.add_file :rbm, "miga-project.rbm"
  end
  # Types with a folder but no rule above (:ess_phylogeny, :core_phylogeny,
  # :clade_metadata) leave r unset; report "not registered" rather than
  # calling #save on nil.
  return nil if r.nil?
  r.save
  r
end

#create ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/miga/project.rb', line 82

# Builds the project on disk: makes the project folder, every folder in
# @@FOLDERS and every data/ sub-folder in @@DATA_FOLDERS (skipping those
# already present), initializes the metadata through Metadata.new, copies
# ~/.miga_daemon.json to daemon/daemon.json unless one is already there, and
# finally calls #load.
def create
  root = self.path
  # Same creation order as the two lists: top-level folders, then data/*.
  wanted = [root]
  wanted += @@FOLDERS.map { |dir| root + "/" + dir }
  wanted += @@DATA_FOLDERS.map { |dir| root + "/data/" + dir }
  wanted.each do |folder|
    next if Dir.exist? folder
    Dir.mkdir folder
  end
  @metadata = Metadata.new(root + "/miga.project.json",
    {datasets: [], name: File.basename(root)})
  daemon_conf = root + "/daemon/daemon.json"
  unless File.exist? daemon_conf
    FileUtils.cp(ENV["HOME"] + "/.miga_daemon.json", daemon_conf)
  end
  self.load
end

#dataset(name) ⇒ Object



112
113
114
115
116
117
# File 'lib/miga/project.rb', line 112

# Returns the Dataset object for +name+ in this project.
#
# The name is first normalized with #miga_name. Objects are cached in
# @datasets, so repeated calls with the same name return the same instance.
def dataset(name)
  key = name.miga_name
  @datasets ||= {}
  @datasets[key] ||= Dataset.new(self, key)
end

#datasets ⇒ Object



109
110
111
# File 'lib/miga/project.rb', line 109

# Returns an Array with the result of #dataset for every name listed in the
# project metadata under :datasets, in the same order.
def datasets
  metadata[:datasets].map { |name| dataset(name) }
end

#done_preprocessing? ⇒ Boolean

Returns:

  • (Boolean)


245
246
247
# File 'lib/miga/project.rb', line 245

# True when every dataset is either not a reference dataset or reports
# done_preprocessing?. An empty project counts as done.
def done_preprocessing?
  # Collect the status of every dataset first (no short-circuit across
  # datasets), then check that all of them are satisfied.
  statuses = datasets.map do |ds|
    !ds.is_ref? || ds.done_preprocessing?
  end
  statuses.all?
end

#each_dataset(&blk) ⇒ Object



118
119
120
# File 'lib/miga/project.rb', line 118

# Calls +blk+ once per registered dataset, passing the result of #dataset for
# each name listed in the project metadata under :datasets.
def each_dataset(&blk)
  metadata[:datasets].each { |name| blk.call(dataset(name)) }
end

#each_dataset_profile_advance(&blk) ⇒ Object



261
262
263
264
265
# File 'lib/miga/project.rb', line 261

# Calls +blk+ once per dataset (see #each_dataset), passing that dataset's
# profile_advance value.
def each_dataset_profile_advance(&blk)
  each_dataset { |dataset| blk.call(dataset.profile_advance) }
end

#import_dataset(ds, method = :hardlink) ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/miga/project.rb', line 134

# Imports the dataset +ds+ (belonging to another project) into this project.
#
# ds     - Dataset to import; its name must not exist here yet.
# method - Transfer mode handed to File.generic_transfer (default :hardlink).
#
# Transfers each result's files and its json/start/done companions, then the
# dataset metadata, and finally registers the dataset with #add_dataset.
# Raises RuntimeError if a dataset with the same name already exists.
def import_dataset(ds, method=:hardlink)
  if Dataset.exist?(self, ds.name)
    raise "Impossible to import dataset, it already exists: #{ds.name}."
  end
  ds.each_result do |task, result|
    target_dir = "#{self.path}/data/#{Dataset.RESULT_DIRS[task]}"
    # Result files
    result.each_file do |file|
      File.generic_transfer("#{result.dir}/#{file}", "#{target_dir}/#{file}",
        method)
    end
    # Result metadata and status markers, when present
    %w(json start done).each do |suffix|
      marker = "#{ds.name}.#{suffix}"
      source = "#{result.dir}/#{marker}"
      next unless File.exist? source
      File.generic_transfer(source, "#{target_dir}/#{marker}", method)
    end
  end
  # Dataset metadata
  metadata_file = "metadata/#{ds.name}.json"
  File.generic_transfer("#{ds.project.path}/#{metadata_file}",
    "#{self.path}/#{metadata_file}", method)
  # Register the dataset in this project
  self.add_dataset ds.name
end

#load ⇒ Object



103
104
105
106
107
# File 'lib/miga/project.rb', line 103

# (Re)loads the project metadata from miga.project.json into @metadata.
#
# Raises RuntimeError if Metadata.load returns nil for that file.
def load
  @metadata = Metadata.load("#{path}/miga.project.json")
  raise "Couldn't find project metadata at #{path}" if metadata.nil?
end

#name ⇒ Object



108
# File 'lib/miga/project.rb', line 108

# Project name, as stored in the metadata under :name.
def name ; metadata[:name] ; end

#next_distances ⇒ Object



224
225
226
# File 'lib/miga/project.rb', line 224

# Returns the first task in @@DISTANCE_TASKS for which #add_result gives nil
# (i.e. no result could be registered), or nil when none is pending.
def next_distances
  @@DISTANCE_TASKS.find do |task|
    add_result(task).nil?
  end
end

#next_inclade ⇒ Object



227
228
229
230
# File 'lib/miga/project.rb', line 227

# Returns the first task in @@INCLADE_TASKS for which #add_result gives nil,
# or nil when none is pending. Always nil unless the project metadata has
# :type equal to :clade.
def next_inclade
  return nil unless metadata[:type] == :clade
  @@INCLADE_TASKS.find { |t| add_result(t).nil? }
end

#profile_datasets_advance ⇒ Object

Generates a two-dimensional matrix (array of arrays) where the first index corresponds to the dataset, the second index corresponds to the dataset task, and the value corresponds to:

0: Before execution.
1: Done (or not required).
2: To do.


254
255
256
257
258
259
260
# File 'lib/miga/project.rb', line 254

# Collects into an Array, in iteration order, every value yielded by
# #each_dataset_profile_advance (one entry per dataset).
def profile_datasets_advance
  [].tap do |rows|
    each_dataset_profile_advance { |row| rows << row }
  end
end

#result(name) ⇒ Object



161
162
163
164
165
# File 'lib/miga/project.rb', line 161

# Loads the project-wide result +name+ (String or Symbol) with Result.load
# from its miga-project.json file. Returns nil when +name+ has no folder in
# @@RESULT_DIRS.
def result(name)
  folder = @@RESULT_DIRS[name.to_sym]
  return nil if folder.nil?
  Result.load "#{self.path}/data/#{folder}/miga-project.json"
end

#results ⇒ Object



166
167
168
# File 'lib/miga/project.rb', line 166

# Returns every project-wide result that #result can load, following the key
# order of @@RESULT_DIRS; results that come back nil are dropped.
def results
  loaded = @@RESULT_DIRS.keys.map { |key| result(key) }
  loaded.compact
end

#save ⇒ Object



99
100
101
102
# File 'lib/miga/project.rb', line 99

# Saves the project metadata and then reloads it with #load.
def save
  metadata.save
  load
end


127
128
129
130
131
132
133
# File 'lib/miga/project.rb', line 127

# Removes +name+ from the dataset list in the project metadata, saves the
# project, and returns the object given by #dataset for that name. Returns
# nil (changing nothing) if #dataset gives nil. This method itself deletes
# no files.
def unlink_dataset(name)
  d = dataset(name)
  return nil if d.nil?
  metadata[:datasets].delete(name)
  save
  d
end

#unregistered_datasets ⇒ Object



231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/miga/project.rb', line 231

# Lists dataset names that have files in the dataset result folders but are
# not registered in the project metadata.
#
# Every folder in Dataset.RESULT_DIRS is scanned for files with a recognized
# extension (optionally followed by .gz). The dataset name is the part of the
# file name before the first dot; "miga-project" is skipped.
def unregistered_datasets
  found = []
  Dataset.RESULT_DIRS.each do |_task, dir|
    Dir.entries("#{path}/data/#{dir}").each do |file|
      next unless
        file =~ /\.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$/
      m = /([^\.]+)/.match(file)
      found << m[1] unless m.nil? || m[1] == "miga-project"
    end
  end
  found.uniq - metadata[:datasets]
end