Class: MiGA::Project

Inherits:
MiGA
  • Object
show all
Defined in:
lib/miga/project.rb

Overview

MiGA representation of a project.

Constant Summary collapse

@@FOLDERS =

Top-level folders inside a project.

%w[data metadata daemon]
@@DATA_FOLDERS =

Folders for results.

%w[
  01.raw_reads 02.trimmed_reads 03.read_quality 04.trimmed_fasta
  05.assembly 06.cds
  07.annotation 07.annotation/01.function 07.annotation/02.taxonomy
  07.annotation/01.function/01.essential
  07.annotation/01.function/02.ssu
  07.annotation/02.taxonomy/01.mytaxa
  07.annotation/03.qa 07.annotation/03.qa/01.checkm
  07.annotation/03.qa/02.mytaxa_scan
  08.mapping 08.mapping/01.read-ctg 08.mapping/02.read-gene
  09.distances 09.distances/01.haai 09.distances/02.aai
  09.distances/03.ani 09.distances/04.ssu
  10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
  10.clades/04.phylogeny 10.clades/04.phylogeny/01.essential
  10.clades/04.phylogeny/02.core 10.clades/05.metadata
]
# Directories holding the results of each project-wide task, keyed by task
# name. Paths are relative to the project's +data+ folder.
@@RESULT_DIRS =
{
  # Distances
  haai_distances: "09.distances/01.haai",
  aai_distances: "09.distances/02.aai",
  ani_distances: "09.distances/03.ani",
  # The SSU distances entry below is commented out, so no result is mapped
  # to that folder here.
  #ssu_distances: "09.distances/04.ssu",
  # Clade identification
  clade_finding: "10.clades/01.find",
  # Clade analysis
  subclades: "10.clades/02.ani",
  ogs: "10.clades/03.ogs",
  ess_phylogeny: "10.clades/04.phylogeny/01.essential",
  core_phylogeny: "10.clades/04.phylogeny/02.core",
  clade_metadata: "10.clades/05.metadata"
}
# Supported project types. Each entry maps the type name to a human-readable
# description plus two boolean flags, +single+ and +multi+.
# NOTE(review): the flags presumably state whether single-organism and
# multi-organism datasets are accepted by the project type — confirm against
# the code that consumes them.
@@KNOWN_TYPES =
{
  mixed: {
    description: "Mixed collection of genomes, metagenomes, and viromes.",
    single: true, multi: true},
  genomes: {description: "Collection of genomes.",
    single: true, multi: false},
  # Closely-related genomes share a HIGH average nucleotide identity, hence
  # the lower bound (>=) rather than an upper bound.
  clade: {description: "Collection of closely-related genomes (ANI >= 90%).",
    single: true, multi: false},
  metagenomes: {description: "Collection of metagenomes and/or viromes.",
    single: false, multi: true}
}
# Project-wide distance estimation tasks, in the order they are listed.
@@DISTANCE_TASKS = [
  :haai_distances,
  :aai_distances,
  :ani_distances,
  :clade_finding
]
# Project-wide tasks that only apply to projects of type :clade, in the order
# they are listed.
@@INCLADE_TASKS = [
  :subclades,
  :ogs,
  :ess_phylogeny,
  :core_phylogeny,
  :clade_metadata
]

Constants included from MiGA

CITATION, VERSION, VERSION_DATE, VERSION_NAME

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from MiGA

CITATION, DEBUG, DEBUG_OFF, DEBUG_ON, DEBUG_TRACE_OFF, DEBUG_TRACE_ON, FULL_VERSION, LONG_VERSION, VERSION, VERSION_DATE, initialized?, #result_files_exist?, root_path, tabulate

Constructor Details

#initialize(path, update = false) ⇒ Project

Create a new MiGA::Project at path if no project exists there or if update is true; otherwise load the existing one.



108
109
110
111
112
113
# File 'lib/miga/project.rb', line 108

# Create or load the project rooted at +path+.
#
# The project is (re-)created through #create when +update+ is true or when
# Project.exist? reports no project at +path+. Afterwards the metadata is
# loaded through #load if it is still unset.
#
# path   - Location of the project folder; stored as an absolute path.
# update - When true, run #create even if the project already exists.
def initialize(path, update=false)
  @datasets = {}
  @path = File.absolute_path(path)
  self.create if update or not Project.exist? self.path
  self.load if self.metadata.nil?
end

Instance Attribute Details

#metadata ⇒ Object (readonly)

Information about the project as MiGA::Metadata.



103
104
105
# File 'lib/miga/project.rb', line 103

# Read-only accessor for the project information held in @metadata.
def metadata
  @metadata
end

#path ⇒ Object (readonly)

Absolute path to the project folder.



99
100
101
# File 'lib/miga/project.rb', line 99

# Read-only accessor for the project folder stored in @path.
def path ; @path ; end

Class Method Details

.DISTANCE_TASKS ⇒ Object

Project-wide distance estimations.



71
# File 'lib/miga/project.rb', line 71

# Class-level reader for @@DISTANCE_TASKS (project-wide distance estimations).
def self.DISTANCE_TASKS
  @@DISTANCE_TASKS
end

.exist?(path) ⇒ Boolean

Does the project at path exist?

Returns:

  • (Boolean)


83
84
85
# File 'lib/miga/project.rb', line 83

# Tell whether +path+ holds a project: it must be an existing directory that
# contains a +miga.project.json+ file.
def self.exist?(path)
  Dir.exist?(path) && File.exist?("#{path}/miga.project.json")
end

.INCLADE_TASKS ⇒ Object

Project-wide tasks for :clade projects.



77
# File 'lib/miga/project.rb', line 77

# Class-level reader for @@INCLADE_TASKS (project-wide tasks for :clade
# projects).
def self.INCLADE_TASKS
  @@INCLADE_TASKS
end

.KNOWN_TYPES ⇒ Object

Supported types of projects.



56
# File 'lib/miga/project.rb', line 56

# Class-level reader for @@KNOWN_TYPES (supported types of projects).
def self.KNOWN_TYPES
  @@KNOWN_TYPES
end

.load(path) ⇒ Object

Load the project at path. Returns MiGA::Project if project exists, nil otherwise.



90
91
92
93
# File 'lib/miga/project.rb', line 90

# Load the project at +path+. Returns a new Project when Project.exist?
# confirms it, and nil otherwise.
def self.load(path)
  Project.exist?(path) ? Project.new(path) : nil
end

.RESULT_DIRS ⇒ Object

Directories containing the results from project-wide tasks.



37
# File 'lib/miga/project.rb', line 37

# Class-level reader for @@RESULT_DIRS (directories containing the results
# from project-wide tasks).
def self.RESULT_DIRS
  @@RESULT_DIRS
end

Instance Method Details

#add_dataset(name) ⇒ Object

Add dataset identified by name and return MiGA::Dataset.



175
176
177
178
179
180
181
182
# File 'lib/miga/project.rb', line 175

# Register the dataset identified by +name+ (unless it is already listed in
# the project metadata) and return the value of #dataset for that name.
#
# name - Dataset name as stored in the +:datasets+ list of the metadata.
def add_dataset(name)
  unless metadata[:datasets].include? name
    # Instantiated only for its side effects; the instance handed back to the
    # caller is fetched through #dataset below.
    # NOTE(review): presumably the constructor initializes the dataset's own
    # metadata — confirm in MiGA::Dataset.
    MiGA::Dataset.new(self, name)
    metadata[:datasets] << name
    save
  end
  dataset(name)
end

#add_result(name, save = true) ⇒ Object

Add the result identified by Symbol name, and return MiGA::Result. Save the result if save.



240
241
242
243
244
245
246
247
248
# File 'lib/miga/project.rb', line 240

# Add the project-wide result identified by the Symbol +name+.
#
# Returns nil when +name+ has no entry in @@RESULT_DIRS. When +save+ is
# false, only the stored result JSON is loaded through MiGA::Result.load.
# When +save+ is true, the result is built by the matching
# +add_result_<name>+ method and saved, provided result_files_exist? finds
# the ".done" file; otherwise nil is returned.
def add_result(name, save=true)
  subdir = @@RESULT_DIRS[name]
  return nil if subdir.nil?
  base = "#{path}/data/#{subdir}/miga-project"
  if save
    return nil unless result_files_exist?(base, ".done")
    # Dispatch to the task-specific builder, then persist what it returns.
    send("add_result_#{name}", base).tap { |built| built.save }
  else
    MiGA::Result.load(base + ".json")
  end
end

#create ⇒ Object

Create an empty project.



117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/miga/project.rb', line 117

# Create an empty project at #path: make the folder layout, write fresh
# project metadata, install the daemon configuration if missing, and reload.
#
# Raises a RuntimeError when MiGA::MiGA.initialized? is false.
def create
  raise "Impossible to create project in uninitialized MiGA." unless
    MiGA::MiGA.initialized?
  # Parents are listed before their children, which Dir.mkdir requires.
  top_level = @@FOLDERS.map { |folder| "#{path}/#{folder}" }
  data_level = @@DATA_FOLDERS.map { |folder| "#{path}/data/#{folder}" }
  ([path] + top_level + data_level).each do |dir|
    Dir.mkdir(dir) unless Dir.exist?(dir)
  end
  @metadata = MiGA::Metadata.new(self.path + "/miga.project.json",
    {datasets: [], name: File.basename(path)})
  # Seed the daemon settings from the MiGA home, keeping any existing file.
  daemon_conf = "#{path}/daemon/daemon.json"
  unless File.exist?(daemon_conf)
    FileUtils.cp(ENV["MIGA_HOME"] + "/.miga_daemon.json", daemon_conf)
  end
  self.load
end

#dataset(name) ⇒ Object

Returns MiGA::Dataset.



158
159
160
161
162
163
164
# File 'lib/miga/project.rb', line 158

# Return the MiGA::Dataset for +name+, or nil when MiGA::Dataset.exist?
# reports that it is not in this project. The name is normalized with
# +miga_name+ and instances are memoized per normalized name in @datasets.
def dataset(name)
  key = name.miga_name
  return nil unless MiGA::Dataset.exist?(self, key)
  @datasets ||= {}
  @datasets[key] ||= MiGA::Dataset.new(self, key)
end

#datasets ⇒ Object

Returns Array of MiGA::Dataset.



152
153
154
# File 'lib/miga/project.rb', line 152

# Return an Array with the result of #dataset for every name registered in
# the +:datasets+ list of the project metadata.
def datasets
  metadata[:datasets].map{ |name| dataset(name) }
end

#done_preprocessing?(save = true) ⇒ Boolean

Are all the datasets in the project preprocessed? Save intermediate results if save.

Returns:

  • (Boolean)


285
286
287
# File 'lib/miga/project.rb', line 285

# Are all the datasets in the project preprocessed? Datasets for which
# +is_ref?+ is false count as done. +save+ is forwarded to each dataset's own
# +done_preprocessing?+.
def done_preprocessing?(save=true)
  # Collect every status before reducing, so each reference dataset is
  # evaluated even after one has been found incomplete.
  statuses = datasets.map do |ds|
    !ds.is_ref? || ds.done_preprocessing?(save)
  end
  statuses.all?
end

#each_dataset(&blk) ⇒ Object

Iterate through datasets, with a single variable MiGA::Dataset passed to blk.



169
170
171
# File 'lib/miga/project.rb', line 169

# Iterate over the names registered in the +:datasets+ list of the project
# metadata, calling +blk+ with the result of #dataset for each of them.
def each_dataset(&blk)
  metadata[:datasets].each{ |name| blk.call(dataset(name)) }
end

#each_dataset_profile_advance(&blk) ⇒ Object

Call blk passing the result of MiGA::Dataset#profile_advance for each registered dataset.



307
308
309
# File 'lib/miga/project.rb', line 307

# Call +blk+ with the value of +profile_advance+ of each dataset yielded by
# #each_dataset.
def each_dataset_profile_advance(&blk)
  each_dataset do |ds|
    advance = ds.profile_advance
    blk.call(advance)
  end
end

#import_dataset(ds, method = :hardlink) ⇒ Object

Import the dataset ds, a MiGA::Dataset, using method, which is any method supported by File.generic_transfer.



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/miga/project.rb', line 197

# Import the dataset +ds+ (a MiGA::Dataset) into this project and register
# it through #add_dataset, whose return value is returned.
#
# ds     - Dataset to import; its results, result bookkeeping files and
#          metadata JSON are transferred into this project's folders.
# method - Transfer method handed to File.generic_transfer.
#
# Raises a RuntimeError if MiGA::Dataset.exist? reports a dataset with the
# same name in this project.
def import_dataset(ds, method=:hardlink)
  if MiGA::Dataset.exist?(self, ds.name)
    raise "Impossible to import dataset, it already exists: #{ds.name}."
  end
  ds.each_result do |task, result|
    target_dir = "#{path}/data/#{MiGA::Dataset.RESULT_DIRS[task]}"
    # Files listed by the result itself
    result.each_file do |file|
      File.generic_transfer("#{result.dir}/#{file}", "#{target_dir}/#{file}",
        method)
    end
    # Per-result bookkeeping files, transferred only when present
    %w(json start done).each do |suffix|
      marker = "#{ds.name}.#{suffix}"
      next unless File.exist? "#{result.dir}/#{marker}"
      File.generic_transfer("#{result.dir}/#{marker}",
        "#{target_dir}/#{marker}", method)
    end
  end
  # Dataset-level metadata comes from the source project of +ds+
  File.generic_transfer("#{ds.project.path}/metadata/#{ds.name}.json",
    "#{self.path}/metadata/#{ds.name}.json", method)
  self.add_dataset(ds.name)
end

#load ⇒ Object

(Re-)load project data and metadata.



141
142
143
144
# File 'lib/miga/project.rb', line 141

# (Re-)load the project metadata from +miga.project.json+ under #path.
#
# Raises a RuntimeError when MiGA::Metadata.load yields no metadata.
def load
  @metadata = MiGA::Metadata.load "#{path}/miga.project.json"
  raise "Couldn't find project metadata at #{path}" if metadata.nil?
end

#name ⇒ Object

Name of the project.



148
# File 'lib/miga/project.rb', line 148

# Name of the project, read from the +:name+ entry of the project metadata.
def name ; metadata[:name] ; end

#next_distances(save = true) ⇒ Object

Get the next distances task, saving intermediate results if save. Returns a Symbol.



253
254
255
# File 'lib/miga/project.rb', line 253

# Return the first task in @@DISTANCE_TASKS for which #add_result yields nil,
# or nil when none does. +save+ is forwarded to #add_result.
def next_distances(save=true)
  @@DISTANCE_TASKS.find do |task|
    add_result(task, save).nil?
  end
end

#next_inclade(save = true) ⇒ Object

Get the next inclade task, saving intermediate results if save. Returns a Symbol.



260
261
262
263
# File 'lib/miga/project.rb', line 260

# Return the first task in @@INCLADE_TASKS for which #add_result yields nil.
# Returns nil when none does, or when the +:type+ entry of the project
# metadata is not :clade. +save+ is forwarded to #add_result.
def next_inclade(save=true)
  return nil unless metadata[:type]==:clade
  @@INCLADE_TASKS.find{ |t| add_result(t, save).nil? }
end

#profile_datasets_advance ⇒ Object

Returns a two-dimensional matrix (Array of Array) where the first index corresponds to the dataset, the second index corresponds to the dataset task, and the value corresponds to:

  • 0: Before execution.

  • 1: Done (or not required).

  • 2: To do.



296
297
298
299
300
301
302
# File 'lib/miga/project.rb', line 296

# Collect into an Array, in iteration order, every value yielded by
# #each_dataset_profile_advance (one entry per dataset).
def profile_datasets_advance
  [].tap do |advance|
    each_dataset_profile_advance { |ds_adv| advance << ds_adv }
  end
end

#result(name) ⇒ Object

Get result identified by Symbol name, returns MiGA::Result.



225
226
227
228
229
# File 'lib/miga/project.rb', line 225

# Load the project-wide result identified by +name+ (converted with +to_sym+)
# from its +miga-project.json+ file through MiGA::Result.load. Returns nil
# when the name has no entry in @@RESULT_DIRS.
def result(name)
  subdir = @@RESULT_DIRS[name.to_sym]
  return nil if subdir.nil?
  MiGA::Result.load "#{path}/data/#{subdir}/miga-project.json"
end

#results ⇒ Object

Get all results, an Array of MiGA::Result.



233
234
235
# File 'lib/miga/project.rb', line 233

def results
  @@RESULT_DIRS.keys.map{ |k| result(k) }.reject{ |r| r.nil? }
end

#save ⇒ Object

Save any changes persistently.



134
135
136
137
# File 'lib/miga/project.rb', line 134

# Save the project metadata persistently and reload it through #load.
def save
  metadata.save
  self.load
end

#unlink_dataset(name) ⇒ Object

Unlink dataset identified by name and return MiGA::Dataset.



186
187
188
189
190
191
192
# File 'lib/miga/project.rb', line 186

# Remove +name+ from the +:datasets+ list of the project metadata, save the
# project, and return the value #dataset gave for that name. Returns nil
# without changing anything when #dataset yields nil.
def unlink_dataset(name)
  d = dataset(name)
  return nil if d.nil?
  self.metadata[:datasets].delete(name)
  save
  d
end

#unregistered_datasets ⇒ Object

Find all datasets with (potential) result files but are yet unregistered.



267
268
269
270
271
272
273
274
275
276
277
278
279
280
# File 'lib/miga/project.rb', line 267

# Find the names of datasets that have (potential) result files in the
# project's data folders but are not listed in the +:datasets+ entry of the
# project metadata. Returns an Array of unique names.
def unregistered_datasets
  found = []
  MiGA::Dataset.RESULT_DIRS.values.each do |dir|
    Dir.entries("#{path}/data/#{dir}").each do |file|
      # Only files with a recognized result extension (optionally gzipped)
      next unless
        file =~ %r{
          \.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
        }x
      # The candidate name is everything before the first dot
      m = /([^\.]+)/.match(file)
      # "miga-project" is the base name of project-wide result files
      found << m[1] unless m.nil? or m[1] == "miga-project"
    end
  end
  found.uniq - metadata[:datasets]
end