Class: MiGA::Project

Inherits:
MiGA
  • Object
show all
Includes:
ProjectResult
Defined in:
lib/miga/project.rb

Overview

MiGA representation of a project.

Constant Summary collapse

@@FOLDERS =

Top-level folders inside a project.

%w[data metadata daemon]
@@DATA_FOLDERS =

Folders for results.

%w[
  01.raw_reads 02.trimmed_reads 03.read_quality 04.trimmed_fasta
  05.assembly 06.cds
  07.annotation 07.annotation/01.function 07.annotation/02.taxonomy
  07.annotation/01.function/01.essential
  07.annotation/01.function/02.ssu
  07.annotation/02.taxonomy/01.mytaxa
  07.annotation/03.qa 07.annotation/03.qa/01.checkm
  07.annotation/03.qa/02.mytaxa_scan
  08.mapping 08.mapping/01.read-ctg 08.mapping/02.read-gene
  09.distances 09.distances/01.haai 09.distances/02.aai
  09.distances/03.ani 09.distances/04.ssu 09.distances/05.taxonomy
  10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
  10.clades/04.phylogeny 10.clades/04.phylogeny/01.essential
  10.clades/04.phylogeny/02.core 10.clades/05.metadata
  90.stats
]
# Directories containing the results from project-wide tasks, keyed by the
# task Symbol. Each value is a folder relative to the project's "data"
# folder. Entries that are commented out are not registered as results.
@@RESULT_DIRS =
{
  project_stats: "90.stats",
  # Distances
  haai_distances: "09.distances/01.haai",
  aai_distances: "09.distances/02.aai",
  ani_distances: "09.distances/03.ani",
  #ssu_distances: "09.distances/04.ssu",
  # Clade identification
  clade_finding: "10.clades/01.find",
  # Clade analysis
  subclades: "10.clades/02.ani",
  ogs: "10.clades/03.ogs"
  #ess_phylogeny: "10.clades/04.phylogeny/01.essential",
  #core_phylogeny: "10.clades/04.phylogeny/02.core",
  #clade_metadata: "10.clades/05.metadata"
}
# Supported types of projects, keyed by the type Symbol. Each entry carries a
# human-readable :description plus the :single and :multi flags; :multi is
# what #is_multi? reports (project for multi-organism datasets).
# NOTE(review): :single presumably marks support for single-organism
# datasets -- confirm against its callers.
@@KNOWN_TYPES =
{
  mixed: {
    description: "Mixed collection of genomes, metagenomes, and viromes.",
    single: true, multi: true},
  genomes: {description: "Collection of genomes.",
    single: true, multi: false},
  clade: {description: "Collection of closely-related genomes (ANI >= 90%).",
    single: true, multi: false},
  metagenomes: {description: "Collection of metagenomes and/or viromes.",
    single: false, multi: true}
}
# Project-wide distance estimations, in the order in which #next_task scans
# them. Every Symbol here is also a key of @@RESULT_DIRS.
@@DISTANCE_TASKS =
[:project_stats,
:haai_distances, :aai_distances, :ani_distances, :clade_finding]
# Project-wide tasks for :clade projects. In other project types #next_task
# skips them unless the matching "run_<task>" metadata entry is true.
@@INCLADE_TASKS =
[:subclades, :ogs]

Constants included from MiGA

CITATION, VERSION, VERSION_DATE, VERSION_NAME

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from MiGA

CITATION, DEBUG, DEBUG_OFF, DEBUG_ON, DEBUG_TRACE_OFF, DEBUG_TRACE_ON, FULL_VERSION, LONG_VERSION, VERSION, VERSION_DATE, clean_fasta_file, initialized?, #result_files_exist?, root_path, script_path, tabulate

Constructor Details

#initialize(path, update = false) ⇒ Project

Create a new MiGA::Project at path, if it doesn’t exist and update is false, or load an existing one.



112
113
114
115
116
117
118
119
120
# File 'lib/miga/project.rb', line 112

# Create a new MiGA::Project at +path+, if it doesn't exist and +update+ is
# false, or load an existing one.
#
# Raises a RuntimeError if the project type is not a key of @@KNOWN_TYPES.
def initialize(path, update=false)
  @datasets = {}
  @path = File.absolute_path(path)
  self.create if not update and not Project.exist? self.path
  # #create already loads the metadata; only load here when it was skipped.
  self.load if self.metadata.nil?
  self.load_plugins
  # Projects without an explicit type are treated as :mixed.
  self.metadata[:type] = :mixed if type.nil?
  raise "Unrecognized project type: #{type}." if @@KNOWN_TYPES[type].nil?
end

Instance Attribute Details

#metadata ⇒ Object (readonly)

Information about the project as MiGA::Metadata.



107
108
109
# File 'lib/miga/project.rb', line 107

# Information about the project as MiGA::Metadata (read-only accessor).
def metadata
  @metadata
end

#path ⇒ Object (readonly)

Absolute path to the project folder.



103
104
105
# File 'lib/miga/project.rb', line 103

# Absolute path to the project folder (read-only accessor for @path).
def path ; @path ; end

Class Method Details

.DISTANCE_TASKS ⇒ Object

Project-wide distance estimations.



76
# File 'lib/miga/project.rb', line 76

# Project-wide distance estimations (class-level reader for
# @@DISTANCE_TASKS).
def self.DISTANCE_TASKS
  @@DISTANCE_TASKS
end

.exist?(path) ⇒ Boolean

Does the project at path exist?

Returns:

  • (Boolean)


87
88
89
# File 'lib/miga/project.rb', line 87

# Does the project at +path+ exist? True only when +path+ is a directory
# that contains a "miga.project.json" file.
def self.exist?(path)
  return false unless Dir.exist?(path)
  File.exist?("#{path}/miga.project.json")
end

.INCLADE_TASKS ⇒ Object

Project-wide tasks for :clade projects.



82
# File 'lib/miga/project.rb', line 82

# Project-wide tasks for :clade projects (class-level reader for
# @@INCLADE_TASKS).
def self.INCLADE_TASKS
  @@INCLADE_TASKS
end

.KNOWN_TYPES ⇒ Object

Supported types of projects.



61
# File 'lib/miga/project.rb', line 61

# Supported types of projects (class-level reader for @@KNOWN_TYPES).
def self.KNOWN_TYPES
  @@KNOWN_TYPES
end

.load(path) ⇒ Object

Load the project at path. Returns MiGA::Project if project exists, nil otherwise.



94
95
96
97
# File 'lib/miga/project.rb', line 94

# Load the project at +path+. Returns a MiGA::Project if the project exists,
# nil otherwise.
def self.load(path)
  Project.exist?(path) ? Project.new(path) : nil
end

.RESULT_DIRS ⇒ Object

Directories containing the results from project-wide tasks.



41
# File 'lib/miga/project.rb', line 41

# Directories containing the results from project-wide tasks (class-level
# reader for @@RESULT_DIRS).
def self.RESULT_DIRS
  @@RESULT_DIRS
end

Instance Method Details

#add_dataset(name) ⇒ Object

Add dataset identified by name and return MiGA::Dataset.



208
209
210
211
212
213
214
215
# File 'lib/miga/project.rb', line 208

# Add dataset identified by +name+ and return MiGA::Dataset.
#
# When +name+ is not yet listed under metadata[:datasets], the dataset is
# instantiated, its name is registered and the project is saved. Names that
# are already registered are left untouched.
def add_dataset(name)
  unless metadata[:datasets].include? name
    MiGA::Dataset.new(self, name)
    @metadata[:datasets] << name
    save
  end
  dataset(name)
end

#add_result(name, save = true) ⇒ Object

Add the result identified by Symbol name, and return MiGA::Result. Save the result if save.



273
274
275
276
277
278
279
280
281
282
# File 'lib/miga/project.rb', line 273

# Add the result identified by Symbol +name+, and return MiGA::Result. Save
# the result if +save+.
#
# Returns nil when +name+ is not a key of @@RESULT_DIRS. A result that is
# already stored is returned as is. A missing result is only built (through
# the matching add_result_<name> method) when +save+ is true and
# result_files_exist? reports the ".done" file for it.
def add_result(name, save=true)
  subdir = @@RESULT_DIRS[name]
  return nil if subdir.nil?
  base = "#{path}/data/#{subdir}/miga-project"
  stored = MiGA::Result.load("#{base}.json")
  # Only a missing result with +save+ enabled goes on to be built.
  return stored unless stored.nil? and save
  return nil unless result_files_exist?(base, ".done")
  built = send("add_result_#{name}", base)
  built.save unless built.nil?
  built
end

#create ⇒ Object

Create an empty project.



124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/miga/project.rb', line 124

# Create an empty project: make the folder structure, write the initial
# project metadata, copy the default daemon configuration (unless one is
# already in place), and load the project.
#
# Raises a RuntimeError if MiGA has not been initialized.
def create
  unless MiGA::MiGA.initialized?
    raise "Impossible to create project in uninitialized MiGA."
  end
  top_level = @@FOLDERS.map { |folder| "#{path}/#{folder}" }
  data_level = @@DATA_FOLDERS.map { |folder| "#{path}/data/#{folder}" }
  # Parents come before their children, so a plain mkdir is enough.
  ([path] + top_level + data_level).each do |dir|
    Dir.mkdir(dir) unless Dir.exist? dir
  end
  @metadata = MiGA::Metadata.new(self.path + "/miga.project.json",
    {datasets: [], name: File.basename(path)})
  daemon_json = "#{path}/daemon/daemon.json"
  unless File.exist? daemon_json
    FileUtils.cp("#{ENV["MIGA_HOME"]}/.miga_daemon.json", daemon_json)
  end
  self.load
end

#dataset(name) ⇒ Object

Returns MiGA::Dataset.



184
185
186
187
188
189
190
# File 'lib/miga/project.rb', line 184

# Returns the MiGA::Dataset identified by +name+ (normalized through
# #miga_name), or nil if no such dataset exists in the project. Dataset
# objects are cached in @datasets.
def dataset(name)
  key = name.miga_name
  return nil unless MiGA::Dataset.exist?(self, key)
  @datasets ||= {}
  @datasets[key] ||= MiGA::Dataset.new(self, key)
end

#dataset_names ⇒ Object

Returns Array of String (without evaluating dataset objects).



178
179
180
# File 'lib/miga/project.rb', line 178

# Returns Array of String (without evaluating dataset objects): the names
# registered under metadata[:datasets].
def dataset_names
  metadata[:datasets]
end

#datasets ⇒ Object

Returns Array of MiGA::Dataset.



172
173
174
# File 'lib/miga/project.rb', line 172

# Returns Array of MiGA::Dataset, one per name registered under
# metadata[:datasets] (each resolved through #dataset).
def datasets
  metadata[:datasets].map{ |name| dataset(name) }
end

#done_preprocessing?(save = true) ⇒ Boolean

Are all the datasets in the project preprocessed? Save intermediate results if save.

Returns:

  • (Boolean)


331
332
333
# File 'lib/miga/project.rb', line 331

# Are all the datasets in the project preprocessed? Save intermediate
# results if +save+.
#
# Datasets for which is_ref? is false count as done. Every dataset is
# evaluated (no short-circuit) before the statuses are combined.
def done_preprocessing?(save=true)
  statuses = datasets.map do |dataset|
    !dataset.is_ref? || dataset.done_preprocessing?(save)
  end
  statuses.all?
end

#each_dataset(&blk) ⇒ Object

Iterate through datasets, with one or two variables passed to blk. If one, the dataset MiGA::Dataset object is passed. If two, the name and the dataset object are passed.



196
197
198
199
200
201
202
203
204
# File 'lib/miga/project.rb', line 196

# Iterate through datasets, with one or two variables passed to +blk+. If
# one, the dataset MiGA::Dataset object is passed. If two, the name and the
# dataset object are passed.
def each_dataset(&blk)
  metadata[:datasets].each do |name|
    # The block's declared arity selects the calling convention.
    if blk.arity == 1
      blk.call(dataset(name))
    else
      blk.call(name, dataset(name))
    end
  end
end

#each_dataset_profile_advance(&blk) ⇒ Object

Call blk passing the result of MiGA::Dataset#profile_advance for each registered dataset.



353
354
355
# File 'lib/miga/project.rb', line 353

# Call +blk+ passing the result of MiGA::Dataset#profile_advance for each
# registered dataset.
def each_dataset_profile_advance(&blk)
  # Single-parameter block, so #each_dataset hands over the dataset object.
  each_dataset do |dataset|
    advance = dataset.profile_advance
    blk.call(advance)
  end
end

#import_dataset(ds, method = :hardlink) ⇒ Object

Import the dataset ds, a MiGA::Dataset, using method which is any method supported by File#generic_transfer.



230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
# File 'lib/miga/project.rb', line 230

# Import the dataset +ds+, a MiGA::Dataset, using +method+ which is any
# method supported by File#generic_transfer.
#
# Transfers every result file, the per-result "json", "start" and "done"
# files that exist, and the dataset metadata, then registers the dataset.
# Raises a RuntimeError if a dataset with the same name already exists.
def import_dataset(ds, method=:hardlink)
  if MiGA::Dataset.exist?(self, ds.name)
    raise "Impossible to import dataset, it already exists: #{ds.name}."
  end
  ds.each_result do |task, result|
    target_dir = "#{path}/data/#{MiGA::Dataset.RESULT_DIRS[task]}"
    # Result files
    result.each_file do |file|
      File.generic_transfer("#{result.dir}/#{file}",
        "#{target_dir}/#{file}", method)
    end
    # Result metadata and control files, only those that are present
    %w(json start done).each do |suffix|
      control = "#{ds.name}.#{suffix}"
      source = "#{result.dir}/#{control}"
      next unless File.exist? source
      File.generic_transfer(source, "#{target_dir}/#{control}", method)
    end
  end
  # Dataset metadata
  File.generic_transfer("#{ds.project.path}/metadata/#{ds.name}.json",
    "#{self.path}/metadata/#{ds.name}.json", method)
  # Register the dataset in this project
  self.add_dataset(ds.name)
end

#install_plugin(path) ⇒ Object

Installs the plugin in the specified path.



359
360
361
362
363
364
365
366
367
368
# File 'lib/miga/project.rb', line 359

# Installs the plugin in the specified +path+: registers its absolute path
# under metadata[:plugins] and saves the project.
#
# Raises a RuntimeError if the plugin is already registered, or if the
# folder doesn't contain a "miga-plugin.json" file.
def install_plugin(path)
  abs_path = File.absolute_path(path)
  raise "Plugin already installed in project: #{abs_path}." unless
    metadata[:plugins].nil? or not metadata[:plugins].include?(abs_path)
  raise "Malformed MiGA plugin: #{abs_path}." unless
    File.exist?(File.expand_path("miga-plugin.json", abs_path))
  metadata[:plugins] ||= []
  metadata[:plugins] << abs_path
  save
end

#is_clade? ⇒ Boolean

Is this a clade project?

Returns:

  • (Boolean)


164
# File 'lib/miga/project.rb', line 164

# Is this a clade project? True when the project type is :clade.
def is_clade?
  type == :clade
end

#is_multi? ⇒ Boolean

Is this a project for multi-organism datasets?

Returns:

  • (Boolean)


168
# File 'lib/miga/project.rb', line 168

# Is this a project for multi-organism datasets? Reports the :multi flag of
# the project type's entry in @@KNOWN_TYPES.
def is_multi?
  type_entry = @@KNOWN_TYPES[type]
  type_entry[:multi]
end

#load ⇒ Object

(Re-)load project data and metadata.



148
149
150
151
152
# File 'lib/miga/project.rb', line 148

# (Re-)load project data and metadata: clears the dataset cache and reads
# the metadata from "miga.project.json" in the project folder.
#
# Raises a RuntimeError if the metadata cannot be loaded.
def load
  @datasets = {}
  @metadata = MiGA::Metadata.load "#{path}/miga.project.json"
  raise "Couldn't find project metadata at #{path}" if metadata.nil?
end

#load_plugins ⇒ Object

Loads the plugins installed in the project.



386
387
388
# File 'lib/miga/project.rb', line 386

# Loads the plugins installed in the project by requiring the
# "lib-plugin.rb" file found in each plugin folder.
def load_plugins
  plugins.each do |plugin_dir|
    entry_point = File.expand_path("lib-plugin.rb", plugin_dir)
    require entry_point
  end
end

#name ⇒ Object

Name of the project.



156
# File 'lib/miga/project.rb', line 156

# Name of the project, as stored under metadata[:name].
def name ; metadata[:name] ; end

#next_distances(save = true) ⇒ Object

Get the next distances task, saving intermediate results if save. Returns a Symbol.



287
# File 'lib/miga/project.rb', line 287

# Get the next distances task, saving intermediate results if +save+.
# Returns a Symbol.
def next_distances(save=true)
  next_task(@@DISTANCE_TASKS, save)
end

#next_inclade(save = true) ⇒ Object

Get the next inclade task, saving intermediate results if save. Returns a Symbol.



292
# File 'lib/miga/project.rb', line 292

# Get the next inclade task, saving intermediate results if +save+.
# Returns a Symbol.
def next_inclade(save=true)
  next_task(@@INCLADE_TASKS, save)
end

#next_task(tasks = @@DISTANCE_TASKS+@@INCLADE_TASKS, save = true) ⇒ Object

Get the next task from tasks, saving intermediate results if save. Returns a Symbol.



297
298
299
300
301
302
303
304
305
306
307
# File 'lib/miga/project.rb', line 297

# Get the next task from +tasks+, saving intermediate results if +save+.
# Returns a Symbol, or nil if no task is pending.
#
# A task is skipped when its "run_<task>" metadata entry is false. Tasks
# listed in @@INCLADE_TASKS are also skipped in non-clade projects unless
# that entry is explicitly true. Otherwise a task is pending when
# #add_result returns nil for it.
def next_task(tasks=@@DISTANCE_TASKS+@@INCLADE_TASKS, save=true)
  tasks.find do |t|
    run_flag = metadata["run_#{t}"]
    # Explicitly switched off.
    next false if run_flag == false
    # In-clade tasks are opt-in outside of clade projects.
    next false if !is_clade? && @@INCLADE_TASKS.include?(t) && run_flag != true
    add_result(t, save).nil?
  end
end

#plugins ⇒ Object

List plugins installed in the project.



382
# File 'lib/miga/project.rb', line 382

# List plugins installed in the project. Initializes metadata[:plugins] to
# an empty Array when it is not set yet.
def plugins ; metadata[:plugins] ||= [] ; end

#profile_datasets_advance ⇒ Object

Returns a two-dimensional matrix (Array of Array) where the first index corresponds to the dataset, the second index corresponds to the dataset task, and the value corresponds to:

  • 0: Before execution.

  • 1: Done (or not required).

  • 2: To do.



342
343
344
345
346
347
348
# File 'lib/miga/project.rb', line 342

# Returns a two-dimensional matrix (Array of Array) where the first index
# corresponds to the dataset, the second index corresponds to the dataset
# task, and the value corresponds to:
#
# - 0: Before execution.
# - 1: Done (or not required).
# - 2: To do.
def profile_datasets_advance
  [].tap do |matrix|
    self.each_dataset_profile_advance { |row| matrix << row }
  end
end

#result(name) ⇒ Object

Get result identified by Symbol name, returns MiGA::Result.



258
259
260
261
262
# File 'lib/miga/project.rb', line 258

# Get result identified by Symbol +name+, returns MiGA::Result. Returns nil
# when +name+ (converted with to_sym) is not a key of @@RESULT_DIRS.
def result(name)
  subdir = @@RESULT_DIRS[name.to_sym]
  unless subdir.nil?
    return MiGA::Result.load("#{path}/data/#{subdir}/miga-project.json")
  end
  nil
end

#results ⇒ Object

Get all results, an Array of MiGA::Result.



266
267
268
# File 'lib/miga/project.rb', line 266

# Get all results, an Array of MiGA::Result. Tasks of @@RESULT_DIRS for
# which #result returns nil are left out.
def results
  loaded = @@RESULT_DIRS.keys.map { |task| result(task) }
  loaded.reject(&:nil?)
end

#save ⇒ Object

Save any changes persistently.



141
142
143
144
# File 'lib/miga/project.rb', line 141

# Save any changes persistently: stores the project metadata and then
# (re-)loads the project.
def save
  metadata.save
  self.load
end

#type ⇒ Object

Type of project.



160
# File 'lib/miga/project.rb', line 160

# Type of project, as stored under metadata[:type].
def type ; metadata[:type] ; end

#uninstall_plugin(path) ⇒ Object

Uninstall the plugin in the specified path.



372
373
374
375
376
377
378
# File 'lib/miga/project.rb', line 372

# Uninstall the plugin in the specified +path+: removes its absolute path
# from metadata[:plugins] and saves the project.
#
# Raises a RuntimeError if the plugin is not currently registered.
def uninstall_plugin(path)
  abs_path = File.absolute_path(path)
  raise "Plugin not currently installed: #{abs_path}." if
    metadata[:plugins].nil? or not metadata[:plugins].include?(abs_path)
  metadata[:plugins].delete(abs_path)
  save
end

#unlink_dataset(name) ⇒ Object

Unlink dataset identified by name and return MiGA::Dataset.



219
220
221
222
223
224
225
# File 'lib/miga/project.rb', line 219

# Unlink dataset identified by +name+ and return MiGA::Dataset.
#
# Removes +name+ from metadata[:datasets] and saves the project. Returns nil
# (changing nothing) if #dataset finds no dataset for +name+.
def unlink_dataset(name)
  d = dataset(name)
  return nil if d.nil?
  metadata[:datasets].delete(name)
  save
  d
end

#unregistered_datasets ⇒ Object

Find the names of all datasets that have (potential) result files but are not yet registered in the project.



311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# File 'lib/miga/project.rb', line 311

# Find all datasets with (potential) result files that are not yet
# registered in the project. Returns an Array of String (dataset names).
#
# Scans every existing folder of MiGA::Dataset.RESULT_DIRS under the
# project's "data" folder for files with a recognized result extension, and
# subtracts the names listed under metadata[:datasets].
def unregistered_datasets
  found = []
  MiGA::Dataset.RESULT_DIRS.values.each do |dir|
    dir_p = "#{path}/data/#{dir}"
    next unless Dir.exist? dir_p
    Dir.entries(dir_p).each do |file|
      next unless
        file =~ %r{
          \.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
        }x
      # The candidate name is the first run of characters without a dot.
      m = /([^\.]+)/.match(file)
      # "miga-project" files belong to project-wide results, not to datasets.
      found << m[1] unless m.nil? or m[1] == "miga-project"
    end
  end
  found.uniq - metadata[:datasets]
end