Class: Transrate::Cmdline

Inherits:
Object
  • Object
show all
Defined in:
lib/transrate/cmdline.rb

Instance Method Summary collapse

Constructor Details

#initialize(args) ⇒ Cmdline

Returns a new instance of Cmdline.



11
12
13
14
15
16
17
18
# File 'lib/transrate/cmdline.rb', line 11

# Parses the command line and validates it before any analysis runs.
#
# @param args [Array<String>] raw command-line arguments
def initialize(args)
  @opts = parse_arguments(args)
  # only shown when the examples option was given
  print_examples if @opts.examples
  # column width used when printing metric tables to the log
  @report_width = 35
  check_arguments
end

Instance Method Details

#allowed_depsObject

check_dependencies



262
263
264
265
266
267
268
269
# File 'lib/transrate/cmdline.rb', line 262

# Lists the values accepted by the install-deps option.
#
# When the environment variable TRANSRATE_PACKAGED_BINARY is exactly
# 'true', only 'read' is allowed.
#
# @return [Array<String>] the permitted install-deps values
def allowed_deps
  packaged = ENV['TRANSRATE_PACKAGED_BINARY'] == 'true'
  packaged ? ['read'] : ['read', 'ref', 'all']
end

#analyse_assembly(assembly, r, result_path) ⇒ Object



362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
# File 'lib/transrate/cmdline.rb', line 362

# Runs every metric family on one assembly and returns its summary row.
#
# The work happens inside +result_path+ (created if missing), where the
# per-contig CSV is also written.
#
# @param assembly [String] path of the assembly file
# @param r [Object, nil] reference handed on to Transrater.new
# @param result_path [String] directory to work in
# @return [Hash] contig, read and comparative results merged with the
#   keys :assembly, :score, :optimal_score and :cutoff
def analyse_assembly(assembly, r, result_path)
  logger.info "Loading assembly: #{assembly}"
  a = Assembly.new(assembly)

  logger.info "Analysing assembly: #{assembly}"
  logger.info "Results will be saved in #{File.expand_path result_path}"

  contig_results = {}
  read_results = {}
  comparative_results = {}
  # the score columns stay "NA" unless both read options were given
  score, optimal, cutoff = "NA", "NA", "NA"

  FileUtils.mkdir_p(result_path)
  Dir.chdir(result_path) do
    transrater = Transrater.new(a, r, threads: @opts.threads)

    contig_results = contig_metrics(transrater)
    read_results = read_metrics(transrater)
    comparative_results = comparative_metrics(transrater)
    if @opts.left && @opts.right
      score, optimal, cutoff = assembly_score(assembly, transrater)
    end

    write_contig_csv(a)
  end

  summary = {
    :assembly => assembly,
    :score => score,
    :optimal_score => optimal,
    :cutoff => cutoff
  }
  contig_results.merge(read_results)
                .merge(comparative_results)
                .merge(summary)
end

#argument_parserObject



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/transrate/cmdline.rb', line 56

# Builds the Trollop parser that describes every command-line option.
#
# The banner text and the list of install-deps values are captured in
# locals first so they can be used inside the parser block.
#
# @return [Trollop::Parser] a freshly configured parser
def argument_parser
  banner_text = help_message
  deps_list = allowed_deps.join(', ')
  Trollop::Parser.new do
    version Transrate::VERSION::STRING.dup
    banner banner_text
    opt :assembly,
        "Assembly file(s) in FASTA format, comma-separated",
        type: String
    opt :left,
        "Left reads file(s) in FASTQ format, comma-separated",
        type: String
    opt :right,
        "Right reads file(s) in FASTQ format, comma-separated",
        type: String
    opt :reference,
        "Reference proteome or transcriptome file in FASTA format",
        type: String
    opt :threads,
        "Number of threads to use",
        default: 8, type: Integer
    opt :merge_assemblies,
        "Merge best contigs from multiple assemblies into file",
        type: String
    opt :output,
        "Directory where results are output (will be created)",
        default: 'transrate_results'
    opt :loglevel,
        "Log level. One of [error, info, warn, debug]",
        default: 'info'
    opt :install_deps,
        "Install any missing dependencies. One of [#{deps_list}]",
        type: String, default: nil
    opt :examples, "Show some example commands with explanations"
  end
end

#assembly_result_path(assembly) ⇒ Object



407
408
409
410
# File 'lib/transrate/cmdline.rb', line 407

# Turns an assembly path into a flat name: every path separator becomes
# an underscore and the file extension is dropped.
#
# @param assembly [String] assembly path
# @return [String] the flattened, extension-less name
def assembly_result_path(assembly)
  flattened = assembly.gsub(File::SEPARATOR, '_')
  extension = File.extname(flattened)
  File.basename(flattened, extension)
end

#assembly_result_paths(assemblies) ⇒ Object

 analyse_assembly



397
398
399
400
401
402
403
404
405
# File 'lib/transrate/cmdline.rb', line 397

# Chooses a result directory name for each assembly.
#
# A single assembly is named after its file (without extension). For
# several assemblies the directory shared by all of them is stripped from
# each absolute path and the remainder is flattened with
# assembly_result_path, so equally named files in different directories
# get distinct names.
#
# @param assemblies [Array<String>] assembly file paths
# @return [Array<String>] one result directory name per assembly
def assembly_result_paths assemblies
  return [] if assemblies.empty?
  if assemblies.length == 1
    only = assemblies.first
    return [File.basename(only, File.extname(only))]
  end

  # File.expand_path returns plain Strings, which have no #to_path
  paths = assemblies.map { |a| File.expand_path a }
  common_prefix = common_directory_path paths

  paths.map do |path|
    # strip the shared prefix from the front only, never from the middle
    rest = path.start_with?(common_prefix) ? path[common_prefix.length..-1] : path
    rest = rest.sub(/\A\//, "")
    # identical inputs share their whole path; fall back to the file name
    rest = File.basename(path) if rest.empty?
    assembly_result_path rest
  end
end

#assembly_score(assembly, transrater) ⇒ Object



497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
# File 'lib/transrate/cmdline.rb', line 497

# Fetches the assembly score and the optimal score/cutoff from the
# transrater and, when a score exists, logs them with the good-contig
# summary.
#
# @param assembly [String] assembly path; its basename is passed to
#   assembly_optimal_score as the prefix
# @param transrater [Object] responds to assembly_score,
#   assembly_optimal_score and good_contigs
# @return [Array] [score, optimal, cutoff]
def assembly_score(assembly, transrater)
  score = transrater.assembly_score
  optimal, cutoff = transrater.assembly_optimal_score(File.basename(assembly))
  summary = [score, optimal, cutoff]
  # nothing is reported when no score is available
  return summary if score.nil?

  pretty_print_hash({ :TRANSRATE_ASSEMBLY_SCORE => score }, @report_width, 4)
  logger.info "-" * @report_width
  pretty_print_hash({ :TRANSRATE_OPTIMAL_SCORE => optimal }, @report_width, 4)
  pretty_print_hash({ :TRANSRATE_OPTIMAL_CUTOFF => cutoff }, @report_width, 4)
  pretty_print_hash(transrater.good_contigs, @report_width)
  summary
end

#check_argumentsObject



162
163
164
165
166
167
168
# File 'lib/transrate/cmdline.rb', line 162

# Runs every validation step in a fixed order: dependencies, log level,
# assembly, reference and finally reads.
#
# @return [Object] whatever check_reads returns
def check_arguments
  [:check_dependencies, :check_loglevel,
   :check_assembly, :check_reference].each do |check|
    send(check)
  end
  check_reads
end

#check_assemblyObject



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# File 'lib/transrate/cmdline.rb', line 179

# Validates the assembly option: it must be present and every
# comma-separated file must exist. Paths are rewritten to absolute paths
# in @opts first.
#
# @raise [TransrateArgError] when no assembly was given
# @raise [TransrateIOError] for the first listed file that does not exist
def check_assembly
  unless @opts.assembly
    raise TransrateArgError.new("Option --assembly must be specified. " \
                                "Try --help for help.")
  end

  absolute = @opts.assembly.split(',').map { |a| File.expand_path(a) }
  @opts[:assembly] = absolute.join(',')

  @opts.assembly.split(',').each do |assembly_file|
    next if File.exist?(assembly_file)
    raise TransrateIOError.new("Assembly fasta file does not exist:  " \
                               "#{assembly_file}")
  end
end

#check_dependenciesObject



229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/transrate/cmdline.rb', line 229

# Handles an install-deps request, or otherwise checks that the
# dependencies relevant to the given options are present.
#
# The two manifests (deps/deps.yaml and deps/blast.yaml) are located
# inside the loaded transrate gem. With an install request, control is
# handed to install_missing_dependencies; without one, the read manifest
# is checked when the left option is set and the blast manifest when a
# reference is set, and the result goes to print_missing_dependencies.
def check_dependencies
  gem_dir = Gem.loaded_specs['transrate'].full_gem_path
  gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
  blast_dep = File.join(gem_dir, 'deps', 'blast.yaml')

  deps = read_deps = ref_deps = nil
  unless @opts.install_deps.nil?
    check_install_command
    deps, read_deps, ref_deps = ['all', 'read', 'ref'].map do |kind|
      @opts.install_deps == kind
    end
  end

  if deps || read_deps || ref_deps
    # user has requested dependency installation
    puts "Checking dependencies"
    install_missing_dependencies(deps, read_deps, ref_deps,
                                 gem_deps, blast_dep)
  else
    # nothing to install: only verify what this command line needs
    missing = @opts.left ? Bindeps.missing(gem_deps) : []
    blast_missing = @opts.reference ? Bindeps.missing(blast_dep) : []
    print_missing_dependencies(missing, blast_missing)
  end
end

#check_install_commandObject



271
272
273
274
275
276
277
# File 'lib/transrate/cmdline.rb', line 271

# Rejects an install-deps value that is not listed by allowed_deps.
#
# @raise [TransrateError] when the requested value is not allowed
def check_install_command
  return if allowed_deps.include?(@opts.install_deps)

  raise TransrateError.new(
    "install-deps #{@opts.install_deps} is not valid. " \
    "You must specify one of: #{allowed_deps.join(', ')}."
  )
end

#check_loglevelObject



170
171
172
173
174
175
176
177
# File 'lib/transrate/cmdline.rb', line 170

# Validates the loglevel option and applies it to the logger.
#
# @raise [TransrateError] unless the level is error, info, warn or debug
def check_loglevel
  level = @opts.loglevel
  unless ['error', 'info', 'warn', 'debug'].include?(level)
    raise TransrateError.new("Loglevel #{level} is not valid. " \
                             "It must be one of: error, info, warn, debug.")
  end

  logger.level = Yell::Level.new(level.to_sym)
end

#check_readsObject



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/transrate/cmdline.rb', line 206

# Validates the read options. Nothing is checked unless both left and
# right are given. The two comma-separated lists must be equally long;
# they are then rewritten to absolute paths in @opts and every file must
# exist.
#
# @raise [TransrateArgError] when the lists differ in length
# @raise [TransrateIOError] for the first missing file, checked pair by
#   pair with the left file first
def check_reads
  return unless @opts.left && @opts.right

  unless @opts.left.split(",").length == @opts.right.split(",").length
    raise TransrateArgError.new(
      "Please provide the same number of left reads as right reads"
    )
  end

  absolute = lambda do |list|
    list.split(',').map { |f| File.expand_path f }.join(',')
  end
  @opts[:left] = absolute.call(@opts.left)
  @opts[:right] = absolute.call(@opts.right)

  pairs = @opts.left.split(",").zip(@opts.right.split(","))
  pairs.each do |left, right|
    unless File.exist?(left)
      raise TransrateIOError.new "Left read fastq file does not exist: #{left}"
    end
    unless File.exist?(right)
      raise TransrateIOError.new "Right read fastq file does not exist: #{right}"
    end
  end
end

#check_referenceObject



196
197
198
199
200
201
202
203
204
# File 'lib/transrate/cmdline.rb', line 196

# Validates the reference option when one was given: the path is made
# absolute in @opts and the file must exist.
#
# @raise [TransrateIOError] when the reference file does not exist
def check_reference
  return unless @opts.reference

  @opts[:reference] = File.expand_path(@opts.reference)
  return if File.exist?(@opts.reference)

  raise TransrateIOError.new("Reference fasta file does not exist:  " \
                             "#{@opts.reference}")
end

#common_directory_path(dirs) ⇒ Object



412
413
414
415
416
# File 'lib/transrate/cmdline.rb', line 412

# Finds the longest leading run of path components shared by every path.
#
# All paths are compared component by component. Comparing only the
# lexicographic minimum and maximum is not enough: for "/a/b", "/a/b-x/f"
# and "/a/b/z" the extremes share "/a/b", yet the middle path does not
# lie under it.
#
# @param dirs [Array<String>] paths to compare
# @return [String] the shared leading components joined by the separator;
#   "" when nothing is shared or +dirs+ is empty
def common_directory_path(dirs)
  return "" if dirs.empty?

  separator = File::SEPARATOR
  first, *others = dirs.map { |dir| dir.split(separator) }
  shared = first.each_with_index.take_while do |name, index|
    others.all? { |parts| parts[index] == name }
  end
  shared.map(&:first).join(separator)
end

#comparative_metrics(transrater) ⇒ Object



475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
# File 'lib/transrate/cmdline.rb', line 475

# Computes and logs the reference-based metrics, or logs that they are
# skipped when no reference option was given.
#
# @param transrater [Object] responds to comparative_metrics, whose
#   result responds to comp_stats
# @return [Hash, nil] the comp_stats value, or {} when skipped
def comparative_metrics transrater
  unless @opts.reference
    logger.info "No reference provided, skipping comparative diagnostics"
    return {}
  end

  logger.info "Calculating comparative metrics..."
  started = Time.now
  stats = transrater.comparative_metrics.comp_stats

  if stats
    logger.info "Comparative metrics:"
    logger.info "-" * @report_width
    pretty_print_hash(stats, @report_width)
  end

  elapsed = (Time.now - started).round
  logger.info "Comparative metrics done in #{elapsed} seconds"
  logger.info "-" * @report_width
  stats
end

#concatenate_assemblies(assemblies) ⇒ Object



342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
# File 'lib/transrate/cmdline.rb', line 342

# Merges several FASTA assemblies into the file named by the
# merge_assemblies option.
#
# Each contig is renamed "<assembly file name without extension>:<id>".
# Contigs are collected in a hash keyed by that name, so a name seen
# again replaces the earlier sequence.
#
# @param assemblies [Array<String>] paths of the FASTA files to merge
# @return [String] the path of the merged file
def concatenate_assemblies assemblies
  merged_file = @opts.merge_assemblies
  merged = {}
  assemblies.each do |file|
    prefix = File.basename(file, File.extname(file))
    # block form so the FASTA handle is closed once the file is read,
    # instead of leaving one open handle per assembly
    Bio::FastaFormat.open(file) do |fasta|
      fasta.each do |entry|
        merged["#{prefix}:#{entry.entry_id}"] = entry.seq
      end
    end
  end
  logger.info "Merging assemblies into one file...'#{merged_file}'"
  File.open(merged_file, "wb") do |out|
    merged.each do |name, seq|
      out.write ">#{name}\n"
      out.write "#{seq}\n"
    end
  end
  merged_file
end

#contig_metrics(transrater) ⇒ Object

write_assembly_csv



439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
# File 'lib/transrate/cmdline.rb', line 439

# Computes and logs the sequence-based metrics.
#
# The assembly's contig metric results are merged in place into the hash
# returned by assembly_metrics.basic_stats, and that same hash is
# returned.
#
# @param transrater [Object] responds to assembly_metrics and assembly
# @return [Hash] the merged contig metrics
def contig_metrics transrater
  logger.info "Calculating contig metrics..."
  started = Time.now
  stats = transrater.assembly_metrics.basic_stats
  stats.merge!(transrater.assembly.contig_metrics.results)

  if stats
    logger.info "Contig metrics:"
    logger.info "-" * @report_width
    pretty_print_hash(stats, @report_width)
  end

  elapsed = (Time.now - started).round
  logger.info "Contig metrics done in #{elapsed} seconds"
  stats
end

#help_messageObject



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/transrate/cmdline.rb', line 94

# Builds the introductory help text: version line, authors, a short
# description of the three metric kinds, the documentation URL and the
# usage line. The text ends with an "OPTIONS:" heading; argument_parser
# passes it to the parser as its banner.
#
# The heredoc body is flush-left on purpose: with <<- only the closing
# EOS may be indented, so any leading spaces would appear in the output.
#
# @return [String] the help banner text
def help_message
<<-EOS

Transrate v#{Transrate::VERSION::STRING.dup}
by Richard Smith-Unna, Chris Boursnell, Rob Patro,
 Julian Hibberd, and Steve Kelly

DESCRIPTION:
Analyse a de-novo transcriptome assembly using three kinds of metrics:

1. sequence based (if --assembly is given)
2. read mapping based (if --left and --right are given)
3. reference based (if --reference is given)

Documentation at http://hibberdlab.com/transrate

USAGE:
transrate <options>

OPTIONS:

  EOS
end

#install_missing_dependencies(deps, read_deps, ref_deps, gem_deps, blast_dep) ⇒ Object



279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'lib/transrate/cmdline.rb', line 279

# Passes the requested manifests to Bindeps.require, then asks Bindeps
# what is still missing. On success it prints a confirmation and exits
# the process.
#
# @param deps [Boolean, nil] truthy to process both manifests
# @param read_deps [Boolean, nil] truthy to process the read manifest
# @param ref_deps [Boolean, nil] truthy to process the blast manifest
# @param gem_deps [String] path of the read-dependency manifest
# @param blast_dep [String] path of the blast manifest
# @raise [TransrateError] listing name:version of whatever is still
#   missing afterwards
def install_missing_dependencies(deps, read_deps, ref_deps,
                                 gem_deps, blast_dep)
  manifests = []
  manifests << gem_deps if deps || read_deps
  manifests << blast_dep if deps || ref_deps

  missing = manifests.flat_map do |manifest|
    Bindeps.require manifest
    Bindeps.missing manifest
  end

  unless missing.empty?
    list = missing.map { |dep| "#{dep.name}:#{dep.version}" }.join("\n - ")
    raise TransrateError.new("Failed to install: \n - #{list}")
  end

  puts "All dependencies installed"
  exit
end

#parse_arguments(args) ⇒ Object



45
46
47
48
49
50
51
52
53
54
# File 'lib/transrate/cmdline.rb', line 45

# Parses the raw command line with the parser from argument_parser.
#
# With no arguments, or with -h / --help, the banner is printed and
# Trollop::HelpNeeded is raised so the standard handler takes over.
#
# One parser instance is built and used both for parsing and for
# Trollop's standard exception handling, so help and error reporting come
# from the parser that actually saw the arguments.
#
# @param args [Array<String>] raw command-line arguments
# @return [Object] whatever the parser's parse returns for +args+
def parse_arguments args
  parser = argument_parser
  Trollop::with_standard_exception_handling(parser) do
    if args.empty? || args.include?("-h") || args.include?("--help")
      transrate_banner
      raise Trollop::HelpNeeded
    end

    parser.parse args
  end
end

#pretty_print_hash(hash, width, round = 2) ⇒ Object



325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
# File 'lib/transrate/cmdline.rb', line 325

# Logs each key/value pair on one line, the key on the left (underscores
# shown as spaces) and the value padded so the line is +width+ wide.
#
# A value is shown as a rounded float when it has a non-zero fractional
# part after rounding; existing floats are always rounded. If key and
# value together exceed +width+, no padding is inserted.
#
# @param hash [Hash] metrics to print
# @param width [Integer] target line width
# @param round [Integer] number of decimal places
# @return [Array] one logger result per entry
def pretty_print_hash(hash, width, round=2)
  hash.map do |key, value|
    rounded = value.to_f.round(round)
    # digits after the decimal point, read as an integer
    fraction = rounded.to_s.split('.').last.to_i
    value = rounded if fraction > 0
    value = value.round(round) if value.is_a? Float

    label = key.to_s
    text = value.to_s
    gap = [width - (label.length + text.length), 0].max
    logger.info "#{label.split('_').join(' ')}#{' ' * gap}#{text}"
  end
end


135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/transrate/cmdline.rb', line 135

# Prints the version line and a set of example command lines, then
# terminates the process with exit status 0.
#
# The heredoc is indented for readability; each line has its leading
# whitespace stripped before printing.
def print_examples
  msg = <<-EOS

  Transrate v#{Transrate::VERSION::STRING.dup}

  EXAMPLE COMMANDS:

  # check dependencies and install any that are missing
  transrate --install-deps all

  # get the transrate score for the assembly and each contig
  transrate --assembly contigs.fa --left left.fq --right right.fq

  # basic assembly metrics only
  transrate --assembly contigs.fa

  # basic and reference-based metrics with 8 threads
  transrate --assembly contigs.fa --reference ref.fa --threads 8

  # contig and read-based metrics for two assemblies with 32 threads
  transrate --assembly one.fa,two.fa --left l.fq --right r.fq --threads 32

  EOS
  # drop the heredoc's indentation line by line
  puts msg.split("\n").map{ |line| line.lstrip }.join("\n")
  exit(0)
end

install_missing_dependencies



302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
# File 'lib/transrate/cmdline.rb', line 302

# Reports missing dependencies and exits with status 1. Does nothing when
# both lists are empty.
#
# @param missing [Array] missing read-metric dependencies; each responds
#   to name and version
# @param blast_missing [Array] missing reference-metric dependencies
def print_missing_dependencies(missing, blast_missing)
  return unless missing.length + blast_missing.length > 0

  puts "Dependencies are missing:"
  [missing, blast_missing].each do |group|
    group.each { |dep| puts "  - #{dep.name} (#{dep.version})" }
  end

  puts "To install all missing dependencies, run:",
       "  transrate --install-deps all",
       "If you only want the read-metrics dependencies:",
       "  transrate --install-deps read",
       "Or if you only want the reference-metrics dependencies: ",
       "  transrate --install-deps ref"

  exit 1
end

#read_metrics(transrater) ⇒ Object



455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
# File 'lib/transrate/cmdline.rb', line 455

# Computes and logs the read-mapping metrics, or logs that they are
# skipped unless both the left and right options were given.
#
# @param transrater [Object] responds to read_metrics(left, right), whose
#   result responds to read_stats
# @return [Hash, nil] the read_stats value, or {} when skipped
def read_metrics transrater
  unless @opts.left && @opts.right
    logger.info "No reads provided, skipping read diagnostics"
    return {}
  end

  logger.info "Calculating read diagnostics..."
  started = Time.now
  stats = transrater.read_metrics(@opts.left, @opts.right).read_stats

  if stats
    logger.info "Read mapping metrics:"
    logger.info "-" * @report_width
    pretty_print_hash(stats, @report_width)
  end

  elapsed = (Time.now - started).round
  logger.info "Read metrics done in #{elapsed} seconds"
  stats
end

#runObject



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/transrate/cmdline.rb', line 20

# Analyses every requested assembly and writes the summary CSV.
#
# All work happens inside the output directory (created if needed). When
# merge_assemblies is set, the inputs are first concatenated into one
# file and only that merged assembly is analysed.
#
# @return [Object] the result of write_assembly_csv
def run
  results = []

  assemblies = @opts.assembly.split(',')
  result_paths = assembly_result_paths assemblies

  r = @opts.reference ? Assembly.new(File.expand_path @opts.reference) : nil

  # hash-style write, as the check_* methods do; an accessor-style writer
  # is not guaranteed to exist on the options object
  @opts[:output] = File.expand_path @opts.output
  FileUtils.mkdir_p @opts.output

  Dir.chdir @opts.output do
    if @opts.merge_assemblies
      # concatenate_assemblies returns a single path (a String, which has
      # no #zip); wrap it in a list and name the result directory after
      # the merged file rather than the first unmerged input
      assemblies = Array(concatenate_assemblies(assemblies))
      result_paths = assembly_result_paths assemblies
    end

    assemblies.zip(result_paths) do |assembly, result_path|
      results << analyse_assembly(assembly, r, result_path)
    end

    write_assembly_csv results
  end
end

#terminal_columnsObject



89
90
91
92
# File 'lib/transrate/cmdline.rb', line 89

# Reports the width of the controlling terminal in columns.
#
# IO.console is nil when the process has no terminal (piped output,
# cron, CI), and some terminals report a width of zero; in those cases,
# or when the size cannot be queried, the conventional 80 columns is
# assumed instead of failing.
#
# @return [Integer] terminal width, or 80 when it cannot be determined
def terminal_columns
  require 'io/console'
  console = IO.console
  columns = console ? console.winsize.last : 0
  columns > 0 ? columns : 80
rescue SystemCallError, NotImplementedError
  80
end

#transrate_bannerObject



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/transrate/cmdline.rb', line 118

# Prints the ASCII-art transrate logo when the terminal is wider than 70
# columns; on narrower terminals nothing is printed.
#
# The block-character decoration on the three lower logo lines is
# coloured with String#green, #yellow and #red, which are not defined in
# this method (presumably provided by a colour extension loaded
# elsewhere — confirm).
#
# @return [String] always the empty string
def transrate_banner
  if terminal_columns > 70
    txp = '░▓▓▓^▓▓▓░'
    toptxp = txp.green
    midtxp = txp.yellow
    bottxp = txp.red
    # backslashes are doubled because the heredoc is interpolated
    puts <<-EOS
         _                                        _
        | |_  _ __  __ _  _ __   ___  _ __  __ _ | |_  ___
#{toptxp} | __|| '__|/ _` || '_ \\ / __|| '__|/ _` || __|/ _ \\ #{toptxp}
#{midtxp} | |_ | |  | (_| || | | |\\__ \\| |  | (_| || |_|  __/ #{midtxp}
#{bottxp}  \\__||_|   \\__,_||_| |_||___/|_|   \\__,_| \\__|\\___| #{bottxp}
    EOS
  end
  ""
end

#write_assembly_csv(results) ⇒ Object



418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
# File 'lib/transrate/cmdline.rb', line 418

# Writes one summary row per assembly to assemblies.csv in the current
# directory.
#
# The header is :assembly followed by the remaining keys of the first
# result; floats are rounded to 5 decimal places. With no results the
# file is still created but left empty, rather than failing on the
# missing first row.
#
# @param results [Array<Hash>] one metrics hash per assembly
def write_assembly_csv results
  outfile = "assemblies.csv"
  logger.info "Writing analysis results to #{outfile}"

  CSV.open(outfile, 'wb') do |file|
    # nothing to derive a header from
    next if results.empty?

    head = [:assembly] + (results.first.keys - [:assembly])
    file << head
    results.each do |row|
      file << head.map do |key|
        entry = row[key]
        entry.is_a?(Float) ? entry.round(5) : entry
      end
    end
  end
end

#write_contig_csv(a) ⇒ Object



515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
# File 'lib/transrate/cmdline.rb', line 515

# Writes one row of metrics per contig to contigs.csv in the current
# directory.
#
# Every row starts with the contig name and its basic metrics;
# comparative metrics are appended when a reference option is set and
# read metrics when both read options are set. The header row comes from
# the keys of the first contig. Floats are rounded to 6 decimal places.
#
# @param a [#each] yields (name, contig) pairs; each contig responds to
#   basic_metrics, comparative_metrics and read_metrics
def write_contig_csv a
  outfile = File.expand_path "contigs.csv"
  logger.info "Writing contig metrics for each contig to #{outfile}"
  with_reference = @opts.reference
  with_reads = @opts.left && @opts.right
  header_written = false

  CSV.open(outfile, 'wb') do |csv|
    a.each do |name, contig|
      row = { :contig_name => name }.merge(contig.basic_metrics)
      row.merge!(contig.comparative_metrics) if with_reference
      row.merge!(contig.read_metrics) if with_reads

      unless header_written
        csv << row.keys
        header_written = true
      end
      csv << row.values.map do |value|
        value.is_a?(Float) ? value.round(6) : value
      end
    end
  end
end