Class: Bio::FinishM::Fluff

Inherits:
Object
  • Object
show all
Includes:
Logging
Defined in:
lib/finishm/fluff.rb

Instance Method Summary collapse

Methods included from Logging

#log

Instance Method Details

#add_options(optparse_object, options) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/finishm/fluff.rb', line 4

# Registers the command-line options understood by `finishm fluff`.
#
# Sets the usage banner, seeds +options+ with default probe spacing, probe
# length and leash length, declares this command's own switches, and then
# lets Bio::FinishM::ReadInput and Bio::FinishM::GraphGenerator add theirs.
#
# @param optparse_object [#banner=, #separator, #on] option parser to configure
# @param options [Hash] mutated in place: receives the defaults immediately and
#   the parsed switch values when the parser later runs
# @return [Object] whatever GraphGenerator#add_options returns
def add_options(optparse_object, options)
  optparse_object.banner = "\nUsage: finishm fluff --contigs <contig_file> --fastq-gz <reads..> --output-fluff-file <output.fa>

  Takes a set of contigs, and places probes across them (e.g. every 2kb), and then explores the
  graph from each of these probes, taking all paths within some leash length, including the 'fluff'
  which is not the same path as along the contig. Prints out all of these paths to a fasta file.\n\n"

  defaults = {
    :probe_spacing => 2000,
    :probe_length => 100,
    :graph_search_leash_length => 20000
  }
  options.merge!(defaults)

  # Builds a switch handler that files the parsed value under +key+ in options.
  remember = lambda { |key| proc { |value| options[key] = value } }

  optparse_object.separator "\nRequired arguments:\n\n"
  optparse_object.on(
    "--contigs FILE",
    "fasta file containing contigs to find the fluff on [required]",
    &remember.call(:contigs_file)
  )
  optparse_object.on(
    "--output-fluff-file PATH",
    "Output found paths to this file in fasta format [required]",
    &remember.call(:output_fluff_file)
  )
  optparse_object.separator "\nThere must be some definition of reads too:\n\n" #TODO improve this help
  Bio::FinishM::ReadInput.new.add_options(optparse_object, options)

  # The "[default: ...]" hints are interpolated here, after ReadInput has had
  # its turn with options, exactly as the switches are declared.
  optparse_object.separator "\nOptional arguments:\n\n"
  optparse_object.on(
    "--probe-spacing NUM", Integer,
    "Distance between probe points in the contig [default: #{options[:probe_spacing]}]",
    &remember.call(:probe_spacing)
  )
  optparse_object.on(
    "--probe-size NUM", Integer,
    "Length of the probe to be inserted into the velvet graph. Must be greater than graph kmer length. [default: #{options[:probe_length]}]",
    &remember.call(:probe_length)
  )
  optparse_object.on(
    "--leash-length NUM", Integer,
    "Don't explore too far in the graph, only this far and not much more [default: #{options[:graph_search_leash_length]}]",
    &remember.call(:graph_search_leash_length)
  )
  optparse_object.on(
    "--assembly-png PATH",
    "Output assembly as a PNG file [default: off]",
    &remember.call(:output_graph_png)
  )
  optparse_object.on(
    "--assembly-svg PATH",
    "Output assembly as an SVG file [default: off]",
    &remember.call(:output_graph_svg)
  )
  optparse_object.on(
    "--assembly-dot PATH",
    "Output assembly as an DOT file [default: off]",
    &remember.call(:output_graph_dot)
  )

  Bio::FinishM::GraphGenerator.new.add_options optparse_object, options
end

#run(options, argv) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/finishm/fluff.rb', line 74

# Runs the fluff workflow: cuts probes out of each contig, builds the assembly
# graph with those probes in it, explores the graph from every probe, and
# writes each discovered path to a fasta file.
#
# @param options [Hash] parsed options. Read here: :contigs_file,
#   :probe_length, :probe_spacing, :graph_search_leash_length,
#   :output_fluff_file, and the optional :output_graph_png, :output_graph_svg
#   and :output_graph_dot. The hash is also handed to ReadInput#parse_options
#   and GraphGenerator#generate_graph.
# @param argv [Array] not used by this method
# @return [Object] the value of the final File.open block
def run(options, argv)
  # Read in all the contigs sequences
  probe_sequences = []
  sequence_names = []
  Bio::FlatFile.foreach(options[:contigs_file]) do |seq|
    sequence_names.push seq.definition

    sequence = seq.seq
    # NOTE(review): this upper bound is one less than the last offset at which
    # a full-length probe still fits (sequence.length - probe_length), so a
    # contig exactly probe_length long gets no probe - confirm this is intended.
    0.step(sequence.length-1-options[:probe_length], options[:probe_spacing]) do |offset|
      # Only probe in the forward direction
      probe_sequences.push sequence[offset...offset+options[:probe_length]]
    end
  end
  log.info "Searching from #{probe_sequences.length} different probes from #{sequence_names.length} contigs"

  # Generate the graph with the probe sequences in it.
  read_input = Bio::FinishM::ReadInput.new
  read_input.parse_options options
  finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph(probe_sequences, read_input, options)

  # Loop over the ends, trying to make connections from each one
  fluffer = Bio::AssemblyGraphAlgorithms::Fluffer.new
  fluffings = fluffer.fluff(finishm_graph, options[:graph_search_leash_length])
  log.debug "Found these fluffings: #{fluffings}" if log.debug?
  if log.info?
    # Seed the sum with 0 so that an empty result reports "0" rather than nil.
    total_paths = fluffings.map { |path_set| path_set.length }.reduce(0, :+)
    log.info "Found #{total_paths} paths in total"
  end

  if options[:output_graph_png] || options[:output_graph_svg] || options[:output_graph_dot]
    log.info "Converting assembly to a graphviz PNG"
    viser = Bio::Assembly::ABVisualiser.new
    probe_node_ids = finishm_graph.probe_nodes.map { |node| node.node_id }
    gv = viser.graphviz(finishm_graph.graph, {:start_node_ids => probe_node_ids})

    # Each output format is written only when its path option was given.
    gv.output :png => options[:output_graph_png], :use => :neato if options[:output_graph_png]
    gv.output :svg => options[:output_graph_svg], :use => :neato if options[:output_graph_svg]
    gv.output :dot => options[:output_graph_dot] if options[:output_graph_dot]
  end

  # Print out the sequences
  File.open(options[:output_fluff_file], 'w') do |output|
    fluffings.each_with_index do |path_set, probe_number|
      path_set.each_with_index do |path, path_number|
        # fates is indexed in parallel with the paths of the set
        fate = path_set.fates[path_number]
        # Header numbering is 1-based for both probe and path
        output.puts ">probe#{probe_number+1}_path#{path_number+1} #{fate}"
        output.puts path.sequence
      end
    end
  end
end

#validate_options(options, argv) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/finishm/fluff.rb', line 50

# Checks the parsed options for the fluff command.
#
# @param options [Hash] parsed options; :contigs_file and :output_fluff_file
#   must be present, and :velvet_kmer_size must be smaller than :probe_length
# @param argv [Array] arguments left over after option parsing; must be empty
# @return [String, Object] an error message describing the first problem found,
#   otherwise whatever ReadInput#validate_options returns (nil meaning success)
def validate_options(options, argv)
  #TODO: give a better description of the error that has occurred
  #TODO: require reads options
  return "Dangling argument(s) found e.g. #{argv[0]}" unless argv.empty?

  # Report the first required option that was never set.
  required = [:contigs_file, :output_fluff_file]
  missing = required.find { |key| options[key].nil? }
  return "No option found to specify #{missing}." if missing

  unless options[:velvet_kmer_size] < options[:probe_length]
    return "The probe length must be greater than the kmer length, otherwise it will not be incorporated into the kmer graph"
  end

  # A nil result from the read-input validation means every option parsed successfully.
  Bio::FinishM::ReadInput.new.validate_options(options, [])
end