Class: Bio::FinishM::GapFiller

Inherits:
Object
  • Object
show all
Includes:
Logging
Defined in:
lib/finishm/connect.rb,
lib/finishm/gapfiller.rb

Instance Method Summary collapse

Methods included from Logging

#log

Instance Method Details

#add_options(optparse_object, options) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/finishm/connect.rb', line 6

# Registers this command's banner, help separators and switches on the given
# option parser, and seeds +options+ with the default leash length.
#
# @param optparse_object [#banner=, #separator, #on] parser to configure
#   (presumably an OptionParser -- confirm against the caller)
# @param options [Hash] mutated in place: :graph_search_leash_length is set
#   to its default here; :read_sets and :graph_search_leash_length are
#   written later by the registered switch handlers when the parser runs
# @return [Object] whatever the final +optparse_object.on+ call returns
def add_options(optparse_object, options)
  optparse_object.banner = "\nUsage: finishm distance --read-sets <set1.fasta>,<set2.fasta>[,<set3.fasta>...] <assembly-specification>

Takes two or more sets of reads and determines the minimum distance between them in a de-Bruijn graph traversal.

example: finishm distance --read-sets gene1reads.fa,gene2reads.fa,gene3reads.fa --fastq-gz reads.1.fq.gz,reads.2.fq.gz
\n"

  # Set the default before registering switches: the --leash-length help
  # text below interpolates this value.
  options[:graph_search_leash_length] = 20000

  optparse_object.separator "\nRequired arguments:\n\n"
  optparse_object.on("--read-sets FILES", Array, "comma-separated list of fasta files containing read sets to connect [required]") do |arg|
    options[:read_sets] = arg
  end

  optparse_object.separator "\nThere must be some definition of how to do the assembly, or else a path to a previous assembly directory:\n\n"
  # Read-input and graph-generation switches are contributed by their own classes.
  Bio::FinishM::ReadInput.new.add_options(optparse_object, options)
  Bio::FinishM::GraphGenerator.new.add_options(optparse_object, options)

  optparse_object.separator "\nOptional graph search options:\n\n"
  optparse_object.on("--leash-length NUM", Integer, "Don't explore too far in the graph, only this many base pairs and not (much) more [default: #{options[:graph_search_leash_length]}]") do |arg|
    options[:graph_search_leash_length] = arg
  end
end

#gapfill(finishm_graph, probe_index1, probe_index2, options) ⇒ Object

Given a finishm graph, gapfill from the first probe to the second. Returns a Bio::AssemblyGraphAlgorithms::ContigPrinter::AnchoredConnection object.



282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# File 'lib/finishm/gapfiller.rb', line 282

# Given a finishm graph, gapfill from the first probe to the second.
#
# @param finishm_graph [Object] graph wrapper; this method calls
#   velvet_oriented_node, adjusted_leash_length, graph, velvet_sequences and
#   probe_node_reads on it
# @param probe_index1 [Object] index of the probe to start from
# @param probe_index2 [Object] index of the probe to finish at
# @param options [Hash] reads :graph_search_leash_length, :recoherence_kmer,
#   :max_explore_nodes, :max_gapfill_paths and :contig_end_length
# @return [Bio::AssemblyGraphAlgorithms::ContigPrinter::AnchoredConnection]
#   connection carrying both probe reads, the contig offsets and the paths found
# @raise [RuntimeError] if either probe cannot be retrieved from the graph
def gapfill(finishm_graph, probe_index1, probe_index2, options)
  start_onode = finishm_graph.velvet_oriented_node(probe_index1)
  end_onode_inward = finishm_graph.velvet_oriented_node(probe_index2)
  if !start_onode || !end_onode_inward
    # Report the probe indices: they are the only identifiers in scope here.
    raise "Unable to retrieve both probes from the graph (probe indices #{probe_index1} and #{probe_index2}), fail"
  end

  # The probe from finishm_graph points in the wrong direction for path
  # finding, so build a copy of the end node with its orientation flipped.
  end_onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new
  end_onode.node = end_onode_inward.node
  end_onode.first_side = end_onode_inward.starts_at_start? ? Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST : Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST

  adjusted_leash_length = finishm_graph.adjusted_leash_length(probe_index1, options[:graph_search_leash_length])
  log.debug "Using adjusted leash length #{adjusted_leash_length}" if log.debug?

  cartographer = Bio::AssemblyGraphAlgorithms::AcyclicConnectionFinder.new
  trails = cartographer.find_trails_between_nodes(
    finishm_graph.graph, start_onode, end_onode, adjusted_leash_length, {
      :recoherence_kmer => options[:recoherence_kmer],
      :sequences => finishm_graph.velvet_sequences,
      :max_explore_nodes => options[:max_explore_nodes],
      :max_gapfill_paths => options[:max_gapfill_paths],
      }
    )
  # NOTE(review): this only warns; execution continues and whatever trails
  # were returned are still converted below -- confirm that is intended.
  if trails.circular_paths_detected
    log.warn "Circular path detected here, not attempting to gapfill"
  end

  # Convert the trails into OrientedNodeTrail objects
  trails = trails.collect do |trail|
    path = Bio::Velvet::Graph::OrientedNodeTrail.new
    path.trail = trail
    path
  end

  acon = Bio::AssemblyGraphAlgorithms::ContigPrinter::AnchoredConnection.new
  acon.start_probe_noded_read = finishm_graph.probe_node_reads[probe_index1]
  acon.end_probe_noded_read = finishm_graph.probe_node_reads[probe_index2]
  acon.start_probe_contig_offset = options[:contig_end_length]
  acon.end_probe_contig_offset = options[:contig_end_length]
  acon.paths = trails

  acon
end

#run(options, argv) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/finishm/connect.rb', line 52

# Placeholder entry point: the body contains only comments outlining the
# intended steps, so calling it performs no work and returns nil.
#
# @param options [Hash] currently unused
# @param argv [Array] currently unused
# @return [nil]
def run(options, argv)
  # read in fasta file of read sets

  # create a finishm graph with each of the reads in the read sets as probes

  # Determine which nodes contain the reads, and choose two reads from each detected node as examples

  # Dijkstra out from each of the probe nodes, ignoring direction

  # Determine connections between readsets given the minimum distances

  # Output connectivity information
end

#validate_options(options, argv) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/finishm/connect.rb', line 33

# Checks the parsed options for this command.
#
# @param options [Hash] parsed options; :read_sets must be non-nil
# @param argv [Array] arguments left over after option parsing; must be empty
# @return [String, Object] an error message when a check here fails,
#   otherwise whatever Bio::FinishM::ReadInput#validate_options returns
#   (nil there signals that all options parsed successfully)
def validate_options(options, argv)
  #TODO: give a better description of the error that has occurred
  #TODO: require reads options
  leftover_count = argv.length
  return "Dangling argument(s) found e.g. #{argv[0]}" unless leftover_count == 0

  required_keys = [:read_sets]
  missing_key = required_keys.find { |key| options[key].nil? }
  return "No option found to specify #{missing_key}" if missing_key

  # Hand over to the read-input validator for the remaining checks
  Bio::FinishM::ReadInput.new.validate_options(options, [])
end