Class: Bio::FinishM::Fluff
- Inherits:
-
Object
- Object
- Bio::FinishM::Fluff
- Includes:
- Logging
- Defined in:
- lib/finishm/fluff.rb
Instance Method Summary
- #add_options(optparse_object, options) ⇒ Object
- #run(options, argv) ⇒ Object
- #validate_options(options, argv) ⇒ Object
Methods included from Logging
Instance Method Details
#add_options(optparse_object, options) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/finishm/fluff.rb', line 4
#
# Registers the `finishm fluff` command-line interface on an option parser and
# seeds the options hash with this command's defaults.
#
# optparse_object - option parser that receives the banner, separators and
#                   switches (must respond to banner=, separator and on).
# options         - Hash mutated in place: defaults are merged in straight away,
#                   and each switch stores its parsed value under a symbol key.
#
# Returns whatever the final GraphGenerator delegation returns.
#
# NOTE(review): the rendered listing this was recovered from had dropped the
# `options` identifier, the `banner` attribute and the delegate method names;
# they are restored here as `add_options` - confirm against ReadInput and
# GraphGenerator.
def add_options(optparse_object, options)
  optparse_object.banner = "\nUsage: finishm fluff --contigs <contig_file> --fastq-gz <reads..> --output-fluff-file <output.fa>\n\nTakes a set of contigs, and places probes across them (e.g. every 2kb), and then explores the graph from each of these probes, taking all paths within some leash length, including the 'fluff' which is not the same path as along the contig. Prints out all of these paths to a fasta file.\n\n"

  # Defaults go in before any switch is declared because the help strings
  # below interpolate them.
  options.merge!({
    :probe_spacing => 2000,
    :probe_length => 100,
    :graph_search_leash_length => 20000,
  })

  optparse_object.separator "\nRequired arguments:\n\n"
  optparse_object.on("--contigs FILE", "fasta file containing contigs to find the fluff on [required]") do |arg|
    options[:contigs_file] = arg
  end
  optparse_object.on("--output-fluff-file PATH", "Output found paths to this file in fasta format [required]") do |arg|
    options[:output_fluff_file] = arg
  end

  optparse_object.separator "\nThere must be some definition of reads too:\n\n" #TODO improve this help
  Bio::FinishM::ReadInput.new.add_options(optparse_object, options)

  optparse_object.separator "\nOptional arguments:\n\n"
  optparse_object.on("--probe-spacing NUM", Integer, "Distance between probe points in the contig [default: #{options[:probe_spacing]}]") do |arg|
    options[:probe_spacing] = arg
  end
  # The switch is spelled --probe-size but the value is stored as :probe_length.
  optparse_object.on("--probe-size NUM", Integer, "Length of the probe to be inserted into the velvet graph. Must be greater than graph kmer length. [default: #{options[:probe_length]}]") do |arg|
    options[:probe_length] = arg
  end
  optparse_object.on("--leash-length NUM", Integer, "Don't explore too far in the graph, only this far and not much more [default: #{options[:graph_search_leash_length]}]") do |arg|
    options[:graph_search_leash_length] = arg
  end
  optparse_object.on("--assembly-png PATH", "Output assembly as a PNG file [default: off]") do |arg|
    options[:output_graph_png] = arg
  end
  optparse_object.on("--assembly-svg PATH", "Output assembly as an SVG file [default: off]") do |arg|
    options[:output_graph_svg] = arg
  end
  optparse_object.on("--assembly-dot PATH", "Output assembly as a DOT file [default: off]") do |arg|
    options[:output_graph_dot] = arg
  end

  Bio::FinishM::GraphGenerator.new.add_options optparse_object, options
end
#run(options, argv) ⇒ Object
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/finishm/fluff.rb', line 74
#
# Runs the fluff command: cuts probes out of every contig, builds the assembly
# graph containing those probes, collects the paths found from each probe within
# the leash length, optionally renders the graph, and writes the paths as FASTA.
#
# options - Hash of parsed options. Keys read here: :contigs_file,
#           :probe_length, :probe_spacing, :graph_search_leash_length,
#           :output_fluff_file and the optional :output_graph_png /
#           :output_graph_svg / :output_graph_dot.
# argv    - leftover command-line arguments; not used by this method.
#
# NOTE(review): the rendered listing this was recovered from had dropped the
# `options` identifier and the method name called on read_input; the latter is
# restored as `parse_options` - confirm against Bio::FinishM::ReadInput.
def run(options, argv)
  probe_length = options[:probe_length]

  # Read in all the contig sequences, cutting forward-direction probes only.
  probe_sequences = []
  sequence_names = []
  Bio::FlatFile.foreach(options[:contigs_file]) do |seq|
    sequence_names.push seq.definition
    sequence = seq.seq

    # The last full-length probe starts at length - probe_length. The limit used
    # to be one less than that, which skipped the final window (and produced no
    # probe at all for a contig exactly probe_length long).
    0.step(sequence.length - probe_length, options[:probe_spacing]) do |offset|
      probe_sequences.push sequence[offset...offset + probe_length]
    end
  end
  log.info "Searching from #{probe_sequences.length} different probes from #{sequence_names.length} contigs"

  # Generate the graph with the probe sequences in it.
  read_input = Bio::FinishM::ReadInput.new
  read_input.parse_options options
  finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph(probe_sequences, read_input, options)

  # Explore outwards from each probe, collecting the paths found.
  fluffer = Bio::AssemblyGraphAlgorithms::Fluffer.new
  fluffings = fluffer.fluff(finishm_graph, options[:graph_search_leash_length])
  log.debug "Found these fluffings: #{fluffings}" if log.debug?
  # Seed the reduction with 0 so an empty result logs "0" rather than a blank.
  log.info "Found #{fluffings.collect { |sets| sets.length }.reduce(0, :+)} paths in total" if log.info?

  if options[:output_graph_png] || options[:output_graph_svg] || options[:output_graph_dot]
    log.info "Converting assembly to a graphviz PNG"
    viser = Bio::Assembly::ABVisualiser.new
    gv = viser.graphviz(finishm_graph.graph, {:start_node_ids => finishm_graph.probe_nodes.collect { |node| node.node_id }})
    gv.output :png => options[:output_graph_png], :use => :neato if options[:output_graph_png]
    gv.output :svg => options[:output_graph_svg], :use => :neato if options[:output_graph_svg]
    gv.output :dot => options[:output_graph_dot] if options[:output_graph_dot]
  end

  # Print out the sequences, one FASTA record per path; probe and path numbers
  # in the header are 1-based and followed by the fate recorded for that path.
  File.open(options[:output_fluff_file], 'w') do |output|
    fluffings.each_with_index do |path_set, probe_number|
      path_set.each_with_index do |path, path_number|
        fate = path_set.fates[path_number]
        output.puts ">probe#{probe_number + 1}_path#{path_number + 1} #{fate}"
        output.puts path.sequence
      end
    end
  end
end
#validate_options(options, argv) ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/finishm/fluff.rb', line 50
#
# Checks the parsed options for the fluff command.
#
# options - Hash of parsed options. Keys read here: :contigs_file,
#           :output_fluff_file, :velvet_kmer_size and :probe_length.
# argv    - arguments left over after option parsing; must be empty.
#
# Returns a String describing the first problem found. When none of the local
# checks fail, returns the result of Bio::FinishM::ReadInput#validate_options
# (nil there means all options were parsed successfully).
#
# NOTE(review): the rendered listing this was recovered from had dropped the
# `options` identifier and the delegate method name; the latter is restored as
# `validate_options` - confirm against Bio::FinishM::ReadInput.
def validate_options(options, argv)
  #TODO: give a better description of the error that has occurred
  #TODO: require reads options
  return "Dangling argument(s) found e.g. #{argv[0]}" unless argv.empty?

  [
    :contigs_file,
    :output_fluff_file
  ].each do |sym|
    return "No option found to specify #{sym}." if options[sym].nil?
  end

  # A probe that is not longer than the kmer cannot be placed in the graph.
  unless options[:velvet_kmer_size] < options[:probe_length]
    return "The probe length must be greater than the kmer length, otherwise it will not be incorporated into the kmer graph"
  end

  # If nil is returned from here, options all were parsed successfully.
  return Bio::FinishM::ReadInput.new.validate_options(options, [])
end