Class: Bio::FinishM::Visualise
- Inherits:
-
Object
- Object
- Bio::FinishM::Visualise
- Includes:
- Logging
- Defined in:
- lib/finishm/visualise.rb
Constant Summary collapse
- DEFAULT_OPTIONS =
{ :min_adjoining_reads => 2, :max_adjoining_node_coverage => 300, :graph_search_leash_length => 20000, :interesting_probes => nil, :max_nodes => 50, :contig_end_length => 200 }
Instance Method Summary collapse
- #add_options(optparse_object, options) ⇒ Object
- #add_probe_options(optparse_object, options) ⇒ Object
- #add_scaffold_options(optparse_object, options) ⇒ Object
- #add_visualisation_options(optparse_object, options) ⇒ Object
- #create_graphviz_output(finishm_graph, options) ⇒ Object
- #find_paired_end_linkages(finishm_graph, node_array) ⇒ Object
- #generate_graph_from_assembly(read_input, options) ⇒ Object
- #generate_graph_from_nodes(read_input, options) ⇒ Object
- #generate_graph_from_probes(read_input, options) ⇒ Object
- #get_nodes_within_leash(finishm_graph, node_ids, options = {}) ⇒ Object
- #run(options, argv) ⇒ Object
- #validate_argv_length(argv) ⇒ Object
- #validate_assembly_options(options) ⇒ Object
- #validate_options(options, argv) ⇒ Object
- #validate_probe_options(options) ⇒ Object
- #validate_scaffold_options(options) ⇒ Object
- #validate_visualisation_options(options) ⇒ Object
-
#write_probe_to_node_map(probe_to_node_map_file, finishm_graph, names) ⇒ Object
Write to a file probe_to_node_map_file a map that shows the probe ID, which node that probe is on, and the name of the probe.
Methods included from Logging
Instance Method Details
#add_options(optparse_object, options) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/finishm/visualise.rb', line 13 def (optparse_object, ) .merge! DEFAULT_OPTIONS optparse_object. = "\nUsage: finishm visualise --assembly-??? <output_visualisation_file> Visualise an assembly graph \n\n" optparse_object.separator "Output visualisation formats (one or more of these must be used)" (optparse_object, ) optparse_object.separator "Input genome information" optparse_object.separator "\nIf an assembly is to be done, there must be some definition of reads:\n\n" #TODO improve this help Bio::FinishM::ReadInput.new.(optparse_object, ) optparse_object.separator "\nOptional graph-exploration arguments:\n\n" (optparse_object, ) (optparse_object, ) optparse_object.separator "\nOptional graph-related arguments:\n\n" Bio::FinishM::GraphGenerator.new.(optparse_object, ) end |
#add_probe_options(optparse_object, options) ⇒ Object
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/finishm/visualise.rb', line 88 def (optparse_object, ) optparse_object.on("--probe-ids PROBE_IDS", Array, "explore from these probe IDs in the graph (comma separated). probe ID is the ID in the velvet Sequence file. See also --leash-length [default: don't start from a node, explore the entire graph]") do |arg| [:interesting_probes] = arg.collect do |read| read_id = read.to_i if read_id.to_s != read or read_id.nil? or read_id < 1 raise "Unable to parse probe ID #{read}, from #{arg}, cannot continue" end read_id end end optparse_object.on("--probe-ids-file PROBE_IDS_FILE", String, "explore from the probe IDs given in the file (1 probe ID per line). See also --leash-length [default: don't start from a node, explore the entire graph]") do |arg| raise "Cannot specify both --probe-ids and --probe-ids-file sorry" if [:interesting_probes] [:interesting_probes] = [] log.info "Reading probe IDs from file: `#{arg}'" File.foreach(arg) do |line| line.strip! next if line == '' or line.nil? read_id = line.to_i if read_id.to_s != line or read_id < 1 or read_id.nil? raise "Unable to parse probe ID #{line}, from file #{arg}, cannot continue" end [:interesting_probes].push read_id end log.info "Read #{[:interesting_probes].length} probes in" end optparse_object.on("--probe-names-file PROBE_NAMES_FILE", String, "explore from the probe names (i.e. the first word in the fasta/fastq header) given in the file (1 probe name per line). See also --leash-length [default: don't start from a node, explore the entire graph]") do |arg| raise "Cannot specify any two of --probe-names-file, --probe-ids and --probe-ids-file sorry" if [:interesting_probes] [:interesting_probe_names] = [] log.info "Reading probe names from file: `#{arg}'" File.foreach(arg) do |line| line.strip! next if line == '' or line.nil? [:interesting_probe_names].push line.split(/\s/)[0] end log.info "Read #{[:interesting_probe_names].length} probes names in" end optparse_object.on("--probe-to-node-map FILE", String, "Output a tab separated file containing the read IDs and their respective node IDs [default: no output]") do |arg| [:probe_to_node_map] = arg end optparse_object.on("--node-ids NODE_IDS", Array, "explore from these nodes in the graph (comma separated). Node IDs are the nodes in the velvet graph. See also --leash-length [default: don't start from a node, explore the entire graph]") do |arg| [:interesting_nodes] = arg.collect do |read| node_id = read.to_i if node_id.to_s != read or node_id.nil? or node_id < 1 raise "Unable to parse node ID #{read}, from #{arg}, cannot continue" end node_id end end optparse_object.on("--leash-length NUM", Integer, "Don't explore too far in the graph, only this far and not much more [default: unused unless --probe-ids or --nodes is specified, otherwise #{[:graph_search_leash_length] }]") do |arg| [:graph_search_leash_length] = arg end optparse_object.on("--max-nodes NUM", Integer, "Maximum number of nodes to explore out from each probe node, or 0 for no maximum [default: #{[:max_nodes] }]") do |arg| if arg==0 [:max_nodes] = nil else [:max_nodes] = arg end end end |
#add_scaffold_options(optparse_object, options) ⇒ Object
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/finishm/visualise.rb', line 63 def (optparse_object, ) optparse_object.on("--genomes FASTA_1[,FASTA_2...]", Array, "Fasta files of genomes used in the assembly. Required if --scaffolds is given [default: unused]") do |arg| [:assembly_files] = arg end optparse_object.on("--scaffolds SIDE_1[,SIDE_2...]", Array, "explore from these scaffold ends e.g 'contig1s' for the start of contig1, 'contig1e' for the end of contig1, and 'contig1,contig3e' for both sides of contig1 and the end of contig3 [default: unused]") do |arg| [:scaffold_sides] = arg.collect do |side| if side.match(/[se]$/) side else ["#{side}s","#{side}e"] end end.flatten end optparse_object.on("--overhang NUM", Integer, "Start assembling this far from the ends of the contigs [default: #{[:contig_end_length]}]") do |arg| [:contig_end_length] = arg.to_i end end |
#add_visualisation_options(optparse_object, options) ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/finishm/visualise.rb', line 45 def (optparse_object, ) optparse_object.on("--assembly-svg PATH", "Output assembly as a SVG file [default: off]") do |arg| [:output_graph_svg] = arg end optparse_object.on("--assembly-png PATH", "Output assembly as a PNG file [default: off]") do |arg| [:output_graph_png] = arg end optparse_object.on("--assembly-dot PATH", "Output assembly as a DOT file [default: off]") do |arg| [:output_graph_dot] = arg end end |
#create_graphviz_output(finishm_graph, options) ⇒ Object
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 |
# File 'lib/finishm/visualise.rb', line 215 def create_graphviz_output(finishm_graph, ) log.info "Converting assembly to a graphviz.." gv = Bio::Assembly::ABVisualiser.new.graphviz(finishm_graph.graph, { :start_node_ids => [:start_node_ids], :nodes => [:nodes], :end_node_ids => [:end_node_ids], :paired_nodes_hash => [:paired_nodes_hash], :node_id_to_nickname => [:node_id_to_nickname] }) # Convert gv object to something actually pictorial if [:output_graph_png] log.info "Writing PNG #{[:output_graph_png] }" gv.output :png => [:output_graph_png], :use => :neato end if [:output_graph_svg] log.info "Writing SVG #{[:output_graph_svg] }" gv.output :svg => [:output_graph_svg], :use => :neato end if [:output_graph_dot] log.info "Writing DOT #{[:output_graph_dot] }" gv.output :dot => [:output_graph_dot], :use => :neato end end |
#find_paired_end_linkages(finishm_graph, node_array) ⇒ Object
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 |
# File 'lib/finishm/visualise.rb', line 389 def find_paired_end_linkages(finishm_graph, node_array) return {} if @finder.nil? paired_end_links = {} node_array.each do |node| paired_end_links[node.node_id] = [] [Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST, Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST].each do |direction| onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new(node, direction) paired_end_links[node.node_id].push @finder.neighbours(onode).collect{|n| n.node.node_id}.uniq end paired_end_links[node.node_id].flatten! end return paired_end_links end |
#generate_graph_from_assembly(read_input, options) ⇒ Object
278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 |
# File 'lib/finishm/visualise.rb', line 278 def generate_graph_from_assembly(read_input, ) # Parse the genome fasta file in genomes = Bio::FinishM::InputGenome.parse_genome_fasta_files( [:assembly_files], [:contig_end_length], ) # Create hash of contig end name to probe index contig_name_to_probe = {} genomes.each do |genome| genome.scaffolds.each_with_index do |swaff, scaffold_index| probes = [ genome.first_probe(scaffold_index), genome.last_probe(scaffold_index) ] probes.each do |probe| key = nil if probe.side == :start key = "#{probe.contig.scaffold.name}s" elsif probe.side == :end key = "#{probe.contig.scaffold.name}e" else raise "Programming error" end if contig_name_to_probe.key?(key) log.error "Encountered multiple contigs with the same name, this might cause problems, so quitting #{key}" end contig_name_to_probe[key] = probe.index end end end # Gather a list of probe indexes that are of interest to the user interesting_probe_ids = [] if [:scaffold_sides] # If looking at specified ends nodes_to_start_from = [:scaffold_sides].collect do |side| if probe = contig_name_to_probe[side] interesting_probe_ids << probe else raise "Unable to find scaffold side in given genome: #{side}" end end log.info "Found #{interesting_probe_ids.length} scaffold sides in the assembly of interest" else # else looking at all the contig ends in all the genomes interesting_probe_ids = contig_name_to_probe.values log.info "Visualising all #{interesting_probe_ids.length} contig ends in all genomes" end # Generate the graph probe_sequences = genomes.collect{|genome| genome.probe_sequences}.flatten finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph(probe_sequences, read_input, ) # Convert probe IDs into node IDs interesting_node_ids = interesting_probe_ids.collect do |pid| finishm_graph.probe_nodes[pid].node_id end.uniq # create a nickname hash, id of node to name. Include all nodes even if they weren't specified directly (they only get visualised if they are within leash length of another) node_id_to_nickname = {} contig_name_to_probe.each do |name, probe| key = finishm_graph.probe_nodes[probe].node_id if node_id_to_nickname.key?(key) node_id_to_nickname[key] += " "+name else node_id_to_nickname[key] = name end end return finishm_graph, interesting_node_ids, node_id_to_nickname end |
#generate_graph_from_nodes(read_input, options) ⇒ Object
264 265 266 267 268 269 270 271 272 273 274 275 276 |
# File 'lib/finishm/visualise.rb', line 264 def generate_graph_from_nodes(read_input, ) # Looking based on nodes if [:interesting_nodes].length > 5 log.info "Targeting #{[:interesting_nodes].length} nodes #{[:interesting_nodes][0..4].join(', ') }, ..." else log.info "Targeting #{[:interesting_nodes].length} node(s) #{[:interesting_nodes].inspect}" end finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, ) interesting_node_ids = [:interesting_nodes] return finishm_graph, interesting_node_ids end |
#generate_graph_from_probes(read_input, options) ⇒ Object
240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 |
# File 'lib/finishm/visualise.rb', line 240 def generate_graph_from_probes(read_input, ) # Looking based on probes if [:interesting_probe_names] if [:interesting_probe_names].length > 5 log.info "Targeting #{[:interesting_probe_names].length} probes #{[:interesting_probe_names][0..4].join(', ') }, ..." else log.info "Targeting #{[:interesting_probe_names].length} probes #{[:interesting_probe_names].inspect}" end [:probe_read_names] = [:interesting_probe_names] else if [:interesting_probes].length > 5 log.info "Targeting #{[:interesting_probes].length} probes #{[:interesting_probes][0..4].join(', ') }, ..." else log.info "Targeting #{[:interesting_probes].length} probes #{[:interesting_probes].inspect}" end [:probe_reads] = [:interesting_probes] end finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, ) interesting_node_ids = finishm_graph.probe_nodes.reject{|n| n.nil?}.collect{|node| node.node_id} return finishm_graph, interesting_node_ids end |
#get_nodes_within_leash(finishm_graph, node_ids, options = {}) ⇒ Object
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 |
# File 'lib/finishm/visualise.rb', line 353 def get_nodes_within_leash(finishm_graph, node_ids, ={}) log.info "Finding nodes within the leash length of #{[:graph_search_leash_length] } with maximum node count #{[:max_nodes] }.." dijkstra = Bio::AssemblyGraphAlgorithms::Dijkstra.new @finder = Bio::FinishM::PairedEndNeighbourFinder.new(finishm_graph, 500) #TODO: this hard-coded 100 isn't great here @finder.min_adjoining_reads = [:min_adjoining_reads] @finder.max_adjoining_node_coverage = [:max_adjoining_node_coverage] nodes_within_leash_hash = dijkstra.min_distances_from_many_nodes_in_both_directions( finishm_graph.graph, node_ids.collect{|n| finishm_graph.graph.nodes[n]}, { :ignore_directions => true, :leash_length => [:graph_search_leash_length], :max_nodes => [:max_nodes], :neighbour_finder => @finder }) nodes_within_leash = nodes_within_leash_hash.keys.collect{|k| finishm_graph.graph.nodes[k[0]]} log.info "Found #{nodes_within_leash.collect{|o| o.node_id}.uniq.length} node(s) within the leash length" # These nodes are at the end of the leash - a node is in here iff # it has a neighbour that is not in the nodes_within_leash node_ids_at_leash = Set.new nodes_within_leash_hash.keys.each do |node_and_direction| # Add it to the set if 1 or more nieghbours are not in the original set node = finishm_graph.graph.nodes[node_and_direction[0]] onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new node, node_and_direction[1] onode.next_neighbours(finishm_graph.graph).each do |oneigh| if !nodes_within_leash_hash.key?(oneigh.to_settable) node_ids_at_leash << node_and_direction[0] break #it only takes one to be listed end end end return nodes_within_leash.uniq, node_ids_at_leash.to_a.uniq end |
#run(options, argv) ⇒ Object
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
# File 'lib/finishm/visualise.rb', line 167 def run(, argv) read_input = Bio::FinishM::ReadInput.new read_input. # Generate the assembly graph log.info "Reading in or generating the assembly graph" if [:interesting_probes] or [:interesting_probe_names] finishm_graph, interesting_node_ids = generate_graph_from_probes(read_input, ) if ([:interesting_probes] or [:interesting_probe_names]) and [:probe_to_node_map] write_probe_to_node_map([:probe_to_node_map], finishm_graph, [:interesting_probes]) end elsif [:interesting_nodes] finishm_graph = generate_graph_from_nodes(read_input, ) interesting_node_ids = [:interesting_nodes] elsif [:assembly_files] finishm_graph, interesting_node_ids, node_id_to_nickname = generate_graph_from_assembly(read_input, ) [:node_id_to_nickname] = node_id_to_nickname else # Visualising the entire graph finishm_graph = Bio::FinishM::GraphGenerator.new.generate_graph([], read_input, ) end if [:graph_search_leash_length] and interesting_node_ids #log.info "Finding nodes within the leash length of #{options[:graph_search_leash_length] }.." nodes_within_leash, node_ids_at_leash = get_nodes_within_leash(finishm_graph, interesting_node_ids, ) log.info "Found #{node_ids_at_leash.length} nodes at the end of the #{[:graph_search_leash_length] }bp leash" if [:graph_search_leash_length] .merge!({ :start_node_ids => interesting_node_ids, :nodes => nodes_within_leash, :end_node_ids => node_ids_at_leash, }) else [:nodes] = finishm_graph.graph.nodes end # Determine paired-end connections log.info "Determining paired-end node connections.." paired_end_links = find_paired_end_linkages(finishm_graph, [:nodes]) [:paired_nodes_hash] = paired_end_links create_graphviz_output(finishm_graph, ) end |
#validate_argv_length(argv) ⇒ Object
161 162 163 164 165 |
# File 'lib/finishm/visualise.rb', line 161 def validate_argv_length(argv) if argv.length != 0 return "Dangling argument(s) found e.g. #{argv[0] }" end end |
#validate_assembly_options(options) ⇒ Object
154 155 156 157 158 159 |
# File 'lib/finishm/visualise.rb', line 154 def () # Need reads unless there is already an assembly unless [:previous_assembly] or [:previously_serialized_parsed_graph_file] return Bio::FinishM::ReadInput.new.(, []) end end |
#validate_options(options, argv) ⇒ Object
35 36 37 38 39 40 41 42 43 |
# File 'lib/finishm/visualise.rb', line 35 def (, argv) #TODO: give a better description of the error that has occurred #TODO: require reads options return validate_argv_length(argv) || () || () || () || () end |
#validate_probe_options(options) ⇒ Object
148 149 150 151 152 |
# File 'lib/finishm/visualise.rb', line 148 def () if [:interesting_probes] and [:interesting_nodes] return "Can only be interested in probes or nodes, not both, at least currently" end end |
#validate_scaffold_options(options) ⇒ Object
81 82 83 84 85 86 |
# File 'lib/finishm/visualise.rb', line 81 def () # If scaffolds are defined, then probe genomes must also be defined if [:scaffold_sides] and ![:assembly_files] return "If --scaffolds is given, then --genomes must also be given" end end |
#validate_visualisation_options(options) ⇒ Object
57 58 59 60 61 |
# File 'lib/finishm/visualise.rb', line 57 def () if [:output_graph_png].nil? and [:output_graph_svg].nil? and [:output_graph_dot].nil? return "No visualisation output format/file given, don't know how to visualise" end end |
#write_probe_to_node_map(probe_to_node_map_file, finishm_graph, names) ⇒ Object
Write to a file probe_to_node_map_file a map that shows the probe ID, which node that probe is on, and the name of the probe
408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 |
# File 'lib/finishm/visualise.rb', line 408 def write_probe_to_node_map(probe_to_node_map_file, finishm_graph, names) log.info "Writing probe-to-node map to #{x}.." File.open(probe_to_node_map_file,'w') do |f| f.puts %w(probe_number probe node direction).join("\t") finishm_graph.probe_nodes.each_with_index do |node, i| if node.nil? f.puts [ i+1, names[i], '-', '-', ].join("\t") else f.puts [ i+1, names[i], node.node_id, finishm_graph.probe_node_directions[i] == true ? 'forward' : 'reverse', ].join("\t") end end end end |