Class: Bio::AssemblyGraphAlgorithms::Fluffer
- Inherits:
-
Object
- Object
- Bio::AssemblyGraphAlgorithms::Fluffer
- Includes:
- FinishM::Logging
- Defined in:
- lib/assembly/fluffer.rb
Defined Under Namespace
Classes: FlufferHalfResult, FlufferPathSet
Constant Summary collapse
- BEYOND_LEASH_LENGTH_FATE =
:beyond_leash_length
- TERMINAL_NODE_FATE =
:terminal_node
- DEAD_END_FATE =
:dead_end
Instance Method Summary collapse
-
#fluff(finishm_graph, leash_length, options = {}) ⇒ Object
Return an array of array of paths.
- #fluff_part1(finishm_graph, leash_length, options = {}) ⇒ Object
- #fluff_part2(half_results) ⇒ Object
Methods included from FinishM::Logging
Instance Method Details
#fluff(finishm_graph, leash_length, options = {}) ⇒ Object
Return an array of array of paths.
32 33 34 35 36 37 38 39 |
# File 'lib/assembly/fluffer.rb', line 32 def fluff(finishm_graph, leash_length, ={}) log.debug "Fluffing part 1.." if log.debug? half_results = fluff_part1(finishm_graph, leash_length, ) log.debug "Found fluff half results: #{half_results}" if log.debug? log.debug "Fluffing part 2.." if log.debug? return fluff_part2(half_results) end |
#fluff_part1(finishm_graph, leash_length, options = {}) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
# File 'lib/assembly/fluffer.rb', line 41 def fluff_part1(finishm_graph, leash_length, ={}) # Get a set of all probes so that they can be checked against log.debug "Found #{finishm_graph.probe_nodes.reject{|node| node.nil?}.length} different probes that were in the final velvet graph" if log.debug? half_results = [] graph = finishm_graph.graph # For each probe in the graph finishm_graph.probe_nodes.each_with_index do |node, probe_index| # If the node is not found in the graph, then forget it if node.nil? half_results.push FlufferHalfResult.new else # probe was found in the graph. Start finding them paths. # start exploring the squid way golden_paths = [] #These is the full paths found golden_fragments = [] #paths to join up to the end already_visited_nodes = Set.new #Nodes that have already been explored golden_path_fates = [] #An array of how each of the paths were halted golden_onodes = Set.new #Nodes that stop exploration terminal_nodes = Set.new # Add all the nodes that are being probed, because we don't want double exploration # i.e. want probe1 => probe2, probe2 => probe3, but not probe1 => probe2 => probe3 finishm_graph.probe_nodes.each_with_index do |node, probe_index2| # Don't add the node itself. This is a special case which is already handled below unless probe_index == probe_index2 or node.nil? terminal_nodes << finishm_graph.velvet_oriented_node(probe_index2).to_settable end end stack = DS::Stack.new stack.push finishm_graph.initial_path_from_probe(probe_index) while current_path = stack.pop log.debug "Perhaps #{current_path}?" if log.debug? if golden_onodes.include?(current_path.last.to_settable) # Probably a golden fragment, unless the node found is in the current path. # if that is true, that's a loop along a golden path. first_index = nil current_path.each_with_index do |directed_node, i| if directed_node.node == current_path.last.node and directed_node.first_side == current_path.last.first_side first_index = i break end end if first_index == current_path.length-1 # Found another golden path(s) log.debug "Ran into a golden node" if log.debug? golden_fragments.push current_path current_path.each do |onode| golden_onodes << onode.to_settable end else # Loop found along a golden path or fragment log.debug "Found a loop along a golden path: #{current_path}" if log.debug? next end elsif already_visited_nodes.include?(current_path.last.to_settable) and !terminal_nodes.include?(current_path.last.to_settable) # Already seen this (non-golden) node, do nothing with it log.debug "Skipping #{current_path.last} since that has already been seen" if log.debug? next else if log.debug? second_last_node = current_path[current_path.length-2] second_last_node ||= 'initial_node' log.debug "That's a new node, #{second_last_node}/#{current_path.last}" if log.debug? end # if we aren't beyond the leash. Presumably this # doesn't happen much, but there is a possibility that the leash will # prevent a real path being discovered. If there is two or more paths to a node # and a path longer than the leash is discovered first, then all the # nodes on that leash will be marked as discovered when they really aren't # TODO: fix the above bug if leash_length.nil? or current_path.length_in_bp < leash_length # Found a new node for the user to play with already_visited_nodes << current_path.last.to_settable # Have we found a path to one of the other probes? if terminal_nodes.include?(current_path.last.to_settable) log.debug "Found the terminal node: #{current_path}" if log.debug? golden_paths.push current_path golden_path_fates.push TERMINAL_NODE_FATE else # Use an else statement here because we want exploration to stop when other probes are encountered # prep for next time # Sort node IDs to simplify testing next_nodes = current_path.neighbours_of_last_node(graph).sort{|n1, n2| -(n1.node.node_id <=> n2.node.node_id) } next_nodes.each do |n| path = current_path.copy path.add_oriented_node n log.debug "Pushing a path yet to be explored: #{path}" if log.debug? stack.push path end # If we are at a dead end, add this whole path as a golden path. This # is low coverage fluff, perhaps. Or it is nothing. if next_nodes.empty? log.debug "Found a dead end path: #{current_path}" if log.debug? golden_paths.push current_path golden_path_fates.push DEAD_END_FATE current_path.each do |onode| golden_onodes << onode.to_settable end end end else if log.debug? log.debug "Found a path that made it to the end of the leash, with path length #{current_path.length_in_bp} vs leash length #{leash_length}" log.debug "Path given up on: #{current_path}" log.debug "Path sequence given up on: #{current_path.sequence}" log.debug "Node lengths: #{current_path.collect{|n| n.node.length_alone}.join(',')}" end # Record this past-leash-length path golden_paths.push current_path golden_path_fates.push BEYOND_LEASH_LENGTH_FATE current_path.each do |onode| golden_onodes << onode.to_settable end end end end log.debug "Found #{golden_paths.length} golden paths and #{golden_fragments.length} golden fragments" if log.debug? fluff_half_result = FlufferHalfResult.new fluff_half_result.golden_paths = golden_paths fluff_half_result.golden_path_fragments = golden_fragments fluff_half_result.golden_path_fates = golden_path_fates half_results.push fluff_half_result end end return half_results end |
#fluff_part2(half_results) ⇒ Object
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 |
# File 'lib/assembly/fluffer.rb', line 184 def fluff_part2(half_results) all_all_paths = [] half_results.each do |segment_half_result| # OK, so we've transformed the data into a state where there is # at least one path through the data # and tentacles hanging off various golden nodes. # Now separate out the paths and return the array. # First transform the data so it can be referenced by the end node terminal_golden_nodes_to_paths = {} segment_half_result.golden_path_fragments.each do |fragment| l = fragment.last.to_settable terminal_golden_nodes_to_paths[l] ||= [] terminal_golden_nodes_to_paths[l].push fragment end # Next backtrack through the paths # Each path starts at the beginning and ends at a # golden node all_paths = FlufferPathSet.new stack = DS::Stack.new # Push the golden path and all paths that finish at the last node segment_half_result.golden_paths.each_with_index do |golden_path, i| stack.push [golden_path, 0, segment_half_result.golden_path_fates[i]] end while array = stack.pop current_path = array[0] num_to_ignore = array[1] fate = array[2] log.debug "Defragging #{current_path.to_s}/#{fate}, ignoring the last #{num_to_ignore} nodes" if log.debug? all_paths.push current_path all_paths.fates.push fate # Iterate down this path, spawning new paths if there # are paths that intersect passed_nodes = [] current_path.trail.reverse.each_with_index do |onode, i| unless i < num_to_ignore #ignore the last one(s) because they've already been handled frags = terminal_golden_nodes_to_paths[onode.to_settable] log.debug "Offshoots from #{onode}: #{frags.nil? ? '[]' : frags.collect{|f| f.collect{|n| n.node_id}.join(',')}.join(' and ')}" if log.debug? if frags frags.each do |fragment| log.debug "Using an offshoot: #{fragment.to_s}" if log.debug? # The fragment extends from the beginning to the golden node, # the current node. So create a new complete path, # And push it to the stack. new_golden = fragment.copy log.debug "Adding #{new_golden.to_s} and #{passed_nodes.collect{|n| n.node_id}}" if log.debug? passed_nodes.reverse.each_with_index do |onode, i| new_golden.add_oriented_node onode end log.debug "Enqueueing: #{new_golden.to_s} ignoring the last #{i+1} nodes" if log.debug? stack.push [new_golden, i+1, fate] end end end passed_nodes.push onode end end # All the paths are in an array, just a linear series of distinct paths all_all_paths.push all_paths end return all_all_paths end |