Class: Bio::AssemblyGraphAlgorithms::HeightFinder

Inherits:
Object
  • Object
show all
Includes:
FinishM::Logging
Defined in:
lib/assembly/height_finder.rb

Defined Under Namespace

Classes: CyclicTraversalNode, TraversalNode

Instance Method Summary collapse

Methods included from FinishM::Logging

#log

Instance Method Details

#find_oriented_edge_of_range(graph, nodes = nil) ⇒ Object



289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
# File 'lib/assembly/height_finder.rb', line 289

def find_oriented_edge_of_range(graph, nodes=nil)
  nodes ||= graph.nodes
  log.debug "Looking for oriented start and end points from #{nodes.collect{|n| n.node_id}.join(',')}" if log.debug?
  nodes_all_directions = nodes.collect{|node| [[node, true], [node, false]]}.flatten(1)


  # Find nodes and directions which are not reachable from other nodes within range
  unreached_nodes = {}
  nodes_all_directions.each do |node_and_direction|
    onode = Bio::Velvet::Graph::OrientedNodeTrail::OrientedNode.new node_and_direction[0], node_and_direction[1]
    unless unreached_nodes.has_key? onode.to_settable
      unreached_nodes[onode.to_settable] = onode
    end
    onode.next_neighbours(graph).each do |oneigh|
      unreached_nodes[oneigh.to_settable] = nil
    end
  end

  entry_points = unreached_nodes.values.reject{|n| n.nil?}
  log.debug "Found the following nodes for a particular orientation have no paths connecting to other nodes in range: #{entry_points.collect{|n| n.to_shorthand}.join(',')}" if log.debug?

  # Start from an unreachable node, and trace all paths until the reverse end of other unreachable nodes
  # are reached, which are then defined as 'end' nodes. When finished, choose a remaining non-end unreachable
  # node and repeat, stopping paths if an already seen node is encountered.
  seen_nodes = Set.new
  start_onodes = []
  end_onodes = []
  stack = DS::Stack.new
  entry_points.reverse.each do |onode|
    stack.push onode
  end

  while current_node = stack.pop
    log.debug "At node #{current_node.to_shorthand}" if log.debug?

    node_id = current_node.node_id
    if seen_nodes.include? node_id or not nodes.include? current_node.node
      log.debug "Node has been seen or is out of range. Skipping..." if log.debug?
      next
    end
    seen_nodes << node_id

    current_unreached = unreached_nodes[current_node.to_settable]
    log.debug "Is current unreached? #{current_unreached}" if log.debug?
    if current_unreached
      log.debug "Defining starting node #{current_unreached.to_shorthand}" if log.debug?
      # Found start node
      start_onodes.push current_unreached
    else
      reverse_unreached = unreached_nodes[current_node.reverse.to_settable]
      log.debug "Is reverse unreached? #{reverse_unreached}" if log.debug?
      if reverse_unreached
        log.debug "Found ending node #{reverse_unreached.to_shorthand}" if log.debug?
        # Found end node
        end_onodes.push reverse_unreached
      end
    end

    current_node.next_neighbours(graph).each do |onode|
      log.debug "Adding neighbour #{onode.to_shorthand} to stack" if log.debug?
      stack.push onode
    end
  end

  return start_onodes, end_onodes
end

#max_paths_through(by_height) ⇒ Object

maximum paths



227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
# File 'lib/assembly/height_finder.rb', line 227

def max_paths_through(by_height)
  max_paths_from = {}
  by_height.each_with_index do |nodes, level|
    log.debug "At height #{level}." if log.debug?
    if level == 0  # tips
      nodes.each do |node|
        log.debug "Counted maximum of 1 path to #{node.describe}." if log.debug?
        max_paths_from[node.onode.to_settable] = 1
      end
      next
    end

    nodes.each do |node|
      settable = node.onode.to_settable
      max_paths_from_neighbours = node.nodes_out.collect{|nbr| max_paths_from[nbr.onode.to_settable]}.reject{|n| n.nil?}
      log.debug "Found neighbours of #{node.describe} with maximum paths #{max_paths_from_neighbours.join(',')}." if log.debug?
      max_paths_from[settable] = max_paths_from_neighbours.reduce{|memo, num| memo+num}
      log.debug "Counted maximum of #{max_paths_from[settable]} paths to #{node.describe}." if log.debug?
    end
  end

  # Get the graph roots (which are nodes with no parents) and add max_paths_from for each to get graph total
  root_keys = by_height.flatten.select{|node| node.nodes_in.empty? }.collect{|node| node.onode.to_settable}
  log.debug "Found graph roots #{root_keys.collect{|settable| settable[0]}.join(',')} with maximum paths #{root_keys.collect{|key| max_paths_from[key]}.join(',')}." if log.debug?
  max_paths = root_keys.map{|settable| max_paths_from[settable]}.reduce{|memo, num| memo+num}
  log.debug "Counted maximum of #{max_paths} through graph." if log.debug?
  return max_paths
end

#min_paths_through(by_height) ⇒ Object

minimum paths



257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
# File 'lib/assembly/height_finder.rb', line 257

def min_paths_through(by_height)
  live_nodes = Set.new
  max_alive_counter = 0
  by_height.each_with_index do |nodes, level|
    log.debug "At height #{level}." if log.debug?
    # nodes at current level become live
    nodes.each do |node|
      settable = node.onode.to_settable
      log.debug "Setting #{node.describe} as live." if log.debug?
      live_nodes << settable
    end
    if level > 0
      #children of nodes at current level are no longer live
      nodes.each do |node|
        children = node.nodes_out
        children.each do |nbr|
          log.debug "Setting child #{nbr.describe} of live node #{node.describe} as inactive." if log.debug?
          live_nodes.delete(nbr.onode.to_settable)
        end
      end
    end

    log.debug "There are currently #{live_nodes.length} nodes alive. Max is #{max_alive_counter}." if log.debug?
    if live_nodes.length > max_alive_counter
      #track the maximum live nodes at any level
      log.debug "Updating max to #{live_nodes.length}." if log.debug?
      max_alive_counter = live_nodes.length
    end
  end
  return max_alive_counter
end

#traverse(graph, initial_nodes, options = {}) ⇒ Object

visit nodes in range and determine heights



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/assembly/height_finder.rb', line 8

def traverse(graph, initial_nodes, options={})
  by_height = []
  traversal_nodes = {}
  cycles = {}
  nodes_in_retrace_phase = Set.new

  # depth-first so stack
  stack = DS::Stack.new
  initial_nodes.each do |onode|
    next if options[:range] and options[:range].none?{|other| other == onode.node }
    traversal_node = CyclicTraversalNode.new
    traversal_node.onode = options[:reverse] ? onode.reverse : onode
    traversal_node.nodes_in = []
    traversal_nodes[traversal_node.onode.to_settable] = traversal_node
    stack.push traversal_node
  end

  while traversal_node = stack.pop
    settable = traversal_node.onode.to_settable
    describe = nil

    if log.debug?
      log.debug "visiting #{traversal_node.describe}."
    end

    # Consider node solved if height is known.
    if not traversal_node.height.nil?
      log.debug "Height of #{traversal_node.describe} is known. Skip." if log.debug?
      next
    end

    # find neighbours
    neighbours = traversal_node.nodes_out
    if neighbours.nil?
      neighbours = traversal_node.onode.next_neighbours(graph)
      if options[:range]
        neighbours.reject!{|onode| options[:range].none?{|other| other == onode.node}} #not in defined range
      end

      # Get or create traversal version of node
      neighbours = neighbours.collect do |onode|
        nbr_settable = onode.to_settable
        traversal_nbr = traversal_nodes[nbr_settable]
        if traversal_nbr.nil?
          traversal_nbr = CyclicTraversalNode.new
          traversal_nbr.onode = onode
          traversal_nbr.nodes_in = []
          traversal_nodes[nbr_settable] = traversal_nbr
        end
        traversal_nbr
      end

      #remember neighbours
      traversal_node.nodes_out = neighbours
    end


    # Can we solve the node?
    if neighbours.empty? #check for a tip
      log.debug "#{traversal_node.describe} is a tip." if log.debug?
      traversal_node.height = 0
      if by_height[0].nil?
        by_height[0] = [traversal_node]
      else
        by_height[0].push(traversal_node)
      end
      log.debug "Found height '0' for #{traversal_node.describe}." if log.debug?
      next
    end

    if nodes_in_retrace_phase.include? settable
      log.debug "Retracing back to #{traversal_node.describe}." if log.debug?

      # Neighbours should have been explored
      # Are neighbours involved in cycles?
      cyclic_neighbours = neighbours.reject{|node| node.cycles.nil?}
      if not cyclic_neighbours.empty?
        # current node is in a cycle if a neighbour is in an unclosed cycle
        log.debug "Found cyclic neighbours #{cyclic_neighbours.collect{|node| node.describe}.join(',')}." if log.debug?
        cyclic_neighbours.each do |node|
          node.cycles.each do |cycle|
            log.debug "Merging cycle #{cycle.onodes.collect{|onode| onode.to_shorthand}.join(',')}." if log.debug?
            new_cycle = traversal_node.merge_unclosed_cycle cycle.copy
            if not new_cycle.nil? and new_cycle.closed?
              log.debug "Cycle completes at #{traversal_node.describe}."
              new_cycle_key = new_cycle.to_settable
              if cycles.has_key? new_cycle_key
                log.debug "Already seen this cycle." if log.debug?
              else
                cycles[new_cycle_key] = new_cycle.onodes
              end
            end
          end
        end
      end

      # Unsolved neighbours imply a closed cyclic path.
      # Are neighbours unsolved?
      solved_neighbours = neighbours.reject{|node| node.height.nil?}
      unless solved_neighbours.empty?
        log.debug "We know the heights of neighbours #{solved_neighbours.collect{|node| node.describe}.join(',')}." if log.debug?
        # Compute height from solved neighbours
        height = solved_neighbours.map{|node| node.height}.max + 1
        log.debug "Found height '#{height}' for #{traversal_node.describe}." if log.debug?
        traversal_node.height = height
        if by_height[height].nil?
          by_height[height] = [traversal_node]
        else
          by_height[height].push(traversal_node)
        end
      end
      # If no solved neighbours, leave unsolved

      # Move out of retrace phase
      nodes_in_retrace_phase.delete settable
      log.debug "Finished retracing #{traversal_node.describe}." if log.debug?
      next
    end

    # Move current node to retrace phase, before checking for retracing neighbours in case is own neighbour
    nodes_in_retrace_phase << settable

    # Look for currently retracing neighbours and initiate cycles
    retracing_neighbours = neighbours.select{|node| nodes_in_retrace_phase.include? node.onode.to_settable}
    if not retracing_neighbours.empty?
      log.debug "Initiating cycles for neighbours #{retracing_neighbours.collect{|node| node.describe}.join(',')} currently retracing." if log.debug?
      # initiate cycles for each retracing neighbour
      retracing_neighbours.each{|node| traversal_node.initiate_cycle(node.onode)}
    end

    # Return node stack and push neighbours
    stack.push traversal_node
    log.debug "Pushing #{traversal_node.describe} in retrace mode." if log.debug?
    neighbours.each do |node|
      node_settable = node.onode.to_settable

      # Note the parent of neighbour unless already known
      nodes_in = node.nodes_in
      if not nodes_in.any?{|nbr| nbr.onode == node.onode}
        nodes_in.push traversal_node
      end

      if nodes_in_retrace_phase.include? node_settable
        # A currently retracing neighbour implies a cycle, cut it off here
        log.debug "Neighbour #{node.describe} is retracing. Not revisiting." if log.debug?
      else
        log.debug "Pushing neighbour #{node.describe}." if log.debug?
        stack.push node
      end
    end
  end
  return by_height, cycles.values
end