Class: Bio::AssemblyGraphAlgorithms::BubblyAssembler

Inherits:
SingleEndedAssembler show all
Includes:
FinishM::Logging
Defined in:
lib/assembly/bubbly_assembler.rb

Defined Under Namespace

Classes: Bubble, CircuitousPathDetected, ComparableArray, DynamicProgrammingProblem, MetaPath

Constant Summary collapse

DEFAULT_MAX_BUBBLE_LENGTH =
500
DEFAULT_BUBBLE_NODE_COUNT_LIMIT =

so, so very ‘un-educated’ guess

20
DEFAULT_BUBBLE_FORK_LIMIT =
20
DEFAULT_MAX_CYCLES =
1

Constants inherited from SingleEndedAssembler

SingleEndedAssembler::ASSEMBLY_OPTIONS, SingleEndedAssembler::DEFAULT_MAX_TIP_LENGTH, SingleEndedAssembler::DEFAULT_MIN_CONFIRMING_RECOHERENCE_READS, SingleEndedAssembler::DEFAULT_MIN_CONTIG_SIZE

Instance Attribute Summary

Attributes inherited from SingleEndedAssembler

#assembly_options, #graph

Instance Method Summary collapse

Methods included from FinishM::Logging

#log

Methods inherited from SingleEndedAssembler

#assemble, #find_beginning_trail_from_node, #find_tip_distance, #gather_starting_nodes, #is_short_tip?, #remove_tips, #setup_progressbar

Constructor Details

#initialize(graph, assembly_options = {}) ⇒ BubblyAssembler

Returns a new instance of BubblyAssembler.



28
29
30
31
32
33
34
35
# File 'lib/assembly/bubbly_assembler.rb', line 28

def initialize(graph, assembly_options={})
  opts = assembly_options
  opts[:max_bubble_length] ||= DEFAULT_MAX_BUBBLE_LENGTH
  opts[:bubble_node_count_limit] ||= DEFAULT_BUBBLE_NODE_COUNT_LIMIT
  opts[:bubble_fork_limit] ||= DEFAULT_BUBBLE_FORK_LIMIT
  opts[:max_cycles] ||= DEFAULT_MAX_CYCLES
  super graph, opts
end

Instance Method Details

#assemble_from(starting_path, visited_oriented_node_settables = Set.new) ⇒ Object

Starting at a node within a graph, walk through the graph accepting forks, so long as the fork paths converge within some finite length in the graph (the leash length, measured in number of base pairs).

Return an Array of Path arrays, a MetaPath, where each path array are the different paths that can be taken at each fork point



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# File 'lib/assembly/bubbly_assembler.rb', line 43

def assemble_from(starting_path, visited_oriented_node_settables=Set.new)
  leash_length = @assembly_options[:max_bubble_length]
  if log.info? and starting_path.kind_of?(Bio::Velvet::Graph::OrientedNodeTrail)
    log.info "Assembling from: #{starting_path.to_shorthand}"
  end

  filter_neighbours = lambda do |neighbours|
    legit_neighbours, visiteds = remove_tips(neighbours, @assembly_options[:max_tip_length])
    visiteds.each do |onode|
      log.debug "Adding #{onode} to list of visited nodes" if log.debug?
      visited_oriented_node_settables << onode
    end
    legit_neighbours
  end

  filterVisited = lambda do |oneigh|
    visited_oriented_node_settables.include? oneigh.to_settable
  end

  # set up basic dynamic programming problem
  baseProblem = lambda do |oneigh|
    new_problem = DynamicProgrammingProblem.new
    new_problem.distance = 0
    new_path = Bio::Velvet::Graph::OrientedNodeTrail.new
    new_path.add_oriented_node oneigh
    new_problem.path = new_path
    new_problem.ubiquitous_oriented_nodes = Set.new
    new_problem.ubiquitous_oriented_nodes << oneigh.to_settable
    new_problem.visited_oriented_nodes = Set.new
    new_problem.visited_oriented_nodes << oneigh.to_settable
    new_problem
  end

  # extend dynamic programming problem
  extendedProblem = lambda do |problem, oneigh|
    new_problem = DynamicProgrammingProblem.new
    new_problem.distance = problem.distance + problem.path[-1].node.length_alone
    new_path = problem.path.copy
    new_path.add_oriented_node oneigh
    new_problem.path = new_path
    new_problem.ubiquitous_oriented_nodes = Set.new problem.ubiquitous_oriented_nodes
    new_problem.ubiquitous_oriented_nodes << oneigh.to_settable
    new_problem.visited_oriented_nodes = Set.new problem.visited_oriented_nodes
    new_problem.visited_oriented_nodes << oneigh.to_settable
    new_problem.circular_path_detected = true if problem.visited_oriented_nodes.include? oneigh.to_settable
    new_problem
  end

  current_bubble = nil
  metapath = MetaPath.new
  starting_path.each do |oriented_node|
    log.debug "adding onode at the start: #{oriented_node.to_shorthand}" if log.debug?
    metapath << oriented_node
  end

  # Keep track of nodes visited in this trajectory already so circuits can be avoided
  #visited_oriented_node_settables = Set.new
  starting_path.each do |e|
    if e.kind_of?(Bubble)
      e.oriented_nodes do |onode|
        visited_oriented_node_settables << onode.to_settable
      end
    else
      visited_oriented_node_settables << e.to_settable
    end
  end
  #log.debug "Starting with visited nodes #{visited_oriented_node_settables.to_a.join(',')}" if log.debug?

  current_mode = :linear # :linear, :bubble, or :finished

  while current_mode != :finished
    if current_mode == :linear
      log.debug "Starting a non-bubble from #{metapath.to_shorthand}" if log.debug?
      while true
        oriented_neighbours = metapath.last_oriented_node.next_neighbours(@graph)
        log.debug "Found oriented neighbours #{oriented_neighbours.collect{|onode| onode.to_shorthand} }" if log.debug?

        legit_neighbours = nil
        # Cut off tips unless it is the only way
        if oriented_neighbours.length == 1
          legit_neighbours = oriented_neighbours
        else
          legit_neighbours = filter_neighbours.call(oriented_neighbours)
        end

        if legit_neighbours.empty?
          # This is just a straight out dead end, and we can go no further.
          log.debug "Dead end reached" if log.debug?
          metapath.fate = MetaPath::DEAD_END_FATE
          current_mode = :finished
          break
        elsif legit_neighbours.length == 1
          # Linear thing here, just keep moving forward
          neighbour = legit_neighbours[0]

          # Stop if a circuit is detected
          # Tim - Always stop on a circuit in linear mode. "We cannot get out." - Book of Mazarbul.
          if visited_oriented_node_settables.include?(neighbour.to_settable)
            log.debug "Detected circuit in linear mode by running into #{neighbour.to_settable}" if log.debug?
            metapath.fate = MetaPath::CIRCUIT_FATE
            current_mode = :finished
            break
          else
            visited_oriented_node_settables << neighbour.to_settable
            metapath << neighbour
          end

        else
          # Reached a fork in the graph here, the point of this algorithm, really.
          current_bubble = Bubble.new metapath.last_oriented_node
          log.debug "Starting a bubble forking from metapath #{metapath.to_shorthand}" if log.debug?

          if legit_neighbours.all? &filterVisited
            log.debug "Detected fork in linear mode where all neighbours have been previously traversed. This is effectively a dead end." if log.debug?
            metapath.fate = MetaPath::CIRCUIT_FATE
            current_mode = :finished
          end

          legit_neighbours.each do |oneigh|
            new_problem = baseProblem.call oneigh
            log.debug "Adding problem to bubble: #{new_problem}" if log.debug?

            current_bubble.enqueue new_problem
            current_mode = :bubble
          end
          break
        end
      end


    elsif current_mode == :bubble
      # We are in a bubble. Go get some.
      log.debug "entering bubble mode" if log.debug?

      # next problem = queue.shift. while distance of next problem is not beyond the leash length
      while current_mode == :bubble
        problem = current_bubble.shift

        if problem.nil?
          # Getting here seems improbable if not impossible.
          # The current bubble doesn't converge and just has short tips at the end, don't add it to the metapath
          metapath.fate = MetaPath::DEAD_END_FATE
          current_mode = :finished
          log.debug "Reached a dead end, ignoring this path" if log.debug?
          break
        end

        log.debug "Dequeued #{problem.to_shorthand}" if log.debug?
        if !leash_length.nil? and problem.distance > leash_length
          # The current bubble doesn't converge, don't add it to the metapath
          metapath.fate = MetaPath::DIVERGES_FATE
          current_mode = :finished
          log.debug "Bubble is past the leash length of #{leash_length}, giving up" if log.debug?
          break
        elsif current_bubble.convergent_on?(problem)
          log.debug "Bubble #{current_bubble.to_shorthand} convergent on #{problem.to_shorthand}" if log.debug?
          current_bubble.converge_on problem
          # convergement!
          # Bubble ended in a convergent fashion

          metapath << current_bubble
          # Add the nodes in the bubble to the list of visited nodes
          current_bubble.oriented_nodes do |onode|
            visited_oriented_node_settables << onode.to_settable
          end

          current_bubble = nil
          current_mode = :linear
          break
        else
          # otherwise we must search on in the bubble
          # get all neighbours that are not short tips
          log.debug "Bubble not convergent on #{problem.to_shorthand}" if log.debug?

          neighbours = problem.path.neighbours_of_last_node(@graph)

          # If there is only 1 way to go, go there
          if neighbours.length == 1
            log.debug "Only one way to go from this node, going there" if log.debug?

            oneigh = neighbours[0]
            new_problem = extendedProblem.call problem, oneigh
            current_bubble.enqueue new_problem
            log.debug "Enqueued #{new_problem.to_shorthand}, total nodes now #{current_bubble.num_known_problems} and num forks #{current_bubble.num_legit_forks}" if log.debug?

            # check to make sure we aren't going overboard in the bubbly-ness
            if !@assembly_options[:bubble_node_count_limit].nil? and current_bubble.num_known_problems > @assembly_options[:bubble_node_count_limit]
              log.debug "Too complex a bubble detected, giving up" if log.debug?
              metapath.fate = MetaPath::NODE_COUNT_LIMIT_REACHED
              current_mode = :finished
              break
            end
          else
            legit_neighbours = filter_neighbours.call(neighbours)

            if legit_neighbours.length == 0
              # this is a kind of 'long' tip, possibly unlikely to happen much.
              # Forget about it and progress to the next problem having effectively
              # removed it from the bubble
              log.debug "Found no neighbours to re-enqueue" if log.debug?
            else
              # Increment complexity counter if this is a real fork
              if legit_neighbours.length > 1
                current_bubble.num_legit_forks += 1
              end

              legit_neighbours.each do |oneigh|
                new_problem = extendedProblem.call problem, oneigh
                current_bubble.enqueue new_problem
                log.debug "Enqueued #{new_problem.to_shorthand}, total nodes now #{current_bubble.num_known_problems} and num forks #{current_bubble.num_legit_forks}" if log.debug?

                # check to make sure we aren't going overboard in the bubbly-ness
                if (!@assembly_options[:bubble_fork_limit].nil? and current_bubble.num_legit_forks > @assembly_options[:bubble_fork_limit]) or
                    (!@assembly_options[:bubble_node_count_limit].nil? and current_bubble.num_known_problems > @assembly_options[:bubble_node_count_limit])
                  log.debug "Too complex a bubble detected, giving up" if log.debug?
                  metapath.fate = MetaPath::NODE_COUNT_LIMIT_REACHED
                  current_mode = :finished
                  break
                end
              end
            end
          end
        end
      end
    else
      raise "Programming error: Unexpected mode: #{current_mode}"
    end

    log.debug "Reached end of main loop in mode #{current_mode}" if log.debug?
  end

  return metapath, visited_oriented_node_settables
end

#remove_seen_nodes_from_end_of_path(path, seen_nodes) ⇒ Object



287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# File 'lib/assembly/bubbly_assembler.rb', line 287

def remove_seen_nodes_from_end_of_path(path, seen_nodes)
  log.debug "Removing from the end of the path #{path.to_shorthand} any nodes in set of length #{seen_nodes.length}" if log.debug?

  node_seen = lambda do |oriented_node|
    seen_nodes.include?([oriented_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::START_IS_FIRST]) or
    seen_nodes.include?([oriented_node.node_id, Bio::Velvet::Graph::OrientedNodeTrail::END_IS_FIRST])
  end

  while !path.empty?
    last_node_or_bubble_index = path.length-1
    last_node_or_bubble = path[last_node_or_bubble_index]

    delete = false
    if last_node_or_bubble.kind_of?(Bubble)
      last_node_or_bubble.oriented_nodes do |onode|
        if node_seen.call(onode)
          delete = true
          break
        end
      end
    else
      delete = node_seen.call(last_node_or_bubble)
    end

    if delete
      path.delete_at last_node_or_bubble_index
    else
      # Last node is not previously seen, chop no further.
      break
    end
  end

  return path
end

#seen_last_in_path?(path, seen_nodes) ⇒ Boolean

Returns:

  • (Boolean)


277
278
279
280
281
282
283
284
# File 'lib/assembly/bubbly_assembler.rb', line 277

def seen_last_in_path?(path, seen_nodes)
  last = path[-1]
  if last.kind_of?(Bubble)
    return remove_seen_nodes_from_end_of_path(path, seen_nodes).length < path.length
  else
    return seen_nodes.include?(path[-1].to_settable)
  end
end