Class: Bio::Velvet::Graph::OrientedNodeTrail

Inherits:
Object
  • Object
show all
Includes:
Logging, Enumerable
Defined in:
lib/assembly/oriented_node_trail.rb

Overview

An ordered list of nodes, each with an orientation along that trail

Defined Under Namespace

Classes: IllDefinedTrailDefinition, InsufficientLengthException, OrientedNode

Constant Summary collapse

START_IS_FIRST =
:start_is_first
END_IS_FIRST =
:end_is_first

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(node_pairs = []) ⇒ OrientedNodeTrail

Initialize a new path. If an array is given, each element should be a pair: the first element of the pair is a node, and the second is either true/false or START_IS_FIRST/END_IS_FIRST (true is stored as START_IS_FIRST, false as END_IS_FIRST).



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/assembly/oriented_node_trail.rb', line 114

# Build a trail, optionally pre-populated from a list of [node, direction]
# pairs.
#
# node_pairs:: Array of two-element pairs. The first element must be a
#              Bio::Velvet::Graph::Node; the second must be true, false,
#              START_IS_FIRST or END_IS_FIRST. true is stored as
#              START_IS_FIRST and false as END_IS_FIRST.
#
# Raises a RuntimeError when a pair does not satisfy those constraints.
def initialize(node_pairs=[])
  @trail = []
  node_pairs.each do |pair|
    node, dir = pair[0], pair[1]
    well_formed = node.kind_of?(Bio::Velvet::Graph::Node) &&
      [true, false, START_IS_FIRST, END_IS_FIRST].include?(dir)
    if !well_formed
      raise "Bad initialisation of OrientedNodeTrail, with #{node_pairs.inspect}, particularly #{pair.inspect}"
    end

    onode = OrientedNode.new
    onode.node = node
    # Booleans are a convenience spelling of the two orientation constants.
    onode.first_side =
      case dir
      when true then START_IS_FIRST
      when false then END_IS_FIRST
      else dir
      end
    @trail.push onode
  end
end

Instance Attribute Details

#trailObject

Returns the value of attribute trail.



103
104
105
# File 'lib/assembly/oriented_node_trail.rb', line 103

# Reader for the underlying list held in @trail (the same object, not a
# copy).
def trail
  return @trail
end

Class Method Details

.create_from_shorthand(path_string, graph) ⇒ Object



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/assembly/oriented_node_trail.rb', line 135

# Build a trail from a shorthand string such as '1s,2e,3s'.
#
# Each comma-separated stone is a node ID immediately followed by 's'
# (START_IS_FIRST) or 'e' (END_IS_FIRST). Whitespace around each stone is
# ignored. An empty string gives an empty trail.
#
# path_string:: the shorthand description of the path
# graph::       object whose #nodes can be indexed by integer node ID
#
# Raises IllDefinedTrailDefinition when a stone cannot be parsed or refers
# to a node ID that graph.nodes does not return a node for.
def self.create_from_shorthand(path_string, graph)
  stones = path_string.split(',').collect{|s| s.strip}
  return self.new if stones.length == 0

  trail = stones.collect do |stone|
    # \A and \z anchor the whole stone. ^ and $ anchor per line, so a stone
    # containing a newline (e.g. "1s\n2e") used to be accepted as "1s" with
    # the remainder silently discarded.
    matches = stone.match(/\A(\d+)([se])\z/)
    if matches.nil?
      raise IllDefinedTrailDefinition, "Unable to underestand shorthand #{stone}"
    end

    node = graph.nodes[matches[1].to_i]
    raise IllDefinedTrailDefinition, "Unable to find node #{matches[1] } in the graph, cannot continue" if node.nil?

    onode = OrientedNode.new
    onode.node = node
    onode.first_side = matches[2] == 's' ? START_IS_FIRST : END_IS_FIRST
    onode
  end

  path = self.new
  path.trail = trail
  return path
end

.create_from_super_shorthand(path_string, graph) ⇒ Object

Given a string like ‘2,3,4’ (super-shorthand form), return the OrientedNodeTrail that this defines. Raises an IllDefinedTrailDefinition exception if there is any ambiguity.



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/assembly/oriented_node_trail.rb', line 164

# Build a trail from a super-shorthand string such as '2,3,4', where only
# node IDs are given and each node's orientation is worked out from the
# connections reported by OrientedNode#next_neighbours.
#
# path_string:: comma-separated node IDs; whitespace around each is ignored
# graph::       object whose #nodes can be indexed by integer node ID, and
#               which is passed on to OrientedNode#next_neighbours
#
# Returns an empty trail for an empty string.
#
# Raises IllDefinedTrailDefinition when only one node is given, a node ID is
# not found in the graph, consecutive nodes are not connected, or the
# orientation is ambiguous. Raises a RuntimeError when a stone is not a run
# of digits (exception class kept as-is for backward compatibility).
def self.create_from_super_shorthand(path_string, graph)
  stones = path_string.split(',').collect{|s| s.strip}
  return self.new if stones.length == 0
  if stones.length == 1
    raise IllDefinedTrailDefinition, "Cannot know path orientation when only one node is given"
  end
  trail = []

  stones.each_with_index do |stone, index|
    # \A and \z anchor the whole stone. ^ and $ anchor per line, so a stone
    # containing a newline (e.g. "2\n3") used to be accepted as "2" with the
    # remainder silently discarded.
    matches = stone.match(/\A([01-9]+)\z/)
    if matches.nil?
      raise "Unable to parse stepping stone along the path: `#{stone}'. Entire path was `#{path_string}'."
    end

    if index == 0
      # The first node's orientation cannot be decided until the second node
      # is known, so both are handled together on the next iteration.
      next
    elsif index == 1
      # Determine the direction of the first two nodes. Both stones have
      # already been validated as digit strings by this point.
      first, second = stones[0..1].collect do |node_id_string|
        node = graph.nodes[node_id_string.to_i]
        if node.nil?
          raise IllDefinedTrailDefinition, "Node `#{node_id_string}' from #{path_string} does not appear to be a node ID in the graph"
        end
        OrientedNode.new(node, START_IS_FIRST)
      end
      neighbours_of_first_s = first.next_neighbours(graph)

      rev_first = OrientedNode.new first.node, first.first_side
      rev_first.first_side = END_IS_FIRST
      neighbours_of_first_e = rev_first.next_neighbours(graph)

      seconds_via_start = neighbours_of_first_s.select{|n| n.node_id == second.node_id}
      seconds_via_end = neighbours_of_first_e.select{|n| n.node_id == second.node_id}

      if !seconds_via_start.empty?
        if !seconds_via_end.empty?
          raise IllDefinedTrailDefinition, "Both start and end of first node connect to second node, I'm confused."
        elsif seconds_via_start.length > 1
          raise IllDefinedTrailDefinition, "first node connects to both start and end of second node, I'm confused."
        else
          trail.push first
          trail.push seconds_via_start[0]
        end
      elsif !seconds_via_end.empty?
        if seconds_via_end.length > 1
          raise IllDefinedTrailDefinition, "first node connects to both start and end of second node, I'm confused."
        else
          trail.push rev_first
          trail.push seconds_via_end[0]
        end
      else
        raise IllDefinedTrailDefinition, "First and second nodes do not appear to be directly connected"
      end
    else
      # Third or later node: it must be a unique neighbour of the node most
      # recently added to the trail.
      last = trail[-1]
      neighbours_of_last = last.next_neighbours(graph)
      nexts = neighbours_of_last.select{|n| n.node_id == matches[1].to_i}
      if nexts.length == 0
        raise IllDefinedTrailDefinition, "Nodes #{last} and #{matches[1] } do not appear to be connected"
      elsif nexts.length > 1
        raise IllDefinedTrailDefinition, "Node #{last} connects to both the start and end of #{matches[1] }, I'm confused"
      else
        trail.push nexts[0]
      end
    end
  end

  to_return = OrientedNodeTrail.new
  to_return.trail = trail
  return to_return
end

Instance Method Details

#==(another) ⇒ Object



413
414
415
416
417
418
419
# File 'lib/assembly/oriented_node_trail.rb', line 413

def ==(another)
  return false if trail.length != another.trail.length
  each_with_index do |onode, i|
    return false unless onode == another[i]
  end
  return true
end

#[](index) ⇒ Object



291
292
293
# File 'lib/assembly/oriented_node_trail.rb', line 291

# Element reference, delegated to the underlying @trail list.
def [](index)
  @trail.slice(index)
end

#add_node(node, start_or_end) ⇒ Object

Add a node to the trail. start_or_end is either OrientedNodeTrail::START_IS_FIRST or OrientedNodeTrail::END_IS_FIRST



247
248
249
250
251
252
253
254
255
256
# File 'lib/assembly/oriented_node_trail.rb', line 247

# Append +node+ to the end of the trail with the given orientation.
#
# node::         the node to wrap in a new OrientedNode
# start_or_end:: START_IS_FIRST or END_IS_FIRST
#
# Raises a RuntimeError for any other orientation value.
def add_node(node, start_or_end)
  possible_orientations = [START_IS_FIRST, END_IS_FIRST]
  recognised = possible_orientations.include?(start_or_end)
  if !recognised
    raise "Unexpected orientation in node trail. Need one of #{possible_orientations.inspect}, found #{start_or_end}"
  end

  oriented = OrientedNode.new
  oriented.node, oriented.first_side = node, start_or_end
  @trail << oriented
end

#add_oriented_node(oriented_node) ⇒ Object



258
259
260
# File 'lib/assembly/oriented_node_trail.rb', line 258

# Append an already-built oriented node to the end of the trail. No
# validation is performed on the argument.
def add_oriented_node(oriented_node)
  @trail << oriented_node
end

#add_setabled_nodes(setabled_nodes, graph) ⇒ Object

Given an Array of [node_id, start_or_end] pairs add these to the trail



264
265
266
267
268
269
# File 'lib/assembly/oriented_node_trail.rb', line 264

# Append several nodes given as [node_id, start_or_end] pairs.
#
# setabled_nodes:: collection of two-element pairs
# graph::          object whose #nodes is indexed with each pair's node_id
#
# Each pair is handed to #add_node. Raises a RuntimeError
# ("programming error") when a pair does not have exactly two elements.
def add_setabled_nodes(setabled_nodes, graph)
  setabled_nodes.each do |pair|
    unless pair.length == 2
      raise "programming error"
    end
    node = graph.nodes[pair[0]]
    add_node(node, pair[1])
  end
end

#copyObject



343
344
345
346
347
# File 'lib/assembly/oriented_node_trail.rb', line 343

# Return a new OrientedNodeTrail whose trail is a fresh list holding a
# #copy of each entry of this trail.
def copy
  duplicate = OrientedNodeTrail.new
  duplicate.trail = @trail.map { |onode| onode.copy }
  duplicate
end

#coverageObject

The weighted average of coverages along the trail, (weighted by node length)



402
403
404
405
406
407
408
409
410
411
# File 'lib/assembly/oriented_node_trail.rb', line 402

# Average of the nodes' coverage values, each weighted by that node's
# length_alone.
#
# The division is a Float divided by the summed length, so a trail whose
# summed length is zero (including an empty trail) yields NaN.
def coverage
  weighted_total, summed_length = inject([0.0, 0]) do |(coverage_so_far, length_so_far), onode|
    node_length = onode.node.length_alone
    [coverage_so_far + onode.node.coverage * node_length, length_so_far + node_length]
  end
  weighted_total / summed_length
end

#delete_at(index) ⇒ Object



283
284
285
# File 'lib/assembly/oriented_node_trail.rb', line 283

# Remove the entry at +index+ from the trail and return it, as per
# Array#delete_at on the underlying list.
def delete_at(index)
  removed = @trail.delete_at(index)
  removed
end

#each(&block) ⇒ Object



271
272
273
# File 'lib/assembly/oriented_node_trail.rb', line 271

# Iterate over the entries of the trail in order by forwarding the given
# block (if any) to the underlying list's #each.
def each(&block)
  return @trail.each(&block)
end

#include_oriented_node?(oriented_node) ⇒ Boolean

Return true if the path contains the oriented node

Returns:

  • (Boolean)


297
298
299
# File 'lib/assembly/oriented_node_trail.rb', line 297

# Return true if the trail contains an entry equal to +oriented_node+,
# false otherwise.
def include_oriented_node?(oriented_node)
  present = @trail.include?(oriented_node)
  present
end

#inspectObject



359
360
361
# File 'lib/assembly/oriented_node_trail.rb', line 359

# Same text as #to_s.
def inspect
  return to_s
end

#lastObject



275
276
277
# File 'lib/assembly/oriented_node_trail.rb', line 275

# The final entry of the trail, or nil when the trail is empty.
def last
  @trail.last
end

#lengthObject



287
288
289
# File 'lib/assembly/oriented_node_trail.rb', line 287

# Number of entries in the trail.
def length
  @trail.size
end

#length_in_bpObject

Length of a contig made from this path



364
365
366
367
# File 'lib/assembly/oriented_node_trail.rb', line 364

# Length of a contig made from this path: #length_in_bp_within_path plus
# the hash_length of the first node's parent graph, minus one. An empty
# trail has length 0.
def length_in_bp
  return 0 if @trail.empty?

  within_path = length_in_bp_within_path
  hash_length = @trail[0].node.parent_graph.hash_length
  within_path + hash_length - 1
end

#length_in_bp_within_pathObject

Length of this trail if it is part of a larger path



370
371
372
373
374
375
# File 'lib/assembly/oriented_node_trail.rb', line 370

# Length of this trail if it is part of a larger path: the sum of each
# node's length_alone. An empty trail has length 0.
def length_in_bp_within_path
  bases = 0
  each do |onode|
    bases += onode.node.length_alone
  end
  bases
end

#neighbours_of_last_node(graph) ⇒ Object

Return a list of OrientedNode objects, one for each neighbour of the last node in this path (in the correct direction)



303
304
305
# File 'lib/assembly/oriented_node_trail.rb', line 303

# Ask +graph+ for the neighbours of the trail's last entry, passing that
# entry's node and first_side to graph.neighbours_of and returning whatever
# it returns.
def neighbours_of_last_node(graph)
  final = last
  graph.neighbours_of(final.node, final.first_side)
end

#remove_last_nodeObject



279
280
281
# File 'lib/assembly/oriented_node_trail.rb', line 279

# Remove the final entry from the trail and return it (nil when the trail
# is empty).
def remove_last_node
  removed = @trail.pop
  removed
end

#reverseObject



394
395
396
397
398
# File 'lib/assembly/oriented_node_trail.rb', line 394

# Return a reversed #copy of this trail; the receiver is left untouched by
# this method itself.
def reverse
  copy.tap { |reversed| reversed.reverse! }
end

#reverse!Object



386
387
388
389
390
391
392
# File 'lib/assembly/oriented_node_trail.rb', line 386

# Reverse this trail in place: the order of the entries is reversed and
# then #reverse! is called on every entry. Always returns nil.
def reverse!
  @trail.reverse!
  @trail.each(&:reverse!)
  nil
end

#sequenceObject

Return the sequence of the entire trail, or an empty string if there are no nodes in the trail. For certain (small) configurations of (short) nodes, there may be insufficient information to uniquely determine the sequence of the trail. In that case an exception is thrown.



311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# File 'lib/assembly/oriented_node_trail.rb', line 311

# Return the sequence of the entire trail, or '' for an empty trail.
#
# The result is the first (hash_length - 1) characters of the reverse
# complement of the twin-strand sequence, followed by the forward sequence
# from #sequences_within_path.
#
# Raises InsufficientLengthException when the twin-strand sequence is
# shorter than hash_length - 1.
def sequence
  return '' if @trail.empty?

  forward, twin = sequences_within_path
  overhang = @trail[0].node.parent_graph.hash_length - 1
  if twin.length < overhang
    raise InsufficientLengthException, "Not enough information to know the sequence of a node trail"
  end

  # Only used for the debug output below.
  node_lengths = @trail.map { |onode| onode.node.length_alone }
  required = node_lengths.inject(:+) + overhang - twin.length
  log.debug "first part: #{twin}"
  log.debug "second: #{forward[-required...forward.length] }"

  leading = revcom(twin)[0...overhang]
  # calculating this way should be the same, but is somehow buggy in velvet?
  #return revcom(twin)+forward[-required...forward.length]
  leading + forward
end

#sequences_within_pathObject



327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/assembly/oriented_node_trail.rb', line 327

# Build the two strand sequences contributed by the nodes of this trail.
#
# Returns [forward, twin]: the forward string is extended at its end for
# each successive entry, while the twin string is extended at its front.
# Which of ends_of_kmers_of_node / ends_of_kmers_of_twin_node feeds which
# strand depends on the entry's starts_at_start?. Returns ['', ''] for an
# empty trail.
def sequences_within_path
  return '', '' if @trail.empty?

  forward = ''
  twin = ''
  @trail.each do |onode|
    twin_piece, forward_piece =
      if onode.starts_at_start?
        [onode.node.ends_of_kmers_of_twin_node, onode.node.ends_of_kmers_of_node]
      else
        [onode.node.ends_of_kmers_of_node, onode.node.ends_of_kmers_of_twin_node]
      end
    twin = twin_piece + twin
    forward += forward_piece
  end
  return forward, twin
end

#to_sObject



349
350
351
# File 'lib/assembly/oriented_node_trail.rb', line 349

# Text of the form "OrientedNodeTrail: <object_id>: <shorthand>", where the
# shorthand comes from #to_shorthand.
def to_s
  format('OrientedNodeTrail: %s: %s', object_id, to_shorthand)
end

#to_short_sObject



353
354
355
356
357
# File 'lib/assembly/oriented_node_trail.rb', line 353

# Comma-separated node IDs of the entries in trail order, without any
# orientation marker (e.g. "1,2,3").
def to_short_s
  node_ids = map { |onode| onode.node.node_id }
  node_ids.join(',')
end

#to_shorthandObject



377
378
379
380
381
382
383
384
# File 'lib/assembly/oriented_node_trail.rb', line 377

# Shorthand text for the trail: each entry is its node ID followed by 's'
# when starts_at_start? is true and 'e' otherwise, joined with commas
# (e.g. "1s,2e").
def to_shorthand
  stones = @trail.map do |onode|
    side = onode.starts_at_start? ? 's' : 'e'
    "#{onode.node.node_id}#{side}"
  end
  stones.join(',')
end