Class: Bio::AssemblyGraphAlgorithms::NodeFinder

Inherits:
Object
  • Object
show all
Includes:
FinishM::Logging
Defined in:
lib/assembly/node_finder.rb

Instance Method Summary collapse

Methods included from FinishM::Logging

#log

Instance Method Details

#find_nodes_with_kmers(velvet_graph, kmers) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/assembly/node_finder.rb', line 126

def find_nodes_with_kmers(velvet_graph, kmers)
  # TODO: search in a more sane way, algorithmically
  # TODO: only choose kmers that are unique to the assembly
  found_nodes = []
  kmers.each_with_index do |fwd, i|
    rev = Bio::Sequence::NA.new(fwd).reverse_complement.to_s.upcase
    #log.debug "Testing kmer #{i} #{fwd} / #{rev}"
    current_haul = []
    velvet_graph.nodes.each do |node|
      if log.debug?
        str = "Searching for #{fwd} and #{rev} in node #{node.node_id}"
        #str += node.sequence
      end
      if node.sequence? and (node.sequence.include?(fwd) or node.sequence.include?(rev))
        current_haul.push node
      end
    end
    if !current_haul.empty?
      if current_haul.length == 1
        node = current_haul[0]
        log.debug "Found a possibly suitable node in the graph, of length #{node.length}" if log.debug?
        found_node = node
        if found_node.sequence.include?(fwd)
          if found_node.sequence.include?(rev)
            log.debug "Found a kmer that is included in the forward and reverse directions of the same node. Unlucky, ignoring this kmer" if log.debug?
            found_nodes = [found_node]
          else
            log.debug "Found a suitable kmer fwd: #{fwd}"
            found_nodes = [found_node]
          end
        else
          log.debug "Found a suitable kmer rev: #{rev}"
          found_nodes = [found_node]
        end
        break
      else
        log.debug "kmer #{fwd}/#{rev} was found in multiple nodes" if log.debug?
        found_nodes = current_haul
      end
    end
  end
  return found_nodes
end

#find_probes_from_read_to_node(graph, read_to_node, probe_sequence_ids) ⇒ Object

Pick the best nodes from each of the probe sequences, and return an array for each probe sequence: ( [best_node, best_noded_read.direction, best_noded_read] or

if no probe could be found )



77
78
79
80
81
82
# File 'lib/assembly/node_finder.rb', line 77

def find_probes_from_read_to_node(graph, read_to_node, probe_sequence_ids)
  return probe_sequence_ids.collect do |sequence_id|
    nodes = read_to_node[sequence_id].collect{|node_id| graph.nodes[node_id]}
    pick_best_node_for_read_id(sequence_id, nodes)
  end
end

#find_unique_node_with_kmers(velvet_graph, kmers) ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/assembly/node_finder.rb', line 84

def find_unique_node_with_kmers(velvet_graph, kmers)
  # TODO: search in a more sane way, algorithmically
  # TODO: only choose kmers that are unique to the assembly
  found_node = nil
  found_direction = nil
  kmers.each_with_index do |fwd, i|
    rev = Bio::Sequence::NA.new(fwd).reverse_complement.to_s.upcase
    #log.debug "Testing kmer #{i} #{fwd} / #{rev}"
    current_haul = []
    velvet_graph.nodes.each do |node|
      if log.debug?
        str = "Searching for #{fwd} and #{rev} in node #{node.node_id}"
      end
      if node.sequence? and (node.sequence.include?(fwd) or node.sequence.include?(rev))
        current_haul.push node
      end
    end
    if !current_haul.empty?
      if current_haul.length == 1
        node = current_haul[0]
        log.debug "Found a possibly suitable node in the graph, of length #{node.length}" if log.debug?
        found_node = node
        if found_node.sequence.include?(fwd)
          if found_node.sequence.include?(rev)
            log.debug "Found a kmer that is included in the forward and reverse directions of the same node. Unlucky, ignoring this kmer" if log.debug?
          else
            log.info "Found a suitable kmer fwd: #{fwd}"
            found_direction = true
          end
        else
          log.info "Found a suitable kmer rev: #{rev}"
          found_direction = false
        end
        break unless found_direction.nil?
      else
        log.debug "kmer #{fwd}/#{rev} was found in multiple nodes, so not using it as a starting/ending node" if log.debug?
      end
    end
  end
  return found_node, found_direction
end

#find_unique_nodes_with_sequence_ids(graph, sequence_ids, options = {}) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/assembly/node_finder.rb', line 31

def find_unique_nodes_with_sequence_ids(graph, sequence_ids, options={})
  # Create data structure
  endings = {}
  sequence_ids.each do |seq_id|
    endings[seq_id] = []
  end

  # Fill data structure with candidate nodes

  graph.nodes.each do |node|
    next if options[:target_node_ids] and !options[:target_node_ids].include?(node.node_id)
    next if node.short_reads.nil?
    node.short_reads.each do |read|
      if endings[read.read_id]
        endings[read.read_id].push node
      end
    end
  end

  # Pick the best node from each of the candidate nodes for each sequence_id
  return endings.collect do |sequence_id, nodes|
    pick_best_node_for_read_id(sequence_id, nodes)
  end
end

#pick_best_node_for_read_id(sequence_id, nodes) ⇒ Object

Pick the best node from each of the candidate nodes a sequence_id, and return an array [best_node, best_noded_read.direction, best_noded_read] or

if no probe could be found



59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/assembly/node_finder.rb', line 59

def pick_best_node_for_read_id(sequence_id, nodes)
  best_node = nodes.min do |n1, n2|
    r1 = n1.short_reads.find{|r| r.read_id == sequence_id}
    r2 = n2.short_reads.find{|r| r.read_id == sequence_id}
    r1.start_coord <=> r2.start_coord
  end
  if best_node
    best_noded_read = best_node.short_reads.find{|r| r.read_id == sequence_id}
    [best_node, best_noded_read.direction, best_noded_read]
  else
    []
  end
end