Class: Bio::DB::PileupIterator

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/bio/db/pileup_iterator.rb

Defined Under Namespace

Classes: PileupRead

Instance Method Summary collapse

Constructor Details

#initialize(io) ⇒ PileupIterator

Returns a new instance of PileupIterator.



11
12
13
# File 'lib/bio/db/pileup_iterator.rb', line 11

def initialize(io)
  @io = io
end

Instance Method Details

#eachObject

Iterates through the positions of the a pileup, returning an instance of Bio::DB::Pileup complete with an instance variable @reads, an Array of Bio::DB::PileupRead objects.

Known problems:

  • Doesn’t record start or ends of each read

  • Doesn’t lookahead to determine the sequence of each read (though it does give the preceding bases)

  • Doesn’t record the mapping quality of each read



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/bio/db/pileup_iterator.rb', line 21

def each
  current_ordered_reads = []
  log = Bio::Log::LoggerPlus['bio-pileup_iterator']
  logging = true
  
  @io.each_line do |line|
    pileup = Bio::DB::Pileup.new(line.strip)
    current_read_index = 0
    reads_ending = []

    bases = pileup.read_bases
    log.debug "new column's read_bases: #{bases.inspect}" if log.debug?
    log.debug "pileup entry parsed: #{pileup.inspect}" if log.debug?
    while bases.length > 0
      log.debug "==== new read within a single pileup being parsed. Starting with #{bases}" if log.debug?
      
      # Firstly, what is the current read we are working with
      current_read = current_ordered_reads[current_read_index]
      # if adding a new read
      if current_read.nil?
        log.debug 'adding a new read: '+bases if log.debug?
        current_read = PileupRead.new
        current_ordered_reads.push current_read
      end
      matches = nil
      
      # if starting, remove it
      log.debug "before read start removal, pileup is #{bases}, read is #{current_read}" if log.debug?
      matched_string = ''
      if bases[0]=='^'
        # Match the ^ and the mapping quality
        matched_string += bases[0..1]
        bases = bases[2...bases.length]
      end
      log.debug "after read start removal, pileup is #{bases}" if log.debug?
      
      # next expect the actual base bit
      if matches = bases.match(/^([ACGTNacgtn\.\,\*])/)
        matched_string += bases[0]
        if matches[1] == '.'
          if !current_read.direction.nil? and current_read.direction != PileupRead::FORWARD_DIRECTION
            pp current_read
            raise "Unexpectedly found direction #{current_read.direction}, expected #{PileupRead::FORWARD_DIRECTION}, in starting at '#{bases}'(EndOfLine) in '#{line}', in the read above"
          end
          current_read.direction = PileupRead::FORWARD_DIRECTION
          current_read.sequence = "#{current_read.sequence}#{pileup.ref_base}"
        elsif matches[1] == ','
          if !current_read.direction.nil? and current_read.direction != PileupRead::REVERSE_DIRECTION
            pp current_read
            raise "Unexpectedly found direction #{current_read.direction}, expected #{PileupRead::REVERSE_DIRECTION}, in starting at '#{bases}'(EndOfLine) in '#{line}', in the read above"
          end
          current_read.direction = PileupRead::REVERSE_DIRECTION
          current_read.sequence = "#{current_read.sequence}#{pileup.ref_base}"
        else
          if current_read.direction.nil? and matches[1] != '*'
            # Could sanity check the direction here by detecting case, but eh
            if matches[1].upcase == matches[1] or matches[1]== '.'
              current_read.direction = PileupRead::FORWARD_DIRECTION
            else
              current_read.direction = PileupRead::REVERSE_DIRECTION
            end
          end
          current_read.sequence = "#{current_read.sequence}#{matches[1]}"
        end
        # remove the matched base
        bases = bases[1...bases.length]
      else
        raise Exception, "Expected a character corresponding to a base, one of '[ACGTNacgtn.,]'. Starting here: #{bases}, from #{pileup.inspect}"
      end
      log.debug "after regular position removal, pileup is #{bases}" if log.debug?
      
      # then read insertion or deletion in the coming position(s)
      if matches = bases.match(/^([\+\-])([0-9]+)/)
        matched_length = matches[1].length+matches[2].length
        bases = bases[matched_length...bases.length]
        matched_string += matches[1]+matches[2]
        log.debug "after removal of bases leading up to an insertion/deletion, pileup is #{bases}" if log.debug?
        
        regex = /^([ACGTNacgtn=]{#{matches[2].to_i}})/
        log.debug "insertion/deletion secondary regex: #{regex.inspect}" if log.debug?
        last_matched = bases.match(regex)
        
        if last_matched.nil?
          raise Exception, "Failed to parse insertion. Starting here: #{bases}, from #{pileup.inspect}"
        else
          bases = bases[last_matched[1].length...bases.length]
          if matches[1]=='+'
            # record the insertion
            current_read.add_insertion pileup.pos, matches[2], last_matched[1]
          elsif matches[1]=='-'
            #currently deletions are not recorded, slipped to future
          end
        end
      end
      log.debug "after indel removal, pileup is now #{bases}" if log.debug?
      
      # Then read an ending read
      if bases[0]=='$'
        reads_ending.push current_read_index
        matched_string += '$'
        bases = bases[1...bases.length]
      end
      
      log.debug "Matched '#{matched_string}', now the bases are '#{bases}'" if log.debug?
      
      current_read_index += 1
    end

    # Create a new copy of the array and yield that, otherwise when things get deleted they get removed from the yielded array as well (which is unwanted)
    yielded_array = Array.new(current_ordered_reads)
    pileup.reads = yielded_array
    #log.debug "Number of reads yielded: #{pileup.reads.length}"
    yield pileup
    
    # Remove reads that ended. In reverse order since removing the last ones first doesn't mess with the indices beforehand in the array
    reads_ending.reverse.each do |i|
      #log.debug "Deleting read of index #{i} (total reads #{current_ordered_reads.length}): #{current_ordered_reads[i].inspect}"
      current_ordered_reads.delete_at i
    end
    #log.debug "Ended up with #{current_ordered_reads.length} reads that should be present next time"
  end
end