Class: Bio::DB::Pileup

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/db/pileup.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(pile_up_line) ⇒ Pileup

creates the Pileup object

pile_up_line = "seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<"
pile = Bio::DB::Pileup.new(pile_up_line)


35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/bio/db/pileup.rb', line 35

def initialize(pile_up_line)
  cols = pile_up_line.split(/\t/)
  if cols.length == 6 ##should only be able to get 6 lines from mpileup
    @ref_name, @pos, @ref_base, @coverage, @read_bases, @read_quals = cols
  elsif (10..13).include?(cols.length) ##incase anyone tries to use deprecated pileup with -c flag we get upto 13 cols...
    if cols[2] == '*' #indel
      @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @indel_1, @indel_2, @ar1, @ar2, @ar3 = cols
    else #snp / identity
      @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @read_bases, @read_quals = cols
    end
    @consensus_quality = @consensus_quality.to_f
    @snp_quality = @snp_quality.to_f
    @rms_mapq = @rms_mapq.to_f
  else
    #raise RuntimeError, "parsing line '#{pile_up_line.chomp}' failed"
  end
    
  @pos = @pos.to_i
  @coverage = @coverage.to_f
  @ref_count = nil
  @non_ref_count_hash = nil 
  @non_ref_count = nil
end

Instance Attribute Details

#ar1Object

Returns the value of attribute ar1.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def ar1
  @ar1
end

#ar2Object

Returns the value of attribute ar2.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def ar2
  @ar2
end

#ar3Object

Returns the value of attribute ar3.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def ar3
  @ar3
end

#consensusObject

returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order



85
86
87
# File 'lib/bio/db/pileup.rb', line 85

def consensus
  @consensus
end

#consensus_qualityObject

Returns the value of attribute consensus_quality.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def consensus_quality
  @consensus_quality
end

#coverageObject

Returns the value of attribute coverage.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def coverage
  @coverage
end

#indel_1Object

Returns the value of attribute indel_1.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def indel_1
  @indel_1
end

#indel_2Object

Returns the value of attribute indel_2.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def indel_2
  @indel_2
end

#posObject

Returns the value of attribute pos.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def pos
  @pos
end

#read_basesObject

Returns the value of attribute read_bases.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def read_bases
  @read_bases
end

#read_qualsObject

Returns the value of attribute read_quals.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def read_quals
  @read_quals
end

#ref_baseObject

Returns the value of attribute ref_base.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def ref_base
  @ref_base
end

#ref_nameObject

Returns the value of attribute ref_name.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def ref_name
  @ref_name
end

#rms_mapqObject

Returns the value of attribute rms_mapq.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def rms_mapq
  @rms_mapq
end

#snp_qualityObject

Returns the value of attribute snp_quality.



30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def snp_quality
  @snp_quality
end

Class Method Details

.iupac_to_base(alt_base) ⇒ Object

returns



184
185
186
187
188
189
190
191
192
193
194
# File 'lib/bio/db/pileup.rb', line 184

def Pileup.iupac_to_base(alt_base)
    case alt_base
          when 'K' then ['G','T']
          when 'M' then ['A','C']
          when 'S' then ['C','G']
          when 'R' then ['A','G']
          when 'W' then ['A','T']
          when 'Y' then ['C','T']
          else alt_base.split(//)
    end
end

Instance Method Details

#genotype_listObject



174
175
176
177
178
179
180
# File 'lib/bio/db/pileup.rb', line 174

def genotype_list
  if self.ref_base == '*'
    return indel_gt
  else
   return snp_gt
 end
end

#non_ref_countObject

returns the total non-reference bases in the reads at this position



69
70
71
72
73
74
# File 'lib/bio/db/pileup.rb', line 69

def non_ref_count
  if @non_ref_count.nil?
    @non_ref_count = @read_bases.count("ATGCatgc").to_f
  end
  @non_ref_count
end

#non_refsObject

Calculate the total count of each non-reference nucleotide and return a hash of all 4 nt counts, returns a hash

pile.non_refs #{:A => 1, :C => 0, :T => 0, :G => 0}


61
62
63
64
65
66
# File 'lib/bio/db/pileup.rb', line 61

def non_refs
  if @non_ref_count_hash.nil?
     @non_ref_count_hash = {:A => self.read_bases.count("Aa"), :C => self.read_bases.count("Cc"), :G => self.read_bases.count("Gg"), :T => self.read_bases.count("Tt")}
  end
    @non_ref_count_hash
end

#ref_countObject

returns the count of reference-bases in the reads at this position



77
78
79
80
81
82
# File 'lib/bio/db/pileup.rb', line 77

def ref_count
  if @ref_count.nil?
    @ref_count = self.read_bases.count(".,")
  end
  @ref_count
end

#to_sObject

returns pileup format line



197
198
199
200
201
202
203
204
205
206
# File 'lib/bio/db/pileup.rb', line 197

def to_s
  if @read_quals and !@consensus_quality #6col
    [@ref_name, @pos, @ref_base, @coverage.to_i, @read_bases, @read_quals].join("\t")    
  elsif @indel_1 #13 cols
    [@ref_name, @pos, @ref_base, @consensus, @consensus_quality.to_i, @snp_quality.to_i, @rms_mapq.to_i, @coverage.to_i, @indel_1, @indel_2, @ar1, @ar2, @ar3].join("\t")
  else #10 cols
    [@ref_name, @pos, @ref_base, @consensus, @consensus_quality.to_i, @snp_quality.to_i, @rms_mapq.to_i, @coverage.to_i, @read_bases, @read_quals].join("\t")
  end
    
end

#to_vcfObject

returns basic VCF string as per samtools/misc sam2vcf.pl except that it scrimps on the ref for indels, returning a ‘*’ instead of the reference allele



103
104
105
106
107
108
109
# File 'lib/bio/db/pileup.rb', line 103

def to_vcf

  alt,g = self.genotype_list 
  alt = self.consensus.split(//).join(',') unless self.ref_base == '*'
  alt = '.' if alt == self.ref_base
  [self.ref_name, self.pos, '.', self.ref_base, alt, self.snp_quality.to_i, "0", "DP=#{self.coverage.to_i}", "GT:GQ:DP", "#{g}:#{self.consensus_quality.to_i}:#{self.coverage.to_i}" ].join("\t")
end