Class: Bio::DB::Pileup
- Inherits:
-
Object
- Object
- Bio::DB::Pileup
- Defined in:
- lib/bio/db/pileup.rb
Instance Attribute Summary collapse
-
#ar1 ⇒ Object
Returns the value of attribute ar1.
-
#ar2 ⇒ Object
Returns the value of attribute ar2.
-
#ar3 ⇒ Object
Returns the value of attribute ar3.
-
#consensus ⇒ Object
returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order.
-
#consensus_quality ⇒ Object
Returns the value of attribute consensus_quality.
-
#coverage ⇒ Object
Returns the value of attribute coverage.
-
#indel_1 ⇒ Object
Returns the value of attribute indel_1.
-
#indel_2 ⇒ Object
Returns the value of attribute indel_2.
-
#pos ⇒ Object
Returns the value of attribute pos.
-
#read_bases ⇒ Object
Returns the value of attribute read_bases.
-
#read_quals ⇒ Object
Returns the value of attribute read_quals.
-
#ref_base ⇒ Object
Returns the value of attribute ref_base.
-
#ref_name ⇒ Object
Returns the value of attribute ref_name.
-
#rms_mapq ⇒ Object
Returns the value of attribute rms_mapq.
-
#snp_quality ⇒ Object
Returns the value of attribute snp_quality.
Class Method Summary collapse
Instance Method Summary collapse
-
#allele_freq ⇒ Object
returns the frequency of all bases in pileup position.
- #base_coverage ⇒ Object
- #bases ⇒ Object
-
#consensus_iuap(minumum_ratio_for_iup_consensus) ⇒ Object
returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order.
- #genotype_list ⇒ Object
-
#initialize(pile_up_line) ⇒ Pileup
constructor
creates the Pileup object pile_up_line = “seq2t151tGtGt36t0t99t12t.….……At:9<;;7=<<<<<” pile = Bio::DB::Pileup.new(pile_up_line).
-
#non_ref_count ⇒ Object
returns the total non-reference bases in the reads at this position.
-
#non_refs ⇒ Object
Calculate the total count of each non-reference nucleotide and return a hash of all 4 nt counts returns a hash pile.non_refs #=> 1, :C => 0, :T => 0, :G => 0.
-
#parse_indel(alt) ⇒ Object
identifies if the indel is an insertion or a deletion.
-
#ref_count ⇒ Object
returns the count of reference-bases in the reads at this position.
-
#to_s ⇒ Object
returns pileup format line.
-
#to_vcf ⇒ Object
returns basic VCF string as per samtools/misc sam2vcf.pl except that it scrimps on the ref for indels, returning a ‘*’ instead of the reference allele.
Constructor Details
#initialize(pile_up_line) ⇒ Pileup
creates the Pileup object
pile_up_line = "seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<"
pile = Bio::DB::Pileup.new(pile_up_line)
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/bio/db/pileup.rb', line 35 def initialize(pile_up_line) cols = pile_up_line.split(/\t/) if cols.length == 6 ##should only be able to get 6 lines from mpileup @ref_name, @pos, @ref_base, @coverage, @read_bases, @read_quals = cols elsif (10..13).include?(cols.length) ##incase anyone tries to use deprecated pileup with -c flag we get upto 13 cols... if cols[2] == '*' #indel @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @indel_1, @indel_2, @ar1, @ar2, @ar3 = cols else #snp / identity @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @read_bases, @read_quals = cols end @consensus_quality = @consensus_quality.to_f @snp_quality = @snp_quality.to_f @rms_mapq = @rms_mapq.to_f else #raise RuntimeError, "parsing line '#{pile_up_line.chomp}' failed" end @pos = @pos.to_i @coverage = @coverage.to_f @ref_count = nil @non_ref_count_hash = nil @non_ref_count = nil end |
Instance Attribute Details
#ar1 ⇒ Object
Returns the value of attribute ar1.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def ar1 @ar1 end |
#ar2 ⇒ Object
Returns the value of attribute ar2.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def ar2 @ar2 end |
#ar3 ⇒ Object
Returns the value of attribute ar3.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def ar3 @ar3 end |
#consensus ⇒ Object
returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order
85 86 87 |
# File 'lib/bio/db/pileup.rb', line 85 def consensus @consensus end |
#consensus_quality ⇒ Object
Returns the value of attribute consensus_quality.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def consensus_quality @consensus_quality end |
#coverage ⇒ Object
Returns the value of attribute coverage.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def coverage @coverage end |
#indel_1 ⇒ Object
Returns the value of attribute indel_1.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def indel_1 @indel_1 end |
#indel_2 ⇒ Object
Returns the value of attribute indel_2.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def indel_2 @indel_2 end |
#pos ⇒ Object
Returns the value of attribute pos.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def pos @pos end |
#read_bases ⇒ Object
Returns the value of attribute read_bases.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def read_bases @read_bases end |
#read_quals ⇒ Object
Returns the value of attribute read_quals.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def read_quals @read_quals end |
#ref_base ⇒ Object
Returns the value of attribute ref_base.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def ref_base @ref_base end |
#ref_name ⇒ Object
Returns the value of attribute ref_name.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def ref_name @ref_name end |
#rms_mapq ⇒ Object
Returns the value of attribute rms_mapq.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def rms_mapq @rms_mapq end |
#snp_quality ⇒ Object
Returns the value of attribute snp_quality.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def snp_quality @snp_quality end |
Class Method Details
.iupac_to_base(alt_base) ⇒ Object
181 182 183 184 185 186 187 188 189 190 191 |
# File 'lib/bio/db/pileup.rb', line 181 def Pileup.iupac_to_base(alt_base) case alt_base when 'K' then ['G','T'] when 'M' then ['A','C'] when 'S' then ['C','G'] when 'R' then ['A','G'] when 'W' then ['A','T'] when 'Y' then ['C','T'] else alt_base.split(//) end end |
Instance Method Details
#allele_freq ⇒ Object
returns the frequency of all bases in pileup position
234 235 236 237 238 239 240 241 242 |
# File 'lib/bio/db/pileup.rb', line 234 def allele_freq return @allele_frequency if @allele_frequency bases = self.bases @allele_frequency = Hash.new bases.each do |k,v| @allele_frequency[k] = v.to_f/self.base_coverage.to_f end @allele_frequency end |
#base_coverage ⇒ Object
225 226 227 228 229 230 231 |
# File 'lib/bio/db/pileup.rb', line 225 def base_coverage total = 0 @bases.each do |k,v| total += v end total end |
#bases ⇒ Object
217 218 219 220 221 222 223 |
# File 'lib/bio/db/pileup.rb', line 217 def bases return @bases if @bases @bases = self.non_refs #puts self.ref_count @bases[self.ref_base.upcase.to_sym] = self.ref_count @bases end |
#consensus_iuap(minumum_ratio_for_iup_consensus) ⇒ Object
returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order
245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 |
# File 'lib/bio/db/pileup.rb', line 245 def consensus_iuap(minumum_ratio_for_iup_consensus) tmp = [] if @consensus_iuap.nil? @consensus_iuap = self.ref_base.downcase bases = self.bases #tmp = String.new bases.each do |k,v| tmp << k[0].to_s if v/self.coverage.to_f > minumum_ratio_for_iup_consensus end if tmp.length > 0 tmp = tmp.collect{ |x| Bio::Sequence::NA.new(x) } # creates alignment object a = Bio::Alignment.new(tmp) # shows IUPAC consensus @consensus_iuap = a.consensus_iupac end end @consensus_iuap end |
#genotype_list ⇒ Object
171 172 173 174 175 176 177 |
# File 'lib/bio/db/pileup.rb', line 171 def genotype_list if self.ref_base == '*' return indel_gt else return snp_gt end end |
#non_ref_count ⇒ Object
returns the total non-reference bases in the reads at this position
69 70 71 72 73 74 |
# File 'lib/bio/db/pileup.rb', line 69 def non_ref_count if @non_ref_count.nil? @non_ref_count = @read_bases.count("ATGCatgc").to_f end @non_ref_count end |
#non_refs ⇒ Object
Calculate the total count of each non-reference nucleotide and return a hash of all 4 nt counts returns a hash pile.non_refs #=> 1, :C => 0, :T => 0, :G => 0
61 62 63 64 65 66 |
# File 'lib/bio/db/pileup.rb', line 61 def non_refs if @non_ref_count_hash.nil? @non_ref_count_hash = {:A => self.read_bases.count("Aa"), :C => self.read_bases.count("Cc"), :G => self.read_bases.count("Gg"), :T => self.read_bases.count("Tt")} end @non_ref_count_hash end |
#parse_indel(alt) ⇒ Object
identifies if the indel is an insertion or a deletion
194 195 196 197 198 199 200 201 |
# File 'lib/bio/db/pileup.rb', line 194 def parse_indel(alt) return "D#{$'.length}" if alt =~/^-/ if alt=~/^\+/ return "I#{$'}" elsif alt == '*' return nil end end |
#ref_count ⇒ Object
returns the count of reference-bases in the reads at this position
77 78 79 80 81 82 |
# File 'lib/bio/db/pileup.rb', line 77 def ref_count if @ref_count.nil? @ref_count = self.read_bases.count(".,") end @ref_count end |
#to_s ⇒ Object
returns pileup format line
205 206 207 208 209 210 211 212 213 214 |
# File 'lib/bio/db/pileup.rb', line 205 def to_s if @read_quals and !@consensus_quality #6col [@ref_name, @pos, @ref_base, @coverage.to_i, @read_bases, @read_quals].join("\t") elsif @indel_1 #13 cols [@ref_name, @pos, @ref_base, @consensus, @consensus_quality.to_i, @snp_quality.to_i, @rms_mapq.to_i, @coverage.to_i, @indel_1, @indel_2, @ar1, @ar2, @ar3].join("\t") else #10 cols [@ref_name, @pos, @ref_base, @consensus, @consensus_quality.to_i, @snp_quality.to_i, @rms_mapq.to_i, @coverage.to_i, @read_bases, @read_quals].join("\t") end end |
#to_vcf ⇒ Object
returns basic VCF string as per samtools/misc sam2vcf.pl except that it scrimps on the ref for indels, returning a ‘*’ instead of the reference allele
109 110 111 112 113 114 115 116 117 118 |
# File 'lib/bio/db/pileup.rb', line 109 def to_vcf alt,g = self.genotype_list alt = self.consensus.split(//).join(',') unless self.ref_base == '*' alt = '.' if alt == self.ref_base alt = alt.split(',') #if the reference base is in alt, remove it alt.delete(self.ref_base.to_s) alt = alt.join(',') [self.ref_name, self.pos, '.', self.ref_base, alt, self.snp_quality.to_i, "0", "DP=#{self.coverage.to_i}", "GT:GQ:DP", "#{g}:#{self.consensus_quality.to_i}:#{self.coverage.to_i}" ].join("\t") end |