Class: Transrate::Contig
- Inherits:
-
Object
- Object
- Transrate::Contig
- Extended by:
- Forwardable
- Includes:
- Enumerable
- Defined in:
- lib/transrate/contig.rb,
ext/transrate/transrate.c
Overview
A contig in a transcriptome assembly.
Instance Attribute Summary collapse
-
#classification ⇒ Object
Returns the value of attribute classification.
-
#coverage ⇒ Object
Returns the value of attribute coverage.
-
#eff_count ⇒ Object
read-based metrics.
-
#eff_length ⇒ Object
read-based metrics.
-
#good ⇒ Object
Returns the value of attribute good.
-
#has_crb ⇒ Object
reference-based metrics.
-
#hits ⇒ Object
Returns the value of attribute hits.
-
#in_bridges ⇒ Object
Returns the value of attribute in_bridges.
-
#low_uniqueness_bases ⇒ Object
Returns the value of attribute low_uniqueness_bases.
-
#name ⇒ Object
Returns the value of attribute name.
-
#p_good ⇒ Object
Returns the value of attribute p_good.
-
#p_not_segmented ⇒ Object
Returns the value of attribute p_not_segmented.
-
#p_seq_true ⇒ Object
Returns the value of attribute p_seq_true.
-
#p_uncovered_bases ⇒ Object
Returns the value of attribute p_uncovered_bases.
-
#reference_coverage ⇒ Object
reference-based metrics.
-
#seq ⇒ Object
Returns the value of attribute seq.
-
#tpm ⇒ Object
read-based metrics.
-
#uncovered_bases ⇒ Object
Returns the value of attribute uncovered_bases.
Instance Method Summary collapse
-
#at_skew ⇒ Object
AT skew.
-
#base_composition ⇒ Object
Base composition of the contig.
- #base_count ⇒ Object
-
#bases_a ⇒ Object
Number of bases that are A.
-
#bases_c ⇒ Object
Number of bases that are C.
-
#bases_g ⇒ Object
Number of bases that are G.
-
#bases_gc ⇒ Object
Number of bases that are G or C.
- #bases_n ⇒ Object
-
#bases_t ⇒ Object
Number of bases that are T.
-
#basic_metrics ⇒ Object
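Get the basic sequence metrics for this contig (length, GC proportion, GC/AT skew, CpG count and ratio, ORF length and linguistic complexity).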
-
#classify(cutoff) ⇒ Object
Classify the contig as good or bad by comparing its score with the given cutoff. The documented classes are: good (score >= 0.5); fragmented (in_bridges > 0 and no other problems); chimeric (p_not_segmented < 0.25 and no other problems); bad (score < 0.5 and not in any other category).
- #comparative_metrics ⇒ Object
-
#composition ⇒ Object
Counts the bases and dibases of the contig (implemented in the C extension).
-
#cpg_count ⇒ Object
CpG count.
-
#cpg_ratio ⇒ Object
observed-to-expected CpG (C-phosphate-G) ratio.
-
#dibase_composition ⇒ Object
Dibase composition of the contig.
- #dibase_count ⇒ Object
- #each(&block) ⇒ Object
-
#gc_skew ⇒ Object
GC skew.
-
#initialize(seq, name: nil) ⇒ Contig
constructor
A new instance of Contig.
- #kmer_count ⇒ Object
- #linguistic_complexity(k) ⇒ Object
- #longest_orf ⇒ Object
-
#orf_length ⇒ Object
Length of the longest ORF in the contig (cached after the first call).
- #p_bases_covered ⇒ Object
-
#prop_a ⇒ Object
Proportion of bases that are A.
-
#prop_c ⇒ Object
Proportion of bases that are C.
-
#prop_g ⇒ Object
Proportion of bases that are G.
- #prop_gc ⇒ Object
- #prop_n ⇒ Object
-
#prop_t ⇒ Object
Proportion of bases that are T.
- #read_metrics ⇒ Object
-
#score ⇒ Object
Contig score (product of all score components).
- #to_fasta ⇒ Object
Constructor Details
#initialize(seq, name: nil) ⇒ Contig
Returns a new instance of Contig.
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/transrate/contig.rb', line 22
#
# Wraps a parsed sequence record and resets every metric to its starting
# value.
#
# @param seq [Object] sequence record; must respond to +seq+ and +data=+ and
#   may respond to +entry_id+ (NOTE(review): presumably a Bio::FastaFormat
#   entry -- confirm against callers)
# @param name [String, nil] fallback name, used only when +seq+ does not
#   respond to +entry_id+
def initialize(seq, name: nil)
  has_entry_id = seq.respond_to?(:entry_id)

  # fix null bytes in the nucleotide sequence
  seq.seq.gsub!("\0", "")
  # trim trailing semicolons (because BLAST strips them)
  seq.entry_id.gsub!(/;$/, '') if has_entry_id

  @seq = seq
  @seq.data = nil # no need to store raw fasta string
  @name = has_entry_id ? seq.entry_id : name

  # reference-based state
  @hits = []
  @reference_coverage = 0
  @has_crb = false

  # read-based state; a fresh contig starts out entirely uncovered
  @in_bridges = 0
  @p_good = 0
  @p_seq_true = 0
  @uncovered_bases = length
  @p_uncovered_bases = 1
  @p_not_segmented = 1
  @score = -1 # sentinel: #score computes the real value on first use
  @good = 0
  @coverage = 0
  @classification = :unknown
end
Instance Attribute Details
#classification ⇒ Object
Returns the value of attribute classification.
17 18 19 |
# File 'lib/transrate/contig.rb', line 17 def classification @classification end |
#coverage ⇒ Object
Returns the value of attribute coverage.
14 15 16 |
# File 'lib/transrate/contig.rb', line 14 def coverage @coverage end |
#eff_count ⇒ Object
read-based metrics
13 14 15 |
# File 'lib/transrate/contig.rb', line 13 def eff_count @eff_count end |
#eff_length ⇒ Object
read-based metrics
13 14 15 |
# File 'lib/transrate/contig.rb', line 13 def eff_length @eff_length end |
#good ⇒ Object
Returns the value of attribute good.
17 18 19 |
# File 'lib/transrate/contig.rb', line 17 def good @good end |
#has_crb ⇒ Object
reference-based metrics
19 20 21 |
# File 'lib/transrate/contig.rb', line 19 def has_crb @has_crb end |
#hits ⇒ Object
Returns the value of attribute hits.
20 21 22 |
# File 'lib/transrate/contig.rb', line 20 def hits @hits end |
#in_bridges ⇒ Object
Returns the value of attribute in_bridges.
16 17 18 |
# File 'lib/transrate/contig.rb', line 16 def in_bridges @in_bridges end |
#low_uniqueness_bases ⇒ Object
Returns the value of attribute low_uniqueness_bases.
16 17 18 |
# File 'lib/transrate/contig.rb', line 16 def low_uniqueness_bases @low_uniqueness_bases end |
#name ⇒ Object
Returns the value of attribute name.
11 12 13 |
# File 'lib/transrate/contig.rb', line 11 def name @name end |
#p_good ⇒ Object
Returns the value of attribute p_good.
17 18 19 |
# File 'lib/transrate/contig.rb', line 17 def p_good @p_good end |
#p_not_segmented ⇒ Object
Returns the value of attribute p_not_segmented.
17 18 19 |
# File 'lib/transrate/contig.rb', line 17 def p_not_segmented @p_not_segmented end |
#p_seq_true ⇒ Object
Returns the value of attribute p_seq_true.
15 16 17 |
# File 'lib/transrate/contig.rb', line 15 def p_seq_true @p_seq_true end |
#p_uncovered_bases ⇒ Object
Returns the value of attribute p_uncovered_bases.
14 15 16 |
# File 'lib/transrate/contig.rb', line 14 def p_uncovered_bases @p_uncovered_bases end |
#reference_coverage ⇒ Object
reference-based metrics
19 20 21 |
# File 'lib/transrate/contig.rb', line 19 def reference_coverage @reference_coverage end |
#seq ⇒ Object
Returns the value of attribute seq.
11 12 13 |
# File 'lib/transrate/contig.rb', line 11 def seq @seq end |
#tpm ⇒ Object
read-based metrics
13 14 15 |
# File 'lib/transrate/contig.rb', line 13 def tpm @tpm end |
#uncovered_bases ⇒ Object
Returns the value of attribute uncovered_bases.
14 15 16 |
# File 'lib/transrate/contig.rb', line 14 def uncovered_bases @uncovered_bases end |
Instance Method Details
#at_skew ⇒ Object
AT skew
198 199 200 |
# File 'lib/transrate/contig.rb', line 198
#
# AT skew: (A - T) / (A + T), computed from the base counts.
#
# @return [Float]
def at_skew
  a_count = bases_a
  t_count = bases_t
  (a_count - t_count) / (a_count + t_count).to_f
end
#base_composition ⇒ Object
Base composition of the contig
The result is memoized. On the first call, when @base_composition is nil, the C method is run to count the bases and dibases in the sequence; the counts are then read out of the C arrays and stored in hashes. Later calls simply return the stored hash.
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/transrate/contig.rb', line 98
#
# Base composition of the contig, memoized.
#
# On the first call the C method +composition+ is run over the sequence and
# the per-base and per-dibase counts are read back through +base_count+ and
# +dibase_count+. Both caches (@base_composition and @dibase_composition)
# are assigned only once every count has been read, so a failure part-way
# through cannot leave a half-filled hash memoized.
#
# @return [Hash{Symbol => Object}] counts keyed by :a, :c, :g, :t and :n
def base_composition
  return @base_composition if @base_composition

  composition(@seq.seq) # C extension call
  alphabet = %w[a c g t n]
  bases = alphabet.map(&:to_sym)
  # Same order as the nested loops this replaces: aa, ac, ag, at, an, ca, ...
  dibases = alphabet.product(alphabet).map { |pair| pair.join.to_sym }

  base_counts = bases.each_with_index.each_with_object({}) do |(base, i), counts|
    counts[base] = base_count(i)
  end
  dibase_counts = dibases.each_with_index.each_with_object({}) do |(dibase, i), counts|
    counts[dibase] = dibase_count(i)
  end

  @dibase_composition = dibase_counts
  @base_composition = base_counts
end
#base_count ⇒ Object
17 |
# File 'ext/transrate/transrate.c', line 17 VALUE method_base_count(VALUE,VALUE); |
#bases_a ⇒ Object
Number of bases that are A
156 157 158 |
# File 'lib/transrate/contig.rb', line 156
#
# Number of bases that are A, read from the base composition hash.
def bases_a
  base_composition[:a]
end
#bases_c ⇒ Object
Number of bases that are C
136 137 138 |
# File 'lib/transrate/contig.rb', line 136
#
# Number of bases that are C, read from the base composition hash.
def bases_c
  base_composition[:c]
end
#bases_g ⇒ Object
Number of bases that are G
146 147 148 |
# File 'lib/transrate/contig.rb', line 146
#
# Number of bases that are G, read from the base composition hash.
def bases_g
  base_composition[:g]
end
#bases_gc ⇒ Object
Number of bases that are G or C
184 185 186 |
# File 'lib/transrate/contig.rb', line 184
#
# Number of bases that are G or C (sum of the G and C counts).
def bases_gc
  bases_g + bases_c
end
#bases_n ⇒ Object
175 176 177 |
# File 'lib/transrate/contig.rb', line 175
#
# Number of bases that are N, read from the base composition hash.
def bases_n
  base_composition[:n]
end
#bases_t ⇒ Object
Number of bases that are T
166 167 168 |
# File 'lib/transrate/contig.rb', line 166
#
# Number of bases that are T, read from the base composition hash.
def bases_t
  base_composition[:t]
end
#basic_metrics ⇒ Object
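Get the basic sequence metrics for this contig (length, GC proportion, GC/AT skew, CpG count and ratio, ORF length and linguistic complexity)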
52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/transrate/contig.rb', line 52
#
# Sequence-only metrics for this contig, gathered into one hash.
#
# @return [Hash{Symbol => Object}] keys :length, :prop_gc, :gc_skew,
#   :at_skew, :cpg_count, :cpg_ratio, :orf_length and
#   :linguistic_complexity_6 (linguistic complexity for k = 6)
def basic_metrics
  {
    length: length,
    prop_gc: prop_gc,
    gc_skew: gc_skew,
    at_skew: at_skew,
    cpg_count: cpg_count,
    cpg_ratio: cpg_ratio,
    orf_length: orf_length,
    linguistic_complexity_6: linguistic_complexity(6)
  }
end
#classify(cutoff) ⇒ Object
Classify the contig into one of the following classes:
-
good (score >= 0.5)
-
fragmented (in_bridges > 0) and no other problems
-
chimeric (p_not_segmented < 0.25) and no other problems
-
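bad (score < 0.5 and not in any other category)
Note: the implementation shown below only assigns :good (score >= cutoff) or :bad; the fragmented and chimeric classes are not produced by it.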
251 252 253 254 255 256 257 258 |
# File 'lib/transrate/contig.rb', line 251
#
# Classifies the contig by comparing its score with +cutoff+ and stores the
# result in @classification. Only two outcomes are produced here:
# :good when score >= cutoff, otherwise :bad.
#
# @param cutoff [Numeric] minimum score for a contig to count as good
# @return [Symbol] :good or :bad
def classify(cutoff)
  @classification = score >= cutoff ? :good : :bad
end
#comparative_metrics ⇒ Object
80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/transrate/contig.rb', line 80
#
# Reference-based metrics for this contig.
#
# When @has_crb is falsy, placeholder values are returned (false / "NA").
# Otherwise the hit targets are joined into one ";"-separated string.
#
# @return [Hash{Symbol => Object}] keys :has_crb, :reference_coverage, :hits
def comparative_metrics
  unless @has_crb
    return {
      has_crb: false,
      reference_coverage: "NA",
      hits: "NA"
    }
  end

  {
    has_crb: has_crb,
    reference_coverage: reference_coverage,
    hits: hits.map(&:target).join(";")
  }
end
#composition ⇒ Object
Counts the bases and dibases of the contig (implemented in the C extension)
16 |
# File 'ext/transrate/transrate.c', line 16 VALUE method_composition(VALUE, VALUE); |
#cpg_count ⇒ Object
CpG count
203 204 205 |
# File 'lib/transrate/contig.rb', line 203
#
# CpG count: the sum of the :cg and :gc dinucleotide counts.
def cpg_count
  dibases = dibase_composition
  dibases[:cg] + dibases[:gc]
end
#cpg_ratio ⇒ Object
observed-to-expected CpG (C-phosphate-G) ratio
208 209 210 211 212 213 |
# File 'lib/transrate/contig.rb', line 208
#
# Observed-to-expected CpG (C-phosphate-G) ratio.
#
# Computed as (cg + gc dinucleotide count) / (C count * G count), scaled by
# the number of non-N bases (length - N count).
#
# @return [Float]
def cpg_ratio
  observed = dibase_composition[:cg] + dibase_composition[:gc]
  per_cg_pair = observed / (bases_c * bases_g).to_f
  per_cg_pair * (length - bases_n)
end
#dibase_composition ⇒ Object
Dibase composition of the contig
127 128 129 130 131 132 133 |
# File 'lib/transrate/contig.rb', line 127
#
# Dibase composition of the contig.
#
# The hash is filled in as a side effect of #base_composition, so that
# method is invoked first when nothing has been cached yet.
def dibase_composition
  base_composition unless @dibase_composition
  @dibase_composition
end
#dibase_count ⇒ Object
18 |
# File 'ext/transrate/transrate.c', line 18 VALUE method_dibase_count(VALUE,VALUE); |
#each(&block) ⇒ Object
47 48 49 |
# File 'lib/transrate/contig.rb', line 47
#
# Passes each character of the underlying sequence string to the block.
# The call is forwarded to each_char, so calling this without a block
# returns whatever each_char returns in that case.
def each(&block)
  @seq.seq.each_char(&block)
end
#gc_skew ⇒ Object
GC skew
193 194 195 |
# File 'lib/transrate/contig.rb', line 193
#
# GC skew: (G - C) / (G + C), computed from the base counts.
#
# @return [Float]
def gc_skew
  g_count = bases_g
  c_count = bases_c
  (g_count - c_count) / (g_count + c_count).to_f
end
#kmer_count ⇒ Object
19 |
# File 'ext/transrate/transrate.c', line 19 VALUE method_kmer_count(VALUE,VALUE,VALUE); |
#linguistic_complexity(k) ⇒ Object
222 223 224 |
# File 'lib/transrate/contig.rb', line 222
#
# Linguistic complexity for k-mers of size +k+: the value returned by the C
# method +kmer_count+ for this sequence, divided by 4**k.
# NOTE(review): presumably kmer_count reports the number of distinct k-mers
# present -- confirm against ext/transrate/transrate.c.
#
# @param k [Integer] k-mer size
# @return [Float]
def linguistic_complexity(k)
  kmer_count(k, @seq.seq) / (4**k).to_f # call to C
end
#longest_orf ⇒ Object
20 |
# File 'ext/transrate/transrate.c', line 20 VALUE method_longest_orf(VALUE, VALUE); |
#orf_length ⇒ Object
Length of the longest ORF in the contig (cached after the first call)
216 217 218 219 220 |
# File 'lib/transrate/contig.rb', line 216
#
# Value returned by the C method +longest_orf+ for this sequence, memoized
# in @orf_length so the C call is made at most once per truthy result.
def orf_length
  @orf_length ||= longest_orf(@seq.seq) # call to C
end
#p_bases_covered ⇒ Object
226 227 228 |
# File 'lib/transrate/contig.rb', line 226
#
# Complement of p_uncovered_bases (1 minus that value).
def p_bases_covered
  1 - p_uncovered_bases
end
#prop_a ⇒ Object
Proportion of bases that are A
161 162 163 |
# File 'lib/transrate/contig.rb', line 161
#
# Proportion of bases that are A (A count divided by length).
#
# @return [Float]
def prop_a
  bases_a / length.to_f
end
#prop_c ⇒ Object
Proportion of bases that are C
141 142 143 |
# File 'lib/transrate/contig.rb', line 141
#
# Proportion of bases that are C (C count divided by length).
#
# @return [Float]
def prop_c
  bases_c / length.to_f
end
#prop_g ⇒ Object
Proportion of bases that are G
151 152 153 |
# File 'lib/transrate/contig.rb', line 151
#
# Proportion of bases that are G (G count divided by length).
#
# @return [Float]
def prop_g
  bases_g / length.to_f
end
#prop_gc ⇒ Object
188 189 190 |
# File 'lib/transrate/contig.rb', line 188
#
# Proportion of bases that are G or C (sum of the G and C proportions).
def prop_gc
  prop_g + prop_c
end
#prop_n ⇒ Object
179 180 181 |
# File 'lib/transrate/contig.rb', line 179
#
# Proportion of bases that are N (N count divided by length).
#
# @return [Float]
def prop_n
  bases_n / length.to_f
end
#prop_t ⇒ Object
Proportion of bases that are T
171 172 173 |
# File 'lib/transrate/contig.rb', line 171
#
# Proportion of bases that are T (T count divided by length).
#
# @return [Float]
def prop_t
  bases_t / length.to_f
end
#read_metrics ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/transrate/contig.rb', line 65
#
# Read-based metrics for this contig, gathered into one hash.
#
# @return [Hash{Symbol => Object}] keys :in_bridges, :p_good,
#   :p_bases_covered, :p_seq_true, :score, :p_not_segmented, :eff_length,
#   :eff_count, :tpm and :coverage
def read_metrics
  {
    in_bridges: in_bridges,
    p_good: p_good,
    p_bases_covered: p_bases_covered,
    p_seq_true: p_seq_true,
    score: score,
    p_not_segmented: p_not_segmented,
    eff_length: eff_length,
    eff_count: eff_count,
    tpm: tpm,
    coverage: coverage
  }
end
#score ⇒ Object
Contig score (product of all score components)
236 237 238 239 240 241 242 243 244 |
# File 'lib/transrate/contig.rb', line 236
#
# Contig score (product of all score components), memoized in @score.
#
# @score holds -1 until the first computation. Each component is floored at
# 0.01 before multiplying, and the product itself is floored at 0.01.
#
# @return [Float]
def score
  return @score unless @score == -1

  components = [
    p_bases_covered, # proportion of bases covered
    p_not_segmented, # prob contig has 0 changepoints
    p_good,          # proportion of reads that mapped good
    p_seq_true       # scaled 1 - mean per-base edit distance
  ]
  product = components.map { |part| [part, 0.01].max.to_f }.inject(:*)
  @score = [product, 0.01].max
end
#to_fasta ⇒ Object
260 261 262 |
# File 'lib/transrate/contig.rb', line 260
#
# Delegates to the underlying sequence's to_fasta, passing the contig name.
def to_fasta
  sequence = @seq.seq
  sequence.to_fasta(@name)
end