Class: Transrate::Contig
- Inherits:
-
Object
- Object
- Transrate::Contig
- Extended by:
- Forwardable
- Includes:
- Enumerable
- Defined in:
- lib/transrate/contig.rb,
ext/transrate/transrate.c
Overview
A contig in a transcriptome assembly.
Instance Attribute Summary collapse
-
#collapse_factor ⇒ Object
reference-based metrics.
-
#coverage ⇒ Object
read-based metrics.
-
#has_crb ⇒ Object
reference-based metrics.
-
#hits ⇒ Object
Returns the value of attribute hits.
-
#in_bridges ⇒ Object
read-based metrics.
-
#is_chimera ⇒ Object
reference-based metrics.
-
#mean_coverage ⇒ Object
read-based metrics.
-
#name ⇒ Object
Returns the value of attribute name.
-
#reference_coverage ⇒ Object
reference-based metrics.
-
#seq ⇒ Object
Returns the value of attribute seq.
-
#uncovered_bases ⇒ Object
read-based metrics.
Instance Method Summary collapse
-
#at_skew ⇒ Object
AT skew.
-
#base_composition ⇒ Object
Base composition of the contig.
- #base_count ⇒ Object
-
#bases_a ⇒ Object
Number of bases that are A.
-
#bases_c ⇒ Object
Number of bases that are C.
-
#bases_g ⇒ Object
Number of bases that are G.
-
#bases_gc ⇒ Object
Number of bases that are G or C.
- #bases_n ⇒ Object
-
#bases_t ⇒ Object
Number of bases that are T.
-
#basic_metrics ⇒ Object
Get all metrics available for this contig.
- #comparative_metrics ⇒ Object
-
#composition ⇒ Object
Counts the bases and dibases in a sequence (implemented in the C extension as method_composition in ext/transrate/transrate.c).
-
#cpg_count ⇒ Object
CpG count.
-
#cpg_ratio ⇒ Object
observed-to-expected CpG (C-phosphate-G) ratio.
-
#dibase_composition ⇒ Object
Dibase composition of the contig.
- #dibase_count ⇒ Object
- #each(&block) ⇒ Object
-
#gc_skew ⇒ Object
GC skew.
-
#initialize(seq, name: nil) ⇒ Contig
constructor
A new instance of Contig.
- #kmer_count ⇒ Object
- #linguistic_complexity(k) ⇒ Object
- #longest_orf ⇒ Object
-
#orf_length ⇒ Object
Find the longest orf in the contig.
-
#prop_a ⇒ Object
Proportion of bases that are A.
-
#prop_c ⇒ Object
Proportion of bases that are C.
-
#prop_g ⇒ Object
Proportion of bases that are G.
- #prop_gc ⇒ Object
- #prop_n ⇒ Object
-
#prop_t ⇒ Object
Proportion of bases that are T.
- #read_metrics ⇒ Object
Constructor Details
#initialize(seq, name: nil) ⇒ Contig
Returns a new instance of Contig.
18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/transrate/contig.rb', line 18

# Builds a contig around a sequence entry.
#
# NUL characters are removed in place from the entry's sequence string
# and the entry's raw data is dropped. The reference-based and
# read-based metric attributes are reset to their starting values;
# @coverage and @uncovered_bases are not assigned here.
#
# @param seq [#seq, #data=] the sequence entry to wrap
# @param name [String, nil] name to use only when +seq+ does not
#   respond to +entry_id+
def initialize(seq, name: nil)
  seq.seq.gsub!("\0", "") # there is probably a better fix than this
  @seq = seq
  @seq.data = nil # no need to store raw fasta string
  @name = if seq.respond_to?(:entry_id)
            seq.entry_id
          else
            name
          end
  @hits = []
  @reference_coverage = 0
  @collapse_factor = 0
  @is_chimera = false
  @has_crb = false
  @in_bridges = 0
  @mean_coverage = 0
end
Instance Attribute Details
#collapse_factor ⇒ Object
reference-based metrics
15 16 17 |
# File 'lib/transrate/contig.rb', line 15

# Reference-based metric: reader for @collapse_factor
# (set to 0 by the constructor).
#
# @return [Object] the current collapse factor
def collapse_factor
  @collapse_factor
end
#coverage ⇒ Object
read-based metrics
13 14 15 |
# File 'lib/transrate/contig.rb', line 13

# Read-based metric: reader for @coverage. The constructor does not
# assign it, so it is nil until set elsewhere.
#
# @return [Object, nil] the current coverage value
def coverage
  @coverage
end
#has_crb ⇒ Object
reference-based metrics
15 16 17 |
# File 'lib/transrate/contig.rb', line 15

# Reference-based metric: reader for @has_crb
# (set to false by the constructor).
#
# @return [Object] the current has_crb flag
def has_crb
  @has_crb
end
#hits ⇒ Object
Returns the value of attribute hits.
16 17 18 |
# File 'lib/transrate/contig.rb', line 16

# Returns the value of attribute hits
# (an empty array right after construction).
#
# @return [Object] the current hits
def hits
  @hits
end
#in_bridges ⇒ Object
read-based metrics
13 14 15 |
# File 'lib/transrate/contig.rb', line 13

# Read-based metric: reader for @in_bridges
# (set to 0 by the constructor).
#
# @return [Object] the current in_bridges value
def in_bridges
  @in_bridges
end
#is_chimera ⇒ Object
reference-based metrics
15 16 17 |
# File 'lib/transrate/contig.rb', line 15

# Reference-based metric: reader for @is_chimera
# (set to false by the constructor).
#
# @return [Object] the current is_chimera flag
def is_chimera
  @is_chimera
end
#mean_coverage ⇒ Object
read-based metrics
13 14 15 |
# File 'lib/transrate/contig.rb', line 13

# Read-based metric: reader for @mean_coverage
# (set to 0 by the constructor).
#
# @return [Object] the current mean coverage
def mean_coverage
  @mean_coverage
end
#name ⇒ Object
Returns the value of attribute name.
11 12 13 |
# File 'lib/transrate/contig.rb', line 11

# Returns the value of attribute name. The constructor takes it from
# the sequence entry's entry_id when available, otherwise from the
# +name:+ keyword.
#
# @return [Object] the contig name
def name
  @name
end
#reference_coverage ⇒ Object
reference-based metrics
15 16 17 |
# File 'lib/transrate/contig.rb', line 15

# Reference-based metric: reader for @reference_coverage
# (set to 0 by the constructor).
#
# @return [Object] the current reference coverage
def reference_coverage
  @reference_coverage
end
#seq ⇒ Object
Returns the value of attribute seq.
11 12 13 |
# File 'lib/transrate/contig.rb', line 11

# Returns the value of attribute seq: the sequence entry handed to the
# constructor (the sequence string itself is +seq.seq+).
#
# @return [Object] the wrapped sequence entry
def seq
  @seq
end
#uncovered_bases ⇒ Object
read-based metrics
13 14 15 |
# File 'lib/transrate/contig.rb', line 13

# Read-based metric: reader for @uncovered_bases. The constructor does
# not assign it, so it is nil until set elsewhere.
#
# @return [Object, nil] the current uncovered-bases value
def uncovered_bases
  @uncovered_bases
end
Instance Method Details
#at_skew ⇒ Object
AT skew
184 185 186 |
# File 'lib/transrate/contig.rb', line 184

# AT skew: (A - T) / (A + T), computed from the base counts.
#
# The division is done in floating point, so a contig with no A or T
# yields NaN rather than raising.
#
# @return [Float] the AT skew
def at_skew
  a_count = bases_a
  t_count = bases_t
  (a_count - t_count) / (a_count + t_count).to_f
end
#base_composition ⇒ Object
Base composition of the contig
If the instance variable @base_composition is nil when this is called, the C method is called to count the bases and dibases in the sequence; the counts are then read out of the C arrays and stored in hashes. On subsequent calls the stored hash is returned directly.
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/transrate/contig.rb', line 84

# Base composition of the contig.
#
# On the first call this runs the C +composition+ method over the
# sequence string, then reads each count back with +base_count+ and
# +dibase_count+ and stores them in @base_composition and
# @dibase_composition. Later calls return the stored hash.
#
# @return [Hash{Symbol => Object}] counts keyed by :a, :c, :g, :t, :n
def base_composition
  return @base_composition if @base_composition

  # else run the C method
  composition(@seq.seq)
  letters = %w[a c g t n]
  @base_composition = {}
  @dibase_composition = {}
  # single bases are read back by their index in the alphabet
  letters.map(&:to_sym).each_with_index do |base, idx|
    @base_composition[base] = base_count(idx)
  end
  # dibases are indexed in alphabet x alphabet order (aa, ac, ..., nn)
  pairs = letters.product(letters).map { |first, second| :"#{first}#{second}" }
  pairs.each_with_index do |pair, idx|
    @dibase_composition[pair] = dibase_count(idx)
  end
  @base_composition
end
#base_count ⇒ Object
15 |
# File 'ext/transrate/transrate.c', line 15 VALUE method_base_count(VALUE,VALUE); |
#bases_a ⇒ Object
Number of bases that are A
142 143 144 |
# File 'lib/transrate/contig.rb', line 142

# Number of bases that are A.
#
# @return [Object] the :a entry of the base composition
def bases_a
  base_composition[:a]
end
#bases_c ⇒ Object
Number of bases that are C
122 123 124 |
# File 'lib/transrate/contig.rb', line 122

# Number of bases that are C.
#
# @return [Object] the :c entry of the base composition
def bases_c
  base_composition[:c]
end
#bases_g ⇒ Object
Number of bases that are G
132 133 134 |
# File 'lib/transrate/contig.rb', line 132

# Number of bases that are G.
#
# @return [Object] the :g entry of the base composition
def bases_g
  base_composition[:g]
end
#bases_gc ⇒ Object
Number of bases that are G or C
170 171 172 |
# File 'lib/transrate/contig.rb', line 170

# Number of bases that are G or C: the sum of the G and C counts.
#
# @return [Object] bases_g + bases_c
def bases_gc
  g_count = bases_g
  g_count + bases_c
end
#bases_n ⇒ Object
161 162 163 |
# File 'lib/transrate/contig.rb', line 161

# Number of bases that are N.
#
# @return [Object] the :n entry of the base composition
def bases_n
  base_composition[:n]
end
#bases_t ⇒ Object
Number of bases that are T
152 153 154 |
# File 'lib/transrate/contig.rb', line 152

# Number of bases that are T.
#
# @return [Object] the :t entry of the base composition
def bases_t
  base_composition[:t]
end
#basic_metrics ⇒ Object
Get all metrics available for this contig
37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/transrate/contig.rb', line 37

# Collects the sequence-derived metrics of this contig into one hash:
# length, GC proportion, GC and AT skew, CpG count and ratio, ORF
# length, and linguistic complexity for k = 6.
#
# @return [Hash{Symbol => Object}] metric name => value
def basic_metrics
  {
    length: length,
    prop_gc: prop_gc,
    gc_skew: gc_skew,
    at_skew: at_skew,
    cpg_count: cpg_count,
    cpg_ratio: cpg_ratio,
    orf_length: orf_length,
    linguistic_complexity_6: linguistic_complexity(6)
  }
end
#comparative_metrics ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/transrate/contig.rb', line 62

# Collects the reference-based metrics into one hash.
#
# When @has_crb is falsy, has_crb is reported as false and every other
# value is the string "NA". Otherwise the current attribute values are
# reported, with the targets of all hits joined by ";".
#
# @return [Hash{Symbol => Object}] metric name => value
def comparative_metrics
  unless @has_crb
    return {
      has_crb: false,
      collapse_factor: "NA",
      reference_coverage: "NA",
      is_chimera: "NA",
      hits: "NA"
    }
  end

  {
    has_crb: has_crb,
    collapse_factor: collapse_factor,
    reference_coverage: reference_coverage,
    is_chimera: is_chimera,
    hits: hits.map(&:target).join(";")
  }
end
#composition ⇒ Object
Counts the bases and dibases in a sequence (implemented in the C extension as method_composition)
14 |
# File 'ext/transrate/transrate.c', line 14 VALUE method_composition(VALUE, VALUE); |
#cpg_count ⇒ Object
CpG count
189 190 191 |
# File 'lib/transrate/contig.rb', line 189

# CpG count, computed as the sum of the :cg and :gc dibase counts.
#
# @return [Object] dibase_composition[:cg] + dibase_composition[:gc]
def cpg_count
  cg_pairs = dibase_composition[:cg]
  gc_pairs = dibase_composition[:gc]
  cg_pairs + gc_pairs
end
#cpg_ratio ⇒ Object
observed-to-expected CpG (C-phosphate-G) ratio
194 195 196 197 198 199 |
# File 'lib/transrate/contig.rb', line 194

# Observed-to-expected CpG (C-phosphate-G) ratio.
#
# Computed as (:cg + :gc dibase count) / (C count * G count), scaled
# by the number of non-N bases (length - N count). The division is in
# floating point, so a contig with no C or no G yields NaN or
# Infinity rather than raising.
#
# @return [Float] the CpG ratio
def cpg_ratio
  observed = dibase_composition[:cg] + dibase_composition[:gc]
  cg_product = (bases_c * bases_g).to_f
  observed / cg_product * (length - bases_n)
end
#dibase_composition ⇒ Object
Dibase composition of the contig
113 114 115 116 117 118 119 |
# File 'lib/transrate/contig.rb', line 113

# Dibase composition of the contig.
#
# The hash is filled as a side effect of +base_composition+, so that
# is called first when @dibase_composition has not been set yet.
#
# @return [Hash, nil] the stored dibase counts
def dibase_composition
  base_composition unless @dibase_composition
  @dibase_composition
end
#dibase_count ⇒ Object
16 |
# File 'ext/transrate/transrate.c', line 16 VALUE method_dibase_count(VALUE,VALUE); |
#each(&block) ⇒ Object
32 33 34 |
# File 'lib/transrate/contig.rb', line 32

# Iterates over the characters of the sequence string by forwarding
# the given block to String#each_char.
#
# @yieldparam char [String] one character of the sequence
# @return [Object] whatever each_char returns for the given block
def each(&block)
  @seq.seq.each_char(&block)
end
#gc_skew ⇒ Object
GC skew
179 180 181 |
# File 'lib/transrate/contig.rb', line 179

# GC skew: (G - C) / (G + C), computed from the base counts.
#
# The division is done in floating point, so a contig with no G or C
# yields NaN rather than raising.
#
# @return [Float] the GC skew
def gc_skew
  g_count = bases_g
  c_count = bases_c
  (g_count - c_count) / (g_count + c_count).to_f
end
#kmer_count ⇒ Object
17 |
# File 'ext/transrate/transrate.c', line 17 VALUE method_kmer_count(VALUE,VALUE,VALUE); |
#linguistic_complexity(k) ⇒ Object
208 209 210 |
# File 'lib/transrate/contig.rb', line 208

# Linguistic complexity for word size +k+: the value returned by the
# C +kmer_count+ method for the sequence string, divided by 4**k.
# NOTE(review): kmer_count presumably counts distinct k-mers -- confirm
# in ext/transrate/transrate.c.
#
# @param k [Integer] the k-mer length
# @return [Float] kmer_count(k, sequence) / 4**k
def linguistic_complexity(k)
  observed = kmer_count(k, @seq.seq) # call to C
  observed / (4**k).to_f
end
#longest_orf ⇒ Object
18 |
# File 'ext/transrate/transrate.c', line 18 VALUE method_longest_orf(VALUE, VALUE); |
#orf_length ⇒ Object
Find the longest orf in the contig
202 203 204 205 206 |
# File 'lib/transrate/contig.rb', line 202

# Length of the longest ORF in the contig, as reported by the C
# +longest_orf+ method for the sequence string. The result is stored
# in @orf_length and reused on later calls.
#
# @return [Object] the stored result of longest_orf
def orf_length
  @orf_length ||= longest_orf(@seq.seq) # call to C
end
#prop_a ⇒ Object
Proportion of bases that are A
147 148 149 |
# File 'lib/transrate/contig.rb', line 147

# Proportion of bases that are A: the A count divided by the contig
# length, in floating point.
#
# @return [Float] bases_a / length
def prop_a
  bases_a / length.to_f
end
#prop_c ⇒ Object
Proportion of bases that are C
127 128 129 |
# File 'lib/transrate/contig.rb', line 127

# Proportion of bases that are C: the C count divided by the contig
# length, in floating point.
#
# @return [Float] bases_c / length
def prop_c
  bases_c / length.to_f
end
#prop_g ⇒ Object
Proportion of bases that are G
137 138 139 |
# File 'lib/transrate/contig.rb', line 137

# Proportion of bases that are G: the G count divided by the contig
# length, in floating point.
#
# @return [Float] bases_g / length
def prop_g
  bases_g / length.to_f
end
#prop_gc ⇒ Object
174 175 176 |
# File 'lib/transrate/contig.rb', line 174

# Proportion of bases that are G or C: the sum of the G proportion
# and the C proportion.
#
# @return [Object] prop_g + prop_c
def prop_gc
  g_share = prop_g
  g_share + prop_c
end
#prop_n ⇒ Object
165 166 167 |
# File 'lib/transrate/contig.rb', line 165

# Proportion of bases that are N: the N count divided by the contig
# length, in floating point.
#
# @return [Float] bases_n / length
def prop_n
  bases_n / length.to_f
end
#prop_t ⇒ Object
Proportion of bases that are T
157 158 159 |
# File 'lib/transrate/contig.rb', line 157

# Proportion of bases that are T: the T count divided by the contig
# length, in floating point.
#
# @return [Float] bases_t / length
def prop_t
  bases_t / length.to_f
end
#read_metrics ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/transrate/contig.rb', line 50

# Collects the read-based metrics into one hash.
#
# When @coverage is falsy, uncovered_bases and mean_coverage are
# reported as the string "NA"; in_bridges is always reported from the
# attribute.
#
# @return [Hash{Symbol => Object}] metric name => value
def read_metrics
  if @coverage
    {
      uncovered_bases: uncovered_bases,
      mean_coverage: mean_coverage,
      in_bridges: in_bridges
    }
  else
    {
      uncovered_bases: "NA",
      mean_coverage: "NA",
      in_bridges: in_bridges
    }
  end
end