Class: Transrate::ComparativeMetrics
- Inherits:
-
Object
- Object
- Transrate::ComparativeMetrics
- Defined in:
- lib/transrate/comparative_metrics.rb
Instance Attribute Summary collapse
-
#comp_stats ⇒ Object
readonly
Returns the value of attribute comp_stats.
-
#has_run ⇒ Object
readonly
Returns the value of attribute has_run.
-
#n_chimeras ⇒ Object
readonly
Returns the value of attribute n_chimeras.
-
#p_chimeras ⇒ Object
readonly
Returns the value of attribute p_chimeras.
-
#rbh_per_contig ⇒ Object
readonly
Returns the value of attribute rbh_per_contig.
-
#rbh_per_reference ⇒ Object
readonly
Returns the value of attribute rbh_per_reference.
-
#reciprocal_hits ⇒ Object
readonly
Returns the value of attribute reciprocal_hits.
-
#reference_coverage ⇒ Object
readonly
Returns the value of attribute reference_coverage.
Instance Method Summary collapse
-
#calculate_coverage(blocks) ⇒ Object
Calculate the total coverage from a set of coverage blocks.
- #chimeras(crbblast) ⇒ Object
-
#collapse_factor(reciprocals) ⇒ Object
Count unique reference proteins per contig.
-
#count_ref_crbbs ⇒ Object
Count reference proteins with at least one recprocal hit.
-
#coverage(crbblast) ⇒ Object
coverage of contigs that have reciprocal hits divided by number of reciprocal targets.
-
#initialize(assembly, reference, threads) ⇒ ComparativeMetrics
constructor
A new instance of ComparativeMetrics.
- #overlap(astart, astop, bstart, bstop) ⇒ Object
- #overlap_amount(astart, astop, bstart, bstop) ⇒ Object
- #reciprocal_best_blast ⇒ Object
- #run ⇒ Object
- #run_comp_stats ⇒ Object
Constructor Details
#initialize(assembly, reference, threads) ⇒ ComparativeMetrics
Returns a new instance of ComparativeMetrics.
16 17 18 19 20 21 |
# File 'lib/transrate/comparative_metrics.rb', line 16 def initialize assembly, reference, threads @assembly = assembly @reference = reference @threads = threads @comp_stats = Hash.new end |
Instance Attribute Details
#comp_stats ⇒ Object (readonly)
Returns the value of attribute comp_stats.
13 14 15 |
# File 'lib/transrate/comparative_metrics.rb', line 13 def comp_stats @comp_stats end |
#has_run ⇒ Object (readonly)
Returns the value of attribute has_run.
11 12 13 |
# File 'lib/transrate/comparative_metrics.rb', line 11 def has_run @has_run end |
#n_chimeras ⇒ Object (readonly)
Returns the value of attribute n_chimeras.
14 15 16 |
# File 'lib/transrate/comparative_metrics.rb', line 14 def n_chimeras @n_chimeras end |
#p_chimeras ⇒ Object (readonly)
Returns the value of attribute p_chimeras.
14 15 16 |
# File 'lib/transrate/comparative_metrics.rb', line 14 def p_chimeras @p_chimeras end |
#rbh_per_contig ⇒ Object (readonly)
Returns the value of attribute rbh_per_contig.
8 9 10 |
# File 'lib/transrate/comparative_metrics.rb', line 8 def rbh_per_contig @rbh_per_contig end |
#rbh_per_reference ⇒ Object (readonly)
Returns the value of attribute rbh_per_reference.
9 10 11 |
# File 'lib/transrate/comparative_metrics.rb', line 9 def rbh_per_reference @rbh_per_reference end |
#reciprocal_hits ⇒ Object (readonly)
Returns the value of attribute reciprocal_hits.
10 11 12 |
# File 'lib/transrate/comparative_metrics.rb', line 10 def reciprocal_hits @reciprocal_hits end |
#reference_coverage ⇒ Object (readonly)
Returns the value of attribute reference_coverage.
12 13 14 |
# File 'lib/transrate/comparative_metrics.rb', line 12 def reference_coverage @reference_coverage end |
Instance Method Details
#calculate_coverage(blocks) ⇒ Object
Calculate the total coverage from a set of coverage blocks
191 192 193 194 195 196 197 198 199 200 201 202 203 |
# File 'lib/transrate/comparative_metrics.rb', line 191 def calculate_coverage blocks coverage = 0 blocks.each do |block| if block[0] and block[1] if block[0]>=0 and block[1]>=0 coverage += block[1] - block[0] + 1 end else puts "error: key = #{key}, #{blocks}" end end coverage end |
#chimeras(crbblast) ⇒ Object
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
# File 'lib/transrate/comparative_metrics.rb', line 213 def chimeras crbblast @n_chimeras = 0 crbblast.reciprocals.each_pair do |key, list| p = 0 list.each_with_index do |a, i| list.each_with_index do |b, j| if j>i if a.target == b.target astart, astop = [a.tstart, a.tend].minmax bstart, bstop = [b.tstart, b.tend].minmax oa = overlap_amount(astart, astop, bstart, bstop) if oa > 0.75 p += 1 end else astart, astop = [a.qstart, a.qend].minmax bstart, bstop = [b.qstart, b.qend].minmax oa = overlap_amount(astart, astop, bstart, bstop) if oa < 0.25 p += 1 end end end end end if p/list.size.to_f >= 0.5 @n_chimeras += 1 unless @assembly.assembly.key? key puts "key not in assembly: #{key}" end @assembly[key].is_chimera = true end end @p_chimeras = @n_chimeras / crbblast.reciprocals.length.to_f end |
#collapse_factor(reciprocals) ⇒ Object
Count unique reference proteins per contig
304 305 306 307 308 309 310 311 312 313 314 |
# File 'lib/transrate/comparative_metrics.rb', line 304 def collapse_factor reciprocals return @collapse_factor unless @collapse_factor.nil? cf_sum = 0 reciprocals.each do |query, hits| uniq_hits = Set.new hits.map{ |h| h.target } cf = uniq_hits.length @assembly[query].collapse_factor = cf cf_sum += cf end cf_sum / reciprocals.size end |
#count_ref_crbbs ⇒ Object
Count reference proteins with at least one recprocal hit
206 207 208 209 210 211 |
# File 'lib/transrate/comparative_metrics.rb', line 206 def count_ref_crbbs @n_refs_with_recip = @reference.assembly.inject(0) do |sum, entry| name, contig = entry sum + (contig.hits.length > 0 ? 1 : 0) end end |
#coverage(crbblast) ⇒ Object
coverage of contigs that have reciprocal hits divided by number of reciprocal targets
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 |
# File 'lib/transrate/comparative_metrics.rb', line 59 def coverage crbblast return @reference_coverage unless @reference_coverage.nil? crbblast.reciprocals.each do |key, list| list.each_with_index do |hit, i| unless @reference.assembly.key? hit.target raise "#{hit.target} not in reference" end @reference[hit.target].hits << hit unless @assembly.assembly.key? hit.query raise "#{hit.query} not in assembly" end contig = @assembly[hit.query] contig.has_crb = true # how much of the reference is covered by this single contig contig.reference_coverage = hit.alnlen / hit.tlen contig.hits << hit end end total_coverage = 0 total_length = 0 cov = [0.25, 0.5, 0.75, 0.85, 0.95] @reference.each_value do |ref_contig| key = ref_contig.name list = ref_contig.hits total_length += crbblast.target_is_prot ? ref_contig.length : ref_contig.length*3 next if list.empty? # ah this is what was breaking everything blocks = [] target_length = 0 list.each do |hit| target_length = hit.tlen if crbblast.target_is_prot target_length *= 3 start, stop = [hit.tstart, hit.tend].minmax start = start*3-2 stop = stop*3 else start, stop = [hit.tstart, hit.tend].minmax end if blocks.empty? blocks << [start, stop] else found=false blocks.each do |block| # if query overlaps with any block extend that block o = overlap(block[0], block[1], start, stop) if o == 0 # perfect overlap found=true elsif o == 1 # partial overlap block[0] = start found=true elsif o == 2 # partial overlap block[1] = stop found=true elsif o == 3 # full overlap block[0] = start block[1] = stop found=true elsif o == 4 # full overlap found=true # nothing # elsif o == 5 || o == 6 # no overlap end end if !found blocks << [start, stop] end # if any blocks now overlap then extend one block and remove # the other end end blocks.each_with_index do |block_a,a| blocks.each_with_index do |block_b,b| if a!=b o = overlap(block_a[0], block_a[1], block_b[0], block_b[1]) if o == 0 # perfect overlap block_b[0]=-1 block_b[1]=-1 elsif o == 1 # partial overlap block_a[0] = block_b[0] block_b[0] = -1 block_b[1] = -1 elsif o == 2 # partial overlap block_a[1] = block_b[1] block_b[0] = -1 block_b[1] = -1 elsif o == 3 # full overlap block_a[0] = block_b[0] block_a[1] = block_b[1] block_b[0] = -1 block_b[1] = -1 elsif o == 4 # full overlap block_b[0] = -1 block_b[1] = -1 # elsif o == 5 || o == 6# no overlap # do nothing # elsif # no overlap # do nothing end end end # each_with_index b end # each_with_index a # sum blocks to find total coverage length_of_coverage = calculate_coverage blocks @cov ||= [0, 0, 0, 0, 0] if target_length > 0 # puts "#{length_of_coverage} / #{target_length.to_f}" ref_p = length_of_coverage / target_length.to_f else ref_p = 0 end ref_contig.reference_coverage = ref_p cov.each_with_index do |c, i| if ref_p >= c @cov[i] +=1 end end total_coverage += length_of_coverage end cov.each_with_index do |p, i| @comp_stats["cov#{(100*p).to_i}".to_sym] = @cov[i] @comp_stats["p_cov#{(100*p).to_i}".to_sym] = @cov[i]/@reference.size.to_f end total_coverage / total_length.to_f end |
#overlap(astart, astop, bstart, bstop) ⇒ Object
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 |
# File 'lib/transrate/comparative_metrics.rb', line 251 def overlap(astart, astop, bstart, bstop) if astart == bstart and astop == bstop return 0 elsif astart < bstart if astop > bstart if astop > bstop return 4 else return 2 end else return 5 # no overlap end else if bstop > astart if bstop > astop return 3 else return 1 end else return 6 # no overlap end end end |
#overlap_amount(astart, astop, bstart, bstop) ⇒ Object
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 |
# File 'lib/transrate/comparative_metrics.rb', line 277 def overlap_amount(astart, astop, bstart, bstop) if astart == bstart and astop == bstop return 1 elsif astart < bstart if astop > bstart if astop > bstop return (bstop-bstart+1)/(astop-astart+1).to_f # 4 else return (astop-bstart+1)/(bstop-astart+1).to_f # 2 end else return 0 # 5 no overlap end else if bstop > astart if bstop > astop return (astop-astart+1)/(bstop-bstart+1).to_f # 3 else return (bstop-astart+1)/(astop-bstart+1).to_f # 1 end else return 0 # 6 no overlap end end end |
#reciprocal_best_blast ⇒ Object
51 52 53 54 55 |
# File 'lib/transrate/comparative_metrics.rb', line 51 def reciprocal_best_blast crbblast = CRB_Blast::CRB_Blast.new @assembly.file, @reference.file crbblast.run(1e-5, @threads, true) crbblast end |
#run ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/transrate/comparative_metrics.rb', line 23 def run @crbblast = reciprocal_best_blast @reference_coverage = coverage @crbblast @collapse_factor = collapse_factor @crbblast.reciprocals @reciprocal_hits = @crbblast.size @rbh_per_reference = @reciprocal_hits.to_f / @reference.size.to_f @p_contigs_with_recip = @crbblast.reciprocals.size / @assembly.size.to_f @n_contigs_with_recip = @crbblast.reciprocals.size count_ref_crbbs @p_refs_with_recip = @n_refs_with_recip / @reference.size.to_f chimeras @crbblast self.run_comp_stats @has_run = true end |
#run_comp_stats ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/transrate/comparative_metrics.rb', line 38 def run_comp_stats @comp_stats[:CRBB_hits] = @reciprocal_hits # CRBB hits @comp_stats[:p_contigs_with_CRBB] = @p_contigs_with_recip @comp_stats[:n_contigs_with_CRBB] = @n_contigs_with_recip @comp_stats[:p_refs_with_CRBB] = @p_refs_with_recip @comp_stats[:n_refs_with_CRBB] = @n_refs_with_recip @comp_stats[:rbh_per_reference] = @rbh_per_reference @comp_stats[:reference_coverage] = @reference_coverage @comp_stats[:collapse_factor] = @collapse_factor @comp_stats[:n_chimeras] = @n_chimeras @comp_stats[:p_chimeras] = @p_chimeras end |