Class: Transrate::ComparativeMetrics
- Inherits:
-
Object
- Object
- Transrate::ComparativeMetrics
- Defined in:
- lib/transrate/comparative_metrics.rb
Instance Attribute Summary collapse
-
#comp_stats ⇒ Object
readonly
Returns the value of attribute comp_stats.
-
#has_run ⇒ Object
readonly
Returns the value of attribute has_run.
-
#rbh_per_contig ⇒ Object
readonly
Returns the value of attribute rbh_per_contig.
-
#rbh_per_reference ⇒ Object
readonly
Returns the value of attribute rbh_per_reference.
-
#reciprocal_hits ⇒ Object
readonly
Returns the value of attribute reciprocal_hits.
-
#reference_coverage ⇒ Object
readonly
Returns the value of attribute reference_coverage.
Instance Method Summary collapse
- #calculate_reference_coverage(crbblast) ⇒ Object
- #get_reference_hits(crbblast) ⇒ Object
-
#initialize(assembly, reference, threads) ⇒ ComparativeMetrics
constructor
A new instance of ComparativeMetrics.
- #per_query_contig_reference_coverage ⇒ Object
- #per_target_contig_reference_coverage(crbblast) ⇒ Object
- #run ⇒ Object
- #run_crb_blast ⇒ Object
Constructor Details
#initialize(assembly, reference, threads) ⇒ ComparativeMetrics
Returns a new instance of ComparativeMetrics.
15 16 17 18 19 20 |
# File 'lib/transrate/comparative_metrics.rb', line 15 def initialize assembly, reference, threads @assembly = assembly @reference = reference @threads = threads @comp_stats = Hash.new end |
Instance Attribute Details
#comp_stats ⇒ Object (readonly)
Returns the value of attribute comp_stats.
13 14 15 |
# File 'lib/transrate/comparative_metrics.rb', line 13 def comp_stats @comp_stats end |
#has_run ⇒ Object (readonly)
Returns the value of attribute has_run.
11 12 13 |
# File 'lib/transrate/comparative_metrics.rb', line 11 def has_run @has_run end |
#rbh_per_contig ⇒ Object (readonly)
Returns the value of attribute rbh_per_contig.
8 9 10 |
# File 'lib/transrate/comparative_metrics.rb', line 8 def rbh_per_contig @rbh_per_contig end |
#rbh_per_reference ⇒ Object (readonly)
Returns the value of attribute rbh_per_reference.
9 10 11 |
# File 'lib/transrate/comparative_metrics.rb', line 9 def rbh_per_reference @rbh_per_reference end |
#reciprocal_hits ⇒ Object (readonly)
Returns the value of attribute reciprocal_hits.
10 11 12 |
# File 'lib/transrate/comparative_metrics.rb', line 10 def reciprocal_hits @reciprocal_hits end |
#reference_coverage ⇒ Object (readonly)
Returns the value of attribute reference_coverage.
12 13 14 |
# File 'lib/transrate/comparative_metrics.rb', line 12 def reference_coverage @reference_coverage end |
Instance Method Details
#calculate_reference_coverage(crbblast) ⇒ Object
28 29 30 31 32 33 34 35 36 |
# File 'lib/transrate/comparative_metrics.rb', line 28 def calculate_reference_coverage crbblast # The reciprocals hash in crb blast has contig names as the key. # In order to look up by the reference name we need to reverse this. # Scan through the reciprocals and get this Hit objects and add them to # the @reference object for each reference sequence get_reference_hits crbblast per_query_contig_reference_coverage per_target_contig_reference_coverage crbblast end |
#get_reference_hits(crbblast) ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/transrate/comparative_metrics.rb', line 38 def get_reference_hits crbblast @reference.each do |name, contig| contig.hits = [] end crbblast.reciprocals.each do |query_id, list| list.each do |hit| unless @reference.assembly.key? hit.target raise TransrateError.new "#{hit.target} not in reference" end @reference[hit.target].hits << hit end end @comp_stats[:CRBB_hits] = crbblast.size @comp_stats[:n_contigs_with_CRBB] = crbblast.reciprocals.size @comp_stats[:p_contigs_with_CRBB] = crbblast.reciprocals.size/@assembly.size.to_f end |
#per_query_contig_reference_coverage ⇒ Object
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/transrate/comparative_metrics.rb', line 55 def per_query_contig_reference_coverage # for each query contig in the @assembly find out how much it covers # the reference n_refs_with_recip = 0 total_crbb_hits = 0 @reference.each do |ref_contig_name, ref_contig| ref_contig.hits.each do |hit| # a Hit from query to target query_contig_name = hit.query unless @assembly.assembly.key? query_contig_name raise TransrateError.new "#{query_contig_name} not in assembly" end @assembly[query_contig_name].has_crb = true @assembly[query_contig_name].hits << hit raise TransrateError.new "query should not be protein" if hit.qprot if hit.tprot coverage = 3*hit.alnlen+2 - 3*hit.mismatches - 3*hit.gaps coverage /= 3.0*hit.tlen else coverage = hit.alnlen - hit.mismatches - hit.gaps coverage /= hit.tlen.to_f end @assembly[query_contig_name].reference_coverage = coverage end if ref_contig.hits.size > 0 # this reference has a crbblast hit n_refs_with_recip += 1 end total_crbb_hits += ref_contig.hits.size end @comp_stats[:rbh_per_reference] = total_crbb_hits / @reference.size.to_f @comp_stats[:n_refs_with_CRBB] = n_refs_with_recip @comp_stats[:p_refs_with_CRBB] = n_refs_with_recip / @reference.size.to_f end |
#per_target_contig_reference_coverage(crbblast) ⇒ Object
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
# File 'lib/transrate/comparative_metrics.rb', line 89 def per_target_contig_reference_coverage crbblast # each target sequence in the reference can have multiple query contigs # hit it. to calculate the reference coverage you can't just add up the # alignment lengths. you have to make sure that overlaps are taken into # account coverage_thresholds = [0.25, 0.5, 0.75, 0.85, 0.95] coverage_totals = [0, 0, 0, 0, 0] prot = crbblast.target_is_prot total_coverage = 0 total_length = 0 @reference.each do |ref_contig_name, ref_contig| if prot covered = Array.new(ref_contig.length*3, false) else covered = Array.new(ref_contig.length, false) end ref_contig.hits.each_with_index do |hit, i| # a Hit from query to target if prot if hit.qstart % 3 == 0 tstart = 3*hit.tstart-4 tend = 3*hit.tend elsif hit.qstart % 3 == 1 tstart = 3*hit.tstart-2 tend = 3*hit.tend elsif hit.qstart % 3 == 2 tstart = 3*hit.tstart-3 tend = 3*hit.tend-1 end if hit.qlen % 3 == 1 tend += 1 elsif hit.qlen % 3 == 2 tend += 2 end else tstart = hit.tstart tend = hit.tend end (tstart..tend).each do |b| covered[b-1] = true # blast coords are 1 indexed end end coverage = covered.reduce(0) { |sum, v| v ? sum + 1 : sum } ref_p = coverage / covered.length.to_f ref_contig.reference_coverage = ref_p coverage_thresholds.each_with_index do |n, index| if ref_p >= n coverage_totals[index] += 1 end end total_coverage += coverage total_length += covered.length end # calculate proportion of ref sequences with coveragre over thresholds coverage_thresholds.each_with_index do |p, i| @comp_stats["cov#{(100*p).to_i}".to_sym] = coverage_totals[i] @comp_stats["p_cov#{(100*p).to_i}".to_sym] = coverage_totals[i]/@reference.size.to_f end @comp_stats[:reference_coverage] = total_coverage / total_length.to_f end |
#run ⇒ Object
22 23 24 25 26 |
# File 'lib/transrate/comparative_metrics.rb', line 22 def run crbblast = run_crb_blast calculate_reference_coverage crbblast @has_run = true end |
#run_crb_blast ⇒ Object
152 153 154 155 156 |
# File 'lib/transrate/comparative_metrics.rb', line 152 def run_crb_blast crbblast = CRB_Blast::CRB_Blast.new @assembly.file, @reference.file crbblast.run(1e-5, @threads, true) crbblast end |