Class: Macroape::PWMCounting
- Inherits:
-
Object
- Object
- Macroape::PWMCounting
- Defined in:
- lib/macroape/pwm_counting.rb,
lib/macroape/pwm_compare_aligned.rb
Instance Attribute Summary collapse
-
#background ⇒ Object
Returns the value of attribute background.
-
#max_hash_size ⇒ Object
Returns the value of attribute max_hash_size.
-
#pwm ⇒ Object
Returns the value of attribute pwm.
Instance Method Summary collapse
- #best_score ⇒ Object
-
#best_suffix(i) ⇒ Object
Best score of the matrix suffix that starts at column i.
- #count_by_threshold(threshold) ⇒ Object
- #count_distribution ⇒ Object
- #count_distribution_after_threshold(threshold) ⇒ Object
- #count_distribution_under_pvalue(max_pvalue) ⇒ Object
- #counts_by_thresholds(*thresholds) ⇒ Object
-
#initialize(pwm, background: Bioinform::Background::Wordwise, max_hash_size: nil) ⇒ PWMCounting
constructor
A new instance of PWMCounting.
- #left_augmented(n) ⇒ Object
- #length ⇒ Object
- #matrix ⇒ Object
- #pvalue_by_threshold(threshold) ⇒ Object
- #recalc_score_hash(scores, column, least_sufficient) ⇒ Object
- #reverse_complemented ⇒ Object
- #right_augmented(n) ⇒ Object
- #score_mean ⇒ Object
- #score_variance ⇒ Object
- #threshold(pvalue) ⇒ Object
- #threshold_and_real_pvalue(pvalue) ⇒ Object
- #threshold_gauss_estimation(max_pvalue) ⇒ Object
- #thresholds(*pvalues) ⇒ Object
-
#thresholds_by_pvalues(*pvalues) ⇒ Object
Returns a hash mapping each pvalue to a pair [thresholds, counts], where both elements are ranges.
- #vocabulary_volume ⇒ Object
- #weak_threshold(pvalue) ⇒ Object
- #weak_threshold_and_real_pvalue(pvalue) ⇒ Object
-
#weak_thresholds(*pvalues) ⇒ Object
“Weak” means that the real pvalue of the threshold is not less than the given pvalue, whereas the real pvalue of the usual threshold is not greater than the given pvalue.
- #worst_score ⇒ Object
- #worst_suffix(i) ⇒ Object
Constructor Details
#initialize(pwm, background: Bioinform::Background::Wordwise, max_hash_size: nil) ⇒ PWMCounting
Returns a new instance of PWMCounting.
9 10 11 12 13 |
# File 'lib/macroape/pwm_counting.rb', line 9
#
# Stores the matrix and the counting settings on the new instance.
#
# @param pwm the weight matrix object; the other methods of this class call
#   +matrix+, +length+, +each_position+ and +threshold_gauss_estimation+ on it
# @param background the background model; defaults to Bioinform::Background::Wordwise
# @param max_hash_size [Integer, nil] upper bound on the size of the score hash
#   checked in #count_distribution_after_threshold; nil disables the check
def initialize(pwm, background: Bioinform::Background::Wordwise, max_hash_size: nil)
  @max_hash_size = max_hash_size
  @background = background
  @pwm = pwm
end
Instance Attribute Details
#background ⇒ Object
Returns the value of attribute background.
7 8 9 |
# File 'lib/macroape/pwm_counting.rb', line 7
#
# Reader for the background model given to the constructor.
#
# @return the value of the +@background+ instance variable
def background
  @background
end
#max_hash_size ⇒ Object
Returns the value of attribute max_hash_size.
7 8 9 |
# File 'lib/macroape/pwm_counting.rb', line 7
#
# Reader for the score-hash size limit given to the constructor.
#
# @return the value of the +@max_hash_size+ instance variable (nil by default)
def max_hash_size
  @max_hash_size
end
#pwm ⇒ Object
Returns the value of attribute pwm.
7 8 9 |
# File 'lib/macroape/pwm_counting.rb', line 7
#
# Reader for the weight matrix object given to the constructor.
#
# @return the value of the +@pwm+ instance variable
def pwm
  @pwm
end
Instance Method Details
#best_score ⇒ Object
31 32 33 |
# File 'lib/macroape/pwm_counting.rb', line 31
#
# Best score over the whole matrix: the best suffix score taken from column 0.
#
# @return [Float]
def best_score
  first_column = 0
  best_suffix(first_column)
end
#best_suffix(i) ⇒ Object
Best score of the matrix suffix that starts at column i.
40 41 42 |
# File 'lib/macroape/pwm_counting.rb', line 40
#
# Sums the largest element of every matrix column from index +i+ up to
# (but not including) +length+.
#
# @param i [Integer] index of the first column of the suffix
# @return [Float] the sum of column maxima; 0.0 when the suffix is empty
def best_suffix(i)
  suffix_columns = matrix[i...length]
  suffix_columns.inject(0.0) { |total, column| total + column.max }
end
#count_by_threshold(threshold) ⇒ Object
167 168 169 |
# File 'lib/macroape/pwm_counting.rb', line 167
#
# Single-threshold convenience wrapper around #counts_by_thresholds.
#
# @param threshold [Numeric] score threshold
# @return the count stored under +threshold+ in the hash returned by
#   #counts_by_thresholds
def count_by_threshold(threshold)
  counts = counts_by_thresholds(threshold)
  counts[threshold]
end
#count_distribution ⇒ Object
142 143 144 |
# File 'lib/macroape/pwm_counting.rb', line 142
#
# Score distribution computed with the threshold set to #worst_score.
# The result is memoized in +@count_distribution+.
#
# @return [Hash] score => count
def count_distribution
  return @count_distribution if @count_distribution

  @count_distribution = count_distribution_after_threshold(worst_score)
end
#count_distribution_after_threshold(threshold) ⇒ Object
132 133 134 135 136 137 138 139 140 |
# File 'lib/macroape/pwm_counting.rb', line 132
#
# Builds the score => count hash restricted to scores that are not less
# than +threshold+.
#
# When the memoized full distribution (+@count_distribution+) is present it
# is simply filtered. Otherwise the hash is grown column by column through
# #recalc_score_hash; at every column, partial scores that cannot reach
# +threshold+ even with the best possible remaining suffix are dropped.
#
# @param threshold [Numeric] minimal score to keep
# @return [Hash] score => count
# @raise [RuntimeError] when +max_hash_size+ is set and the intermediate
#   hash grows beyond it
def count_distribution_after_threshold(threshold)
  if @count_distribution
    return @count_distribution.select { |score, _count| score >= threshold }
  end

  scores = { 0 => 1 }
  length.times do |column|
    # Lowest partial score that can still reach the threshold.
    least_sufficient = threshold - best_suffix(column + 1)
    scores.replace(recalc_score_hash(scores, matrix[column], least_sufficient))
    next unless max_hash_size && scores.size > max_hash_size

    raise 'Hash overflow in PWM::ThresholdByPvalue#count_distribution_after_threshold'
  end
  scores
end
#count_distribution_under_pvalue(max_pvalue) ⇒ Object
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/macroape/pwm_counting.rb', line 93
#
# Computes a score => count distribution whose total count is at least
# +max_pvalue * vocabulary_volume+.
#
# A threshold is first estimated with #threshold_gauss_estimation; if the
# resulting distribution holds too few words, the pvalue used for the
# estimation is doubled (which lowers the threshold) and the distribution
# is recomputed.
#
# The search stops as soon as the threshold has dropped to #worst_score:
# that is the threshold #count_distribution itself uses for the full
# distribution, so retrying cannot add more words. Without this stop the
# loop would never end when the requested count exceeds what the full
# distribution holds (for example, when +max_pvalue+ is greater than 1).
#
# @param max_pvalue [Numeric] requested pvalue
# @return [Hash] score => count; an empty hash when the requested count is
#   not positive
def count_distribution_under_pvalue(max_pvalue)
  look_for_count = max_pvalue * vocabulary_volume
  estimation_pvalue = max_pvalue
  cnt_distribution = {}
  until cnt_distribution.each_value.inject(0.0) { |sum, count| sum + count } >= look_for_count
    approximate_threshold =
      begin
        threshold_gauss_estimation(estimation_pvalue)
      rescue StandardError
        # Best-effort fallback: if the estimation fails, take everything.
        worst_score
      end
    cnt_distribution = count_distribution_after_threshold(approximate_threshold)
    # The distribution is already complete; lowering the threshold further is useless.
    break if approximate_threshold <= worst_score

    # The estimation counted too few words: lower the threshold estimation by doubling the pvalue.
    estimation_pvalue *= 2
  end
  cnt_distribution
end
#counts_by_thresholds(*thresholds) ⇒ Object
159 160 161 162 163 164 165 |
# File 'lib/macroape/pwm_counting.rb', line 159
#
# For every given threshold, sums the counts of all scores that are not
# less than that threshold.
#
# The distribution is computed once, for the smallest threshold, and then
# reused for the others.
#
# @param thresholds [Array<Numeric>] score thresholds
# @return [Hash] threshold => count (Float); an empty hash when no
#   thresholds are given
def counts_by_thresholds(*thresholds)
  # Without thresholds there is no minimum to compute the distribution for.
  return {} if thresholds.empty?

  scores = count_distribution_after_threshold(thresholds.min)
  thresholds.each_with_object({}) do |threshold, hsh|
    hsh[threshold] = scores.inject(0.0) do |sum, (score, count)|
      score >= threshold ? sum + count : sum
    end
  end
end
#left_augmented(n) ⇒ Object
5 6 7 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 5
#
# Wraps the result of +pwm.left_augmented(n)+ in a new PWMCounting that
# keeps this object's background and max_hash_size.
#
# @param n forwarded unchanged to +pwm.left_augmented+
# @return [PWMCounting]
def left_augmented(n)
  augmented_pwm = pwm.left_augmented(n)
  PWMCounting.new(augmented_pwm, background: background, max_hash_size: max_hash_size)
end
#length ⇒ Object
27 28 29 |
# File 'lib/macroape/pwm_counting.rb', line 27
#
# Delegates to the wrapped matrix object.
#
# @return whatever +pwm.length+ returns
def length
  wrapped = pwm
  wrapped.length
end
#matrix ⇒ Object
15 16 17 |
# File 'lib/macroape/pwm_counting.rb', line 15
#
# Delegates to the wrapped matrix object.
#
# @return whatever +pwm.matrix+ returns; elsewhere in this class it is
#   sliced by column index and each column is indexed by letter
def matrix
  wrapped = pwm
  wrapped.matrix
end
#pvalue_by_threshold(threshold) ⇒ Object
171 172 173 |
# File 'lib/macroape/pwm_counting.rb', line 171
#
# Count of words scoring not less than +threshold+, divided by
# #vocabulary_volume.
#
# @param threshold [Numeric] score threshold
# @return the quotient of #count_by_threshold and #vocabulary_volume
def pvalue_by_threshold(threshold)
  passing_count = count_by_threshold(threshold)
  passing_count / vocabulary_volume
end
#recalc_score_hash(scores, column, least_sufficient) ⇒ Object
146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/macroape/pwm_counting.rb', line 146
#
# Extends every partial score by one matrix column.
#
# For each existing score and each of the four letter indices, the column
# weight is added to the score. Extended scores below +least_sufficient+
# are discarded; the rest accumulate +count * background.counts[letter]+.
#
# @param scores [Hash] partial score => count
# @param column indexable by letter index 0..3, yielding the letter weight
# @param least_sufficient [Numeric] minimal extended score to keep
# @return [Hash] extended score => count (missing keys default to 0)
def recalc_score_hash(scores, column, least_sufficient)
  scores.each_with_object(Hash.new(0)) do |(score, count), extended|
    4.times do |letter|
      candidate = score + column[letter]
      next unless candidate >= least_sufficient

      extended[candidate] += count * background.counts[letter]
    end
  end
end
#reverse_complemented ⇒ Object
11 12 13 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 11
#
# Wraps the result of +pwm.reverse_complemented+ in a new PWMCounting that
# keeps this object's background and max_hash_size.
#
# @return [PWMCounting]
def reverse_complemented
  revcomp_pwm = pwm.reverse_complemented
  PWMCounting.new(revcomp_pwm, background: background, max_hash_size: max_hash_size)
end
#right_augmented(n) ⇒ Object
8 9 10 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 8
#
# Wraps the result of +pwm.right_augmented(n)+ in a new PWMCounting that
# keeps this object's background and max_hash_size.
#
# @param n forwarded unchanged to +pwm.right_augmented+
# @return [PWMCounting]
def right_augmented(n)
  augmented_pwm = pwm.right_augmented(n)
  PWMCounting.new(augmented_pwm, background: background, max_hash_size: max_hash_size)
end
#score_mean ⇒ Object
48 49 50 |
# File 'lib/macroape/pwm_counting.rb', line 48
#
# Sum of +background.mean(position)+ over all positions yielded by
# +pwm.each_position+.
#
# @return [Float]
def score_mean
  total = 0.0
  pwm.each_position.each do |position|
    total += background.mean(position)
  end
  total
end
#score_variance ⇒ Object
52 53 54 |
# File 'lib/macroape/pwm_counting.rb', line 52
#
# Sum over all positions yielded by +pwm.each_position+ of
# +background.mean_square(position) - background.mean(position)**2+.
#
# @return [Float]
def score_variance
  total = 0.0
  pwm.each_position.each do |position|
    # Same left-to-right evaluation as a single running expression.
    total = total + background.mean_square(position) - background.mean(position)**2
  end
  total
end
#threshold(pvalue) ⇒ Object
62 63 64 |
# File 'lib/macroape/pwm_counting.rb', line 62
#
# Threshold for a single pvalue: the threshold of the first triple yielded
# by #thresholds.
#
# @param pvalue [Numeric]
# @return the threshold yielded by #thresholds for +pvalue+
def threshold(pvalue)
  thresholds(pvalue) do |_pvalue, found_threshold, _real_pvalue|
    return found_threshold
  end
end
#threshold_and_real_pvalue(pvalue) ⇒ Object
65 66 67 |
# File 'lib/macroape/pwm_counting.rb', line 65
#
# Threshold and real pvalue for a single pvalue, taken from the first
# triple yielded by #thresholds.
#
# @param pvalue [Numeric]
# @return [Array] two elements: threshold and real pvalue
def threshold_and_real_pvalue(pvalue)
  thresholds(pvalue) do |_pvalue, found_threshold, real_pvalue|
    return [found_threshold, real_pvalue]
  end
end
#threshold_gauss_estimation(pvalue) ⇒ Object
23 24 25 |
# File 'lib/macroape/pwm_counting.rb', line 23
#
# Delegates the threshold estimation to the wrapped matrix object.
#
# @param max_pvalue forwarded unchanged to +pwm.threshold_gauss_estimation+
# @return whatever +pwm.threshold_gauss_estimation+ returns
def threshold_gauss_estimation(max_pvalue)
  wrapped = pwm
  wrapped.threshold_gauss_estimation(max_pvalue)
end
#thresholds(*pvalues) ⇒ Object
75 76 77 78 79 80 81 |
# File 'lib/macroape/pwm_counting.rb', line 75
#
# Yields a threshold and its real pvalue for each requested pvalue.
#
# For every entry of #thresholds_by_pvalues the threshold is placed 10% of
# the way from the lower to the upper bound of the threshold range, and the
# real pvalue is the upper-bound count (+counts.end+) divided by
# #vocabulary_volume.
#
# @param pvalues [Array<Numeric>] requested pvalues
# @yield [pvalue, threshold, real_pvalue]
# @return [Hash] the hash returned by #thresholds_by_pvalues when a block
#   is given
# @return [Enumerator] when no block is given (previously this raised
#   LocalJumpError after doing all the work)
def thresholds(*pvalues)
  return enum_for(:thresholds, *pvalues) unless block_given?

  thresholds_by_pvalues(*pvalues).each do |pvalue, (threshold_range, counts)|
    threshold = threshold_range.begin + 0.1 * (threshold_range.end - threshold_range.begin)
    real_pvalue = counts.end.to_f / vocabulary_volume
    yield pvalue, threshold, real_pvalue
  end
end
#thresholds_by_pvalues(*pvalues) ⇒ Object
Return value: a hash of the form pvalue => [thresholds, counts], where thresholds = left_threshold .. right_threshold (left_threshold < right_threshold) and counts = left_count .. right_count (left_count > right_count).
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/macroape/pwm_counting.rb', line 113
#
# For each pvalue finds the pair of adjacent scores, and the matching
# cumulative counts, between which the requested word count falls.
#
# Scores are walked from the highest down while counts are accumulated.
# The first score whose cumulative count reaches +pvalue * vocabulary_volume+
# becomes the lower bound; the previous (higher) score becomes the upper
# bound. When the very first score already suffices, the upper bound is
# +best_score + 1.0+ with a count of 0.0.
#
# @param pvalues [Array<Numeric>] requested pvalues
# @return [Hash] pvalue => [(minscore..maxscore), (count_at_minscore..count_at_maxscore)],
#   keyed in ascending pvalue order
def thresholds_by_pvalues(*pvalues)
  distribution = count_distribution_under_pvalue(pvalues.max)
  descending = distribution.sort.reverse
  scores = descending.map(&:first)
  counts = descending.map(&:last)
  # partial_sums is not a core Array method; presumably a project extension — TODO confirm.
  partial_sums = counts.partial_sums

  pvalues.sort.each_with_object({}) do |pvalue, results|
    look_for_count = pvalue * vocabulary_volume
    # NOTE(review): ind is nil when no cumulative count reaches look_for_count;
    # the indexing below then raises.
    ind = partial_sums.index { |sum| sum >= look_for_count }
    minscore = scores[ind]
    count_at_minscore = partial_sums[ind]
    if ind > 0
      maxscore = scores[ind - 1]
      count_at_maxscore = partial_sums[ind - 1]
    else
      maxscore = best_score + 1.0
      count_at_maxscore = 0.0
    end
    results[pvalue] = [(minscore..maxscore), (count_at_minscore..count_at_maxscore)]
  end
end
#vocabulary_volume ⇒ Object
19 20 21 |
# File 'lib/macroape/pwm_counting.rb', line 19
#
# +background.volume+ raised to the power of the matrix length.
#
# @return [Numeric]
def vocabulary_volume
  per_position = background.volume
  per_position**length
end
#weak_threshold(pvalue) ⇒ Object
68 69 70 |
# File 'lib/macroape/pwm_counting.rb', line 68
#
# Weak threshold for a single pvalue: the threshold of the first triple
# yielded by #weak_thresholds.
#
# @param pvalue [Numeric]
# @return the threshold yielded by #weak_thresholds for +pvalue+
def weak_threshold(pvalue)
  weak_thresholds(pvalue) do |_pvalue, found_threshold, _real_pvalue|
    return found_threshold
  end
end
#weak_threshold_and_real_pvalue(pvalue) ⇒ Object
71 72 73 |
# File 'lib/macroape/pwm_counting.rb', line 71
#
# Weak threshold and its real pvalue for a single pvalue, taken from the
# first triple yielded by #weak_thresholds.
#
# @param pvalue [Numeric]
# @return [Array] two elements: threshold and real pvalue
def weak_threshold_and_real_pvalue(pvalue)
  weak_thresholds(pvalue) do |_pvalue, found_threshold, real_pvalue|
    return [found_threshold, real_pvalue]
  end
end
#weak_thresholds(*pvalues) ⇒ Object
“Weak” means that the real pvalue of the threshold is not less than the given pvalue, whereas the real pvalue of the usual threshold is not greater than the given pvalue.
84 85 86 87 88 89 90 |
# File 'lib/macroape/pwm_counting.rb', line 84
#
# Yields a weak threshold and its real pvalue for each requested pvalue.
#
# For every entry of #thresholds_by_pvalues the threshold is the lower
# bound of the threshold range, and the real pvalue is the lower-bound
# count (+counts.begin+) divided by #vocabulary_volume.
#
# @param pvalues [Array<Numeric>] requested pvalues
# @yield [pvalue, threshold, real_pvalue]
# @return [Hash] the hash returned by #thresholds_by_pvalues when a block
#   is given
# @return [Enumerator] when no block is given (previously this raised
#   LocalJumpError after doing all the work)
def weak_thresholds(*pvalues)
  return enum_for(:weak_thresholds, *pvalues) unless block_given?

  thresholds_by_pvalues(*pvalues).each do |pvalue, (threshold_range, counts)|
    threshold = threshold_range.begin.to_f
    real_pvalue = counts.begin.to_f / vocabulary_volume
    yield pvalue, threshold, real_pvalue
  end
end
#worst_score ⇒ Object
35 36 37 |
# File 'lib/macroape/pwm_counting.rb', line 35
#
# Worst score over the whole matrix: the worst suffix score taken from column 0.
#
# @return [Float]
def worst_score
  first_column = 0
  worst_suffix(first_column)
end
#worst_suffix(i) ⇒ Object
44 45 46 |
# File 'lib/macroape/pwm_counting.rb', line 44
#
# Sums the smallest element of every matrix column from index +i+ up to
# (but not including) +length+.
#
# @param i [Integer] index of the first column of the suffix
# @return [Float] the sum of column minima; 0.0 when the suffix is empty
def worst_suffix(i)
  suffix_columns = matrix[i...length]
  suffix_columns.inject(0.0) { |total, column| total + column.min }
end