Class: Macroape::PWMCounting

Inherits:
Object
  • Object
show all
Defined in:
lib/macroape/pwm_counting.rb,
lib/macroape/pwm_compare_aligned.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(pwm, background: Bioinform::Background::Wordwise, max_hash_size: nil) ⇒ PWMCounting

Returns a new instance of PWMCounting.



9
10
11
12
13
# File 'lib/macroape/pwm_counting.rb', line 9

# Stores the wrapped matrix together with the counting options.
#
# @param pwm the matrix object to wrap; kept as-is in @pwm
# @param background the background model; Bioinform::Background::Wordwise when omitted
# @param max_hash_size optional size limit kept in @max_hash_size (nil when omitted)
def initialize(pwm, background: Bioinform::Background::Wordwise, max_hash_size: nil)
  @pwm, @background = pwm, background
  @max_hash_size = max_hash_size
end

Instance Attribute Details

#background ⇒ Object

Returns the value of attribute background.



7
8
9
# File 'lib/macroape/pwm_counting.rb', line 7

# Reader for the background model given to the constructor.
#
# @return the value held in @background
def background
  @background
end

#max_hash_size ⇒ Object

Returns the value of attribute max_hash_size.



7
8
9
# File 'lib/macroape/pwm_counting.rb', line 7

# Reader for the optional size limit given to the constructor.
#
# @return the value held in @max_hash_size (nil when no limit was given)
def max_hash_size
  @max_hash_size
end

#pwm ⇒ Object

Returns the value of attribute pwm.



7
8
9
# File 'lib/macroape/pwm_counting.rb', line 7

# Reader for the wrapped matrix object given to the constructor.
#
# @return the value held in @pwm
def pwm
  @pwm
end

Instance Method Details

#best_score ⇒ Object



31
32
33
# File 'lib/macroape/pwm_counting.rb', line 31

# Best score over the whole matrix: the best suffix score taken from column 0.
#
# @return the result of best_suffix(0)
def best_score
  best_suffix(0)
end

#best_suffix(i) ⇒ Object

Best score of the suffix that starts at position i.



40
41
42
# File 'lib/macroape/pwm_counting.rb', line 40

# Highest score reachable on the columns from index +i+ to the end of the matrix.
# Adds up the maximum of each column in matrix[i...length], starting from 0.0.
#
# @param i index of the first column of the suffix
# @return the sum of the column maxima (0.0 for an empty slice)
def best_suffix(i)
  matrix[i...length].inject(0.0) { |total, column| total + column.max }
end

#count_by_threshold(threshold) ⇒ Object



167
168
169
# File 'lib/macroape/pwm_counting.rb', line 167

# Count for a single threshold, looked up in the hash built by
# counts_by_thresholds.
#
# @param threshold score threshold
# @return the entry stored under +threshold+ in counts_by_thresholds(threshold)
def count_by_threshold(threshold)
  counts = counts_by_thresholds(threshold)
  counts[threshold]
end

#count_distribution ⇒ Object



142
143
144
# File 'lib/macroape/pwm_counting.rb', line 142

# Score distribution computed with the threshold set to worst_score.
# The result is memoized in @count_distribution, so the computation
# runs only once as long as the cached value is truthy.
#
# @return the cached result of count_distribution_after_threshold(worst_score)
def count_distribution
  return @count_distribution if @count_distribution

  @count_distribution = count_distribution_after_threshold(worst_score)
end

#count_distribution_after_threshold(threshold) ⇒ Object



132
133
134
135
136
137
138
139
140
# File 'lib/macroape/pwm_counting.rb', line 132

# Builds a hash mapping score => count for scores that can still reach
# +threshold+.
#
# If a full distribution is already cached in @count_distribution, it is
# simply filtered. Otherwise the hash is grown column by column, starting
# from { 0 => 1 }: for each column, recalc_score_hash extends the partial
# scores and keeps those not below threshold - best_suffix(column + 1).
#
# @param threshold minimal score of interest
# @return hash of score => count
# @raise [RuntimeError] when max_hash_size is set and an intermediate hash
#   grows beyond it
def count_distribution_after_threshold(threshold)
  if @count_distribution
    return @count_distribution.select { |score, _count| score >= threshold }
  end

  (0...length).inject({ 0 => 1 }) do |distribution, position|
    least_sufficient = threshold - best_suffix(position + 1)
    extended = recalc_score_hash(distribution, matrix[position], least_sufficient)
    if max_hash_size && extended.size > max_hash_size
      raise 'Hash overflow in PWM::ThresholdByPvalue#count_distribution_after_threshold'
    end
    extended
  end
end

#count_distribution_under_pvalue(max_pvalue) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/macroape/pwm_counting.rb', line 93

# Builds a score => count distribution that covers at least
# max_pvalue * vocabulary_volume words (when that many exist).
#
# A threshold is first guessed with threshold_gauss_estimation; if the guess
# raises, worst_score is used instead. While the resulting distribution holds
# too few words, the p-value fed to the estimator is doubled to get a lower
# threshold and the distribution is recomputed.
#
# The search stops as soon as the threshold is at or below worst_score: the
# distribution is then already complete, so further doubling cannot add words.
# Without this stop the loop would never end when the requested count exceeds
# the total available (e.g. max_pvalue > 1, or rounding error at max_pvalue = 1).
#
# @param max_pvalue requested upper bound on the p-value
# @return hash of score => count
def count_distribution_under_pvalue(max_pvalue)
  look_for_count = max_pvalue * vocabulary_volume
  cnt_distribution = {}
  until cnt_distribution.inject(0.0) { |sum, (_score, count)| sum + count } >= look_for_count
    approximate_threshold =
      begin
        threshold_gauss_estimation(max_pvalue)
      rescue StandardError
        worst_score
      end
    cnt_distribution = count_distribution_after_threshold(approximate_threshold)
    # Nothing lies below the worst score, so the distribution cannot grow further.
    break if approximate_threshold <= worst_score

    # The estimate covered too few words: lower the threshold by doubling the p-value.
    max_pvalue *= 2
  end

  cnt_distribution
end

#counts_by_thresholds(*thresholds) ⇒ Object



159
160
161
162
163
164
165
# File 'lib/macroape/pwm_counting.rb', line 159

# For every given threshold, totals the counts of all scores that are not
# below it. The distribution is computed once, for the smallest threshold.
#
# @param thresholds one or more score thresholds
# @return hash of threshold => total count (a Float, accumulated from 0.0)
def counts_by_thresholds(*thresholds)
  distribution = count_distribution_after_threshold(thresholds.min)
  thresholds.each_with_object({}) do |threshold, totals|
    running = 0.0
    distribution.each { |score, count| running += count if score >= threshold }
    totals[threshold] = running
  end
end

#left_augmented(n) ⇒ Object



5
6
7
# File 'lib/macroape/pwm_compare_aligned.rb', line 5

# Returns a new PWMCounting wrapping pwm.left_augmented(n), carrying over
# this object's background and max_hash_size.
#
# @param n argument forwarded to pwm.left_augmented
# @return [PWMCounting] a new instance; the receiver is not modified here
def left_augmented(n)
  PWMCounting.new(pwm.left_augmented(n), background: background, max_hash_size: max_hash_size)
end

#length ⇒ Object



27
28
29
# File 'lib/macroape/pwm_counting.rb', line 27

# Length of the wrapped matrix, as reported by pwm.length.
#
# @return the result of pwm.length
def length
  pwm.length
end

#matrix ⇒ Object



15
16
17
# File 'lib/macroape/pwm_counting.rb', line 15

# The underlying matrix of the wrapped object, as returned by pwm.matrix.
# Other methods of this class index it by column position.
#
# @return the result of pwm.matrix
def matrix
  pwm.matrix
end

#pvalue_by_threshold(threshold) ⇒ Object



171
172
173
# File 'lib/macroape/pwm_counting.rb', line 171

# P-value of a threshold: the count found for it divided by the
# vocabulary volume.
#
# @param threshold score threshold
# @return count_by_threshold(threshold) / vocabulary_volume
def pvalue_by_threshold(threshold)
  matching_count = count_by_threshold(threshold)
  matching_count / vocabulary_volume
end

#recalc_score_hash(scores, column, least_sufficient) ⇒ Object



146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/macroape/pwm_counting.rb', line 146

# Extends every partial score by one more column.
#
# For each (score, count) pair and each of the four letter indices 0..3, the
# column value is added to the score. Sums not below +least_sufficient+ are
# kept, and their count grows by count * background.counts[letter].
#
# @param scores hash of partial score => count
# @param column values of the current matrix column, indexed by letter
# @param least_sufficient minimal extended score worth keeping
# @return a new hash (default value 0) of extended score => count
def recalc_score_hash(scores, column, least_sufficient)
  scores.each_with_object(Hash.new(0)) do |(prefix_score, prefix_count), extended|
    (0...4).each do |letter|
      candidate = prefix_score + column[letter]
      next unless candidate >= least_sufficient

      extended[candidate] += prefix_count * background.counts[letter]
    end
  end
end

#reverse_complemented ⇒ Object



11
12
13
# File 'lib/macroape/pwm_compare_aligned.rb', line 11

# Returns a new PWMCounting wrapping pwm.reverse_complemented, carrying over
# this object's background and max_hash_size.
#
# @return [PWMCounting] a new instance; the receiver is not modified here
def reverse_complemented
  PWMCounting.new(pwm.reverse_complemented, background: background, max_hash_size: max_hash_size)
end

#right_augmented(n) ⇒ Object



8
9
10
# File 'lib/macroape/pwm_compare_aligned.rb', line 8

# Returns a new PWMCounting wrapping pwm.right_augmented(n), carrying over
# this object's background and max_hash_size.
#
# @param n argument forwarded to pwm.right_augmented
# @return [PWMCounting] a new instance; the receiver is not modified here
def right_augmented(n)
  PWMCounting.new(pwm.right_augmented(n), background: background, max_hash_size: max_hash_size)
end

#score_mean ⇒ Object



48
49
50
# File 'lib/macroape/pwm_counting.rb', line 48

# Mean score: the sum, over all positions of the matrix, of
# background.mean(position), accumulated from 0.0.
#
# @return the summed per-position means
def score_mean
  position_means = pwm.each_position.map { |position| background.mean(position) }
  position_means.inject(0.0) { |total, value| total + value }
end

#score_variance ⇒ Object



52
53
54
# File 'lib/macroape/pwm_counting.rb', line 52

# Score variance: for every position of the matrix, adds
# background.mean_square(position) and subtracts the square of
# background.mean(position), accumulating from 0.0.
#
# @return the summed per-position variances
def score_variance
  pwm.each_position.inject(0.0) do |accumulated, position|
    mean_of_squares = background.mean_square(position)
    squared_mean = background.mean(position)**2
    accumulated + mean_of_squares - squared_mean
  end
end

#threshold(pvalue) ⇒ Object



62
63
64
# File 'lib/macroape/pwm_counting.rb', line 62

# Threshold for a single p-value: returns the threshold from the first
# triple yielded by thresholds(pvalue).
#
# @param pvalue requested p-value
# @return the yielded threshold; if nothing is yielded, whatever
#   thresholds(pvalue) itself returns
def threshold(pvalue)
  thresholds(pvalue) do |_requested, score, _actual|
    return score
  end
end

#threshold_and_real_pvalue(pvalue) ⇒ Object



65
66
67
# File 'lib/macroape/pwm_counting.rb', line 65

# Threshold and actual p-value for a single requested p-value, taken from
# the first triple yielded by thresholds(pvalue).
#
# @param pvalue requested p-value
# @return [Array] two elements: threshold and real p-value; if nothing is
#   yielded, whatever thresholds(pvalue) itself returns
def threshold_and_real_pvalue(pvalue)
  thresholds(pvalue) do |_requested, score, actual_pvalue|
    return [score, actual_pvalue]
  end
end

#threshold_gauss_estimation(pvalue) ⇒ Object



23
24
25
# File 'lib/macroape/pwm_counting.rb', line 23

# Estimated threshold for a p-value; delegates to
# pwm.threshold_gauss_estimation with the same argument.
#
# @param max_pvalue p-value passed through to the wrapped matrix
# @return the result of pwm.threshold_gauss_estimation(max_pvalue)
def threshold_gauss_estimation(max_pvalue)
  pwm.threshold_gauss_estimation(max_pvalue)
end

#thresholds(*pvalues) ⇒ Object



75
76
77
78
79
80
81
# File 'lib/macroape/pwm_counting.rb', line 75

# Yields one (pvalue, threshold, real_pvalue) triple per entry of
# thresholds_by_pvalues(*pvalues).
#
# The threshold is placed 10% of the way from the start to the end of the
# score range. The real p-value is the end of the count range divided by
# vocabulary_volume.
#
# @param pvalues requested p-values
# @yield [pvalue, threshold, real_pvalue]
# @return the hash returned by thresholds_by_pvalues
def thresholds(*pvalues)
  thresholds_by_pvalues(*pvalues).each do |pvalue, (score_range, count_range)|
    lower = score_range.begin
    upper = score_range.end
    yield pvalue, lower + 0.1 * (upper - lower), count_range.end.to_f / vocabulary_volume
  end
end

#thresholds_by_pvalues(*pvalues) ⇒ Object

Returns a hash {pvalue => [thresholds, counts]}, where thresholds = left_threshold .. right_threshold (left_threshold < right_threshold) and counts = left_count .. right_count (left_count > right_count).



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/macroape/pwm_counting.rb', line 113

# For each requested p-value, finds the pair of neighbouring scores (and the
# cumulative counts at them) between which the p-value falls.
#
# Scores are sorted in descending order and their counts are accumulated with
# Array#partial_sums. For a p-value, the first position whose cumulative count
# reaches pvalue * vocabulary_volume gives the lower score. The upper score is
# the previous (higher) score, or best_score + 1.0 with count 0.0 when there
# is none.
#
# @param pvalues requested p-values
# @return hash of pvalue => [lower_score..upper_score, lower_count..upper_count],
#   with keys in ascending p-value order
def thresholds_by_pvalues(*pvalues)
  descending = count_distribution_under_pvalue(pvalues.max).sort.reverse
  scores = descending.map(&:first)
  cumulative = descending.map(&:last).partial_sums

  pvalues.sort.each_with_object({}) do |pvalue, results|
    look_for_count = pvalue * vocabulary_volume
    ind = cumulative.index { |sum| sum >= look_for_count }
    lower_score = scores[ind]
    lower_count = cumulative[ind]
    if ind > 0
      upper_score = scores[ind - 1]
      upper_count = cumulative[ind - 1]
    else
      # No higher score exists: use a bound just above the best possible score.
      upper_score = best_score + 1.0
      upper_count = 0.0
    end
    results[pvalue] = [(lower_score..upper_score), (lower_count..upper_count)]
  end
end

#vocabulary_volume ⇒ Object



19
20
21
# File 'lib/macroape/pwm_counting.rb', line 19

# Normalizing constant used to turn counts into p-values:
# background.volume raised to the power of the matrix length.
#
# @return background.volume ** length
def vocabulary_volume
  background.volume ** length
end

#weak_threshold(pvalue) ⇒ Object



68
69
70
# File 'lib/macroape/pwm_counting.rb', line 68

# Weak threshold for a single p-value: returns the threshold from the first
# triple yielded by weak_thresholds(pvalue).
#
# @param pvalue requested p-value
# @return the yielded threshold; if nothing is yielded, whatever
#   weak_thresholds(pvalue) itself returns
def weak_threshold(pvalue)
  weak_thresholds(pvalue) do |_requested, score, _actual|
    return score
  end
end

#weak_threshold_and_real_pvalue(pvalue) ⇒ Object



71
72
73
# File 'lib/macroape/pwm_counting.rb', line 71

# Weak threshold and actual p-value for a single requested p-value, taken
# from the first triple yielded by weak_thresholds(pvalue).
#
# @param pvalue requested p-value
# @return [Array] two elements: threshold and real p-value; if nothing is
#   yielded, whatever weak_thresholds(pvalue) itself returns
def weak_threshold_and_real_pvalue(pvalue)
  weak_thresholds(pvalue) do |_requested, score, actual_pvalue|
    return [score, actual_pvalue]
  end
end

#weak_thresholds(*pvalues) ⇒ Object

“Weak” means that the threshold's real P-value is not less than the given P-value, whereas the real P-value of the usual threshold is not greater than it.



84
85
86
87
88
89
90
# File 'lib/macroape/pwm_counting.rb', line 84

# Yields one (pvalue, threshold, real_pvalue) triple per entry of
# thresholds_by_pvalues(*pvalues).
#
# The threshold is the start of the score range as a Float. The real p-value
# is the start of the count range divided by vocabulary_volume.
#
# @param pvalues requested p-values
# @yield [pvalue, threshold, real_pvalue]
# @return the hash returned by thresholds_by_pvalues
def weak_thresholds(*pvalues)
  thresholds_by_pvalues(*pvalues).each do |pvalue, (score_range, count_range)|
    weak_score = score_range.begin.to_f
    yield pvalue, weak_score, count_range.begin.to_f / vocabulary_volume
  end
end

#worst_score ⇒ Object



35
36
37
# File 'lib/macroape/pwm_counting.rb', line 35

# Worst score over the whole matrix: the worst suffix score taken from column 0.
#
# @return the result of worst_suffix(0)
def worst_score
  worst_suffix(0)
end

#worst_suffix(i) ⇒ Object



44
45
46
# File 'lib/macroape/pwm_counting.rb', line 44

# Lowest score reachable on the columns from index +i+ to the end of the matrix.
# Adds up the minimum of each column in matrix[i...length], starting from 0.0.
#
# @param i index of the first column of the suffix
# @return the sum of the column minima (0.0 for an empty slice)
def worst_suffix(i)
  matrix[i...length].inject(0.0) { |total, column| total + column.min }
end