Class: Bioinform::PWM

Inherits:
Object
  • Object
show all
Defined in:
lib/macroape/counting.rb

Instance Method Summary collapse

Instance Method Details

#count_by_threshold(threshold) ⇒ Object



113
114
115
# File 'lib/macroape/counting.rb', line 113

# Looks up the count for a single threshold.
#
# Delegates to #counts_by_thresholds with one threshold and picks that
# threshold's entry out of the returned hash.
#
# @param threshold the score threshold to look up
# @return the value stored under +threshold+ in the hash returned by
#   #counts_by_thresholds
def count_by_threshold(threshold)
  counts = counts_by_thresholds(threshold)
  counts[threshold]
end

#count_distribution ⇒ Object



88
89
90
# File 'lib/macroape/counting.rb', line 88

# Memoized distribution computed with +worst_score+ as the threshold.
#
# The first call stores the result of
# #count_distribution_after_threshold(worst_score) in @count_distribution;
# later calls return the stored value.
#
# @return the cached distribution
def count_distribution
  return @count_distribution if @count_distribution

  @count_distribution = count_distribution_after_threshold(worst_score)
end

#count_distribution_after_threshold(threshold) ⇒ Object



78
79
80
81
82
83
84
85
86
# File 'lib/macroape/counting.rb', line 78

# Builds a score => count hash restricted by +threshold+.
#
# When @count_distribution is already set, the result is just that hash
# filtered to scores not less than +threshold+. Otherwise the hash is built
# column by column with #recalc_score_hash, starting from { 0 => 1 }; for each
# column the cut-off passed down is +threshold+ minus best_suffix(column + 1).
#
# @param threshold lower bound on scores
# @return [Hash] score => count
# @raise [RuntimeError] when +max_hash_size+ is set and an intermediate hash
#   grows beyond it
def count_distribution_after_threshold(threshold)
  if @count_distribution
    return @count_distribution.select { |score, _count| score >= threshold }
  end

  distribution = { 0 => 1 }
  (0...length).each do |position|
    column_weights = @matrix[position]
    cutoff = threshold - best_suffix(position + 1)
    distribution = recalc_score_hash(distribution, column_weights, cutoff)

    limit = max_hash_size
    if limit && distribution.size > limit
      raise 'Hash overflow in PWM::ThresholdByPvalue#count_distribution_after_threshold'
    end
  end
  distribution
end

#count_distribution_under_pvalue(max_pvalue) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/macroape/counting.rb', line 39

def count_distribution_under_pvalue(max_pvalue)
  cnt_distribution = {}
  look_for_count = max_pvalue * vocabulary_volume
  until cnt_distribution.inject(0.0){|sum,(score,count)| sum + count} >= look_for_count
    begin
      approximate_threshold = threshold_gauss_estimation(max_pvalue)
    rescue
      approximate_threshold = worst_score
    end
    cnt_distribution = count_distribution_after_threshold(approximate_threshold)
    max_pvalue *=2 # if estimation counted too small amount of words - try to lower threshold estimation by doubling pvalue
  end

  cnt_distribution
end

#counts_by_thresholds(*thresholds) ⇒ Object



105
106
107
108
109
110
111
# File 'lib/macroape/counting.rb', line 105

# Sums, for every given threshold, the counts of all scores not less than it.
#
# The distribution is computed once, using the smallest threshold, and then
# scanned separately for each threshold.
#
# @param thresholds one or more score thresholds
# @return [Hash] threshold => Float total of counts with score >= threshold
def counts_by_thresholds(*thresholds)
  distribution = count_distribution_after_threshold(thresholds.min)
  thresholds.each_with_object({}) do |threshold, totals|
    total = 0.0
    distribution.each do |score, count|
      total += count if score >= threshold
    end
    totals[threshold] = total
  end
end

#pvalue_by_threshold(threshold) ⇒ Object



117
118
119
# File 'lib/macroape/counting.rb', line 117

# Divides the count obtained for +threshold+ by +vocabulary_volume+.
#
# @param threshold the score threshold
# @return the quotient of #count_by_threshold(threshold) and vocabulary_volume
def pvalue_by_threshold(threshold)
  passing_count = count_by_threshold(threshold)
  passing_count / vocabulary_volume
end

#recalc_score_hash(scores, column, least_sufficient) ⇒ Object



92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/macroape/counting.rb', line 92

# Extends every score in +scores+ by each of the four entries of +column+.
#
# A new score is kept only when it is not less than +least_sufficient+; its
# count is increased by the old count multiplied by +background+ for that
# letter index.
#
# @param scores [Hash] score => count
# @param column indexable by 0..3, giving the score added for each letter
# @param least_sufficient lower bound for scores that are kept
# @return [Hash] new score => count hash whose default value is 0
def recalc_score_hash(scores, column, least_sufficient)
  scores.each_with_object(Hash.new(0)) do |(score, count), extended|
    0.upto(3) do |letter|
      candidate = score + column[letter]
      next unless candidate >= least_sufficient

      extended[candidate] += count * background[letter]
    end
  end
end

#threshold(pvalue) ⇒ Object



8
9
10
# File 'lib/macroape/counting.rb', line 8

# Returns the threshold that #thresholds yields first for +pvalue+.
#
# The +return+ inside the block leaves this method on the first yield. If
# #thresholds never yields, its own return value is returned.
#
# @param pvalue the requested pvalue
# @return the second value yielded by #thresholds
def threshold(pvalue)
  thresholds(pvalue) do |_pvalue, found_threshold, _real_pvalue|
    return found_threshold
  end
end

#threshold_and_real_pvalue(pvalue) ⇒ Object



11
12
13
# File 'lib/macroape/counting.rb', line 11

# Returns the threshold and real pvalue that #thresholds yields first for
# +pvalue+.
#
# The +return+ inside the block leaves this method on the first yield. If
# #thresholds never yields, its own return value is returned.
#
# @param pvalue the requested pvalue
# @return [Array] the second and third values yielded by #thresholds
def threshold_and_real_pvalue(pvalue)
  thresholds(pvalue) do |_pvalue, found_threshold, found_pvalue|
    return found_threshold, found_pvalue
  end
end

#thresholds(*pvalues) ⇒ Object



21
22
23
24
25
26
27
# File 'lib/macroape/counting.rb', line 21

# Yields a threshold and its real pvalue for every requested pvalue.
#
# For each entry produced by #thresholds_by_pvalues the threshold is placed
# one tenth of the way from the start of the score range towards its end, and
# the real pvalue is the end of the count range divided by vocabulary_volume.
#
# @param pvalues one or more pvalues
# @yield [pvalue, threshold, real_pvalue]
# @return the result of iterating the #thresholds_by_pvalues result with +each+
def thresholds(*pvalues)
  thresholds_by_pvalues(*pvalues).each do |pvalue, (score_range, count_range)|
    lower = score_range.begin
    upper = score_range.end
    real_pvalue = count_range.end.to_f / vocabulary_volume
    yield pvalue, lower + 0.1 * (upper - lower), real_pvalue
  end
end

#thresholds_by_pvalues(*pvalues) ⇒ Object

Return value: a hash of the form pvalue => [thresholds, counts], where thresholds = left_threshold .. right_threshold (left_threshold < right_threshold) and counts = left_count .. right_count (left_count > right_count).



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/macroape/counting.rb', line 59

# Finds, for every pvalue, the pair of neighbouring scores between which the
# requested word count is reached.
#
# The distribution from #count_distribution_under_pvalue (for the largest
# pvalue) is ordered by descending score and its counts are accumulated with
# +partial_sums+. For each pvalue the first position whose accumulated count
# reaches pvalue * vocabulary_volume gives the lower score; the previous
# position gives the upper one. When there is no previous position the upper
# score is best_score + 1.0 with a count of 0.0.
#
# NOTE(review): +partial_sums+ and +collect_hash+ are not defined in this
# excerpt; presumably cumulative sums and a pairs-to-hash collector - confirm.
#
# @param pvalues one or more pvalues
# @return [Hash] pvalue => [(minscore .. maxscore),
#   (count_at_minscore .. count_at_maxscore)]
def thresholds_by_pvalues(*pvalues)
  distribution = count_distribution_under_pvalue(pvalues.max)
  descending = distribution.sort.reverse
  scores = descending.map(&:first)
  counts = descending.map(&:last)
  partial_sums = counts.partial_sums

  results = {}

  pvalue_counts = pvalues.sort.collect_hash { |pvalue| [pvalue, pvalue * vocabulary_volume] }
  pvalue_counts.each do |pvalue, look_for_count|
    ind = partial_sums.index { |sum| sum >= look_for_count }
    minscore = scores[ind]
    count_at_minscore = partial_sums[ind]
    if ind > 0
      maxscore = scores[ind - 1]
      count_at_maxscore = partial_sums[ind - 1]
    else
      maxscore = best_score + 1.0
      count_at_maxscore = 0.0
    end
    results[pvalue] = [(minscore..maxscore), (count_at_minscore..count_at_maxscore)]
  end

  results
end

#weak_threshold(pvalue) ⇒ Object



14
15
16
# File 'lib/macroape/counting.rb', line 14

# Returns the threshold that #weak_thresholds yields first for +pvalue+.
#
# The +return+ inside the block leaves this method on the first yield. If
# #weak_thresholds never yields, its own return value is returned.
#
# @param pvalue the requested pvalue
# @return the second value yielded by #weak_thresholds
def weak_threshold(pvalue)
  weak_thresholds(pvalue) do |_pvalue, found_threshold, _real_pvalue|
    return found_threshold
  end
end

#weak_threshold_and_real_pvalue(pvalue) ⇒ Object



17
18
19
# File 'lib/macroape/counting.rb', line 17

# Returns the threshold and real pvalue that #weak_thresholds yields first for
# +pvalue+.
#
# The +return+ inside the block leaves this method on the first yield. If
# #weak_thresholds never yields, its own return value is returned.
#
# @param pvalue the requested pvalue
# @return [Array] the second and third values yielded by #weak_thresholds
def weak_threshold_and_real_pvalue(pvalue)
  weak_thresholds(pvalue) do |_pvalue, found_threshold, found_pvalue|
    return found_threshold, found_pvalue
  end
end

#weak_thresholds(*pvalues) ⇒ Object

“Weak” means that the threshold's real pvalue is not less than the given pvalue, whereas the usual threshold's real pvalue is not greater than the given pvalue.



30
31
32
33
34
35
36
# File 'lib/macroape/counting.rb', line 30

# Yields a weak threshold and its real pvalue for every requested pvalue.
#
# For each entry produced by #thresholds_by_pvalues the threshold is the start
# of the score range (as a Float) and the real pvalue is the start of the
# count range divided by vocabulary_volume.
#
# @param pvalues one or more pvalues
# @yield [pvalue, threshold, real_pvalue]
# @return the result of iterating the #thresholds_by_pvalues result with +each+
def weak_thresholds(*pvalues)
  thresholds_by_pvalues(*pvalues).each do |pvalue, (score_range, count_range)|
    weak_score = score_range.begin.to_f
    covered_count = count_range.begin.to_f
    yield pvalue, weak_score, covered_count / vocabulary_volume
  end
end