Class: Macroape::PWMCompareAligned
- Inherits:
-
Object
- Object
- Macroape::PWMCompareAligned
- Defined in:
- lib/macroape/pwm_compare_aligned.rb,
lib/macroape/aligned_pair_intersection.rb
Instance Attribute Summary collapse
-
#first ⇒ Object
readonly
Returns the value of attribute first.
-
#first_length ⇒ Object
readonly
Returns the value of attribute first_length.
-
#length ⇒ Object
readonly
Returns the value of attribute length.
-
#max_pair_hash_size ⇒ Object
Sets or gets the limit on the total size of the calculation hash.
-
#orientation ⇒ Object
readonly
Returns the value of attribute orientation.
-
#second ⇒ Object
readonly
Returns the value of attribute second.
-
#second_length ⇒ Object
readonly
Returns the value of attribute second_length.
-
#shift ⇒ Object
readonly
Returns the value of attribute shift.
Class Method Summary collapse
Instance Method Summary collapse
- #alignment_infos ⇒ Object
-
#counts_for_two_matrices(threshold_first, threshold_second) ⇒ Object
unoptimized version of this and related methods.
- #direct? ⇒ Boolean
-
#first_overlaps?(pos) ⇒ Boolean
Whether the first matrix overlaps the specified position of the alignment.
- #first_pwm_alignment ⇒ Object
-
#get_counts(threshold_first, threshold_second, &count_contribution_block) ⇒ Object
The block has the form {|score, letter| contribution to the count made by `letter` with `score` }.
-
#initialize(first_unaligned, second_unaligned, shift, orientation) ⇒ PWMCompareAligned
constructor
first_unaligned and second_unaligned - PWMCounting objects, not PWMs.
- #jaccard(first_threshold, second_threshold) ⇒ Object
- #jaccard_by_pvalue(pvalue) ⇒ Object
- #jaccard_by_weak_pvalue(pvalue) ⇒ Object
- #overlap ⇒ Object
-
#recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second) ⇒ Object
wouldn’t work without count_contribution_block.
- #revcomp? ⇒ Boolean
- #second_overlaps?(pos) ⇒ Boolean
- #second_pwm_alignment ⇒ Object
Constructor Details
#initialize(first_unaligned, second_unaligned, shift, orientation) ⇒ PWMCompareAligned
first_unaligned and second_unaligned - PWMCounting objects, not PWMs
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 23
#
# Builds an aligned pair from two unaligned matrices.
#
# first_unaligned and second_unaligned are PWMCounting objects, not PWMs.
# shift is the offset of the second matrix relative to the first: a positive
# shift pads the second matrix on the left, otherwise the first matrix is
# padded on the left by -shift. orientation is compared against :direct and
# :revcomp by #direct? / #revcomp?.
#
# After construction both @first and @second are padded on the right up to the
# common alignment length.
def initialize(first_unaligned, second_unaligned, shift, orientation)
  @shift = shift
  @orientation = orientation
  @first_length = first_unaligned.length
  @second_length = second_unaligned.length
  @length = self.class.calculate_alignment_length(@first_length, @second_length, @shift)

  padded_first = first_unaligned
  # The second matrix is reverse-complemented before any padding is applied.
  padded_second = revcomp? ? second_unaligned.reverse_complemented : second_unaligned

  if shift > 0
    padded_second = padded_second.left_augmented(shift)
  else
    padded_first = padded_first.left_augmented(-shift)
  end

  @first = padded_first.right_augmented(@length - padded_first.length)
  @second = padded_second.right_augmented(@length - padded_second.length)
end
Instance Attribute Details
#first ⇒ Object (readonly)
Returns the value of attribute first.
20 21 22 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 20
#
# Read-only accessor: returns the value of attribute first.
def first
  @first
end
#first_length ⇒ Object (readonly)
Returns the value of attribute first_length.
20 21 22 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 20
#
# Read-only accessor: returns the value of attribute first_length.
def first_length
  @first_length
end
#length ⇒ Object (readonly)
Returns the value of attribute length.
20 21 22 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 20
#
# Read-only accessor: returns the value of attribute length.
def length
  @length
end
#max_pair_hash_size ⇒ Object
Sets or gets the limit on the total size of the calculation hash. It is a safeguard against excessive CPU usage on inappropriate input data.
18 19 20 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 18
#
# Returns the configured limit on the total size of the calculation hash
# (nil when no limit has been set).
def max_pair_hash_size
  @max_pair_hash_size
end
#orientation ⇒ Object (readonly)
Returns the value of attribute orientation.
20 21 22 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 20
#
# Read-only accessor: returns the value of attribute orientation.
def orientation
  @orientation
end
#second ⇒ Object (readonly)
Returns the value of attribute second.
20 21 22 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 20
#
# Read-only accessor: returns the value of attribute second.
def second
  @second
end
#second_length ⇒ Object (readonly)
Returns the value of attribute second_length.
20 21 22 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 20
#
# Read-only accessor: returns the value of attribute second_length.
def second_length
  @second_length
end
#shift ⇒ Object (readonly)
Returns the value of attribute shift.
20 21 22 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 20
#
# Read-only accessor: returns the value of attribute shift.
def shift
  @shift
end
Class Method Details
.calculate_alignment_length(first_len, second_len, shift) ⇒ Object
131 132 133 134 135 136 137 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 131
#
# Length of the alignment that covers both matrices.
#
# A positive shift moves the second matrix right by shift positions;
# otherwise the first matrix is moved right by -shift positions. The result
# is the larger of the two right ends.
def self.calculate_alignment_length(first_len, second_len, shift)
  first_offset = shift > 0 ? 0 : -shift
  second_offset = shift > 0 ? shift : 0
  [first_len + first_offset, second_len + second_offset].max
end
Instance Method Details
#alignment_infos ⇒ Object
73 74 75 76 77 78 79 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 73
#
# Summary of the alignment as a Hash with keys :shift, :orientation, :text
# (the two alignment strings separated by a newline), :overlap and
# :alignment_length.
def alignment_infos
  picture = [first_pwm_alignment, second_pwm_alignment].join("\n")
  {
    shift: shift,
    orientation: orientation,
    text: picture,
    overlap: overlap,
    alignment_length: length
  }
end
#counts_for_two_matrices(threshold_first, threshold_second) ⇒ Object
unoptimized version of this and related methods
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/macroape/aligned_pair_intersection.rb', line 4
#
# Unoptimized version of this and related methods.
#
# Returns a two-element array of counts obtained from #get_counts, one per
# matrix background. When the two backgrounds have different counts the
# calculation is run once with each background's weights. When they are equal
# it is run once and the result is returned twice; a wordwise background uses
# the running count as-is instead of multiplying by a weight.
def counts_for_two_matrices(threshold_first, threshold_second)
  # Fetch the background counts once instead of on every block call.
  first_weights = first.background.counts
  second_weights = second.background.counts

  unless first_weights == second_weights
    return [first_weights, second_weights].map do |weights|
      get_counts(threshold_first, threshold_second) { |count, letter| weights[letter] * count }
    end
  end

  shared =
    if first.background.wordwise?
      get_counts(threshold_first, threshold_second) { |count, _letter| count }
    else
      get_counts(threshold_first, threshold_second) { |count, letter| first_weights[letter] * count }
    end
  [shared, shared]
end
#direct? ⇒ Boolean
42 43 44 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 42
#
# True when the alignment orientation is :direct.
def direct?
  orientation == :direct
end
#first_overlaps?(pos) ⇒ Boolean
Whether the first matrix overlaps the specified position of the alignment.
82 83 84 85 86 87 88 89 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 82
#
# Whether the first matrix covers position pos of the alignment.
#
# Positions outside 0...length are never covered. With a positive shift the
# first matrix starts at position 0, otherwise it starts at position -shift;
# in both cases it spans first_length positions.
def first_overlaps?(pos)
  return false unless pos >= 0 && pos < length

  start = shift > 0 ? 0 : -shift
  pos >= start && pos < start + first_length
end
#first_pwm_alignment ⇒ Object
53 54 55 56 57 58 59 60 61 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 53
#
# One-line picture of where the first matrix lies in the alignment:
# '>' for every position it covers and '.' for every position it does not.
def first_pwm_alignment
  Array.new(length) { |pos| first_overlaps?(pos) ? '>' : '.' }.join
end
#get_counts(threshold_first, threshold_second, &count_contribution_block) ⇒ Object
The block has the form {|score, letter| contribution to the count made by `letter` with `score` }.
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/macroape/aligned_pair_intersection.rb', line 24
#
# Walks the alignment column by column, maintaining a nested hash
#   score_on_first => { score_on_second => count }
# and returns the sum (a Float) of all counts left after the last column.
#
# threshold_first / threshold_second are the score thresholds for the two
# matrices. For each column the thresholds passed on to #recalc_score_hash are
# reduced by best_suffix(column + 1) of the respective matrix (presumably the
# best score still reachable in the remaining columns - confirm against
# PWMCounting#best_suffix).
#
# The block is forwarded to #recalc_score_hash and has the form
# {|score, letter| contribution to the count made by `letter` with `score` }.
#
# Raises RuntimeError when max_pair_hash_size is set and the total number of
# (score_on_first, score_on_second) entries exceeds it.
def get_counts(threshold_first, threshold_second, &count_contribution_block)
  # scores_on_first_pwm, scores_on_second_pwm --> count
  scores = { 0 => { 0 => 1 } }
  length.times do |column|
    new_scores = recalc_score_hash(scores,
                                   first.matrix[column], second.matrix[column],
                                   threshold_first - first.best_suffix(column + 1),
                                   threshold_second - second.best_suffix(column + 1),
                                   &count_contribution_block)
    scores.replace(new_scores)
    next unless max_pair_hash_size

    # Iterating a Hash yields [key, value] pairs (size always 2), so the
    # entry count has to be taken from the inner hashes, i.e. the values.
    total_entries = scores.each_value.inject(0) { |total, inner| total + inner.size }
    if total_entries > max_pair_hash_size
      raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities'
    end
  end

  scores.each_value.inject(0.0) do |total, inner|
    total + inner.each_value.inject(0.0) { |subtotal, count| subtotal + count }
  end
end
#jaccard(first_threshold, second_threshold) ⇒ Object
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 100
#
# Jaccard similarity of the word sets recognized by the two aligned matrices
# at the given thresholds.
#
# f and s are the counts recognized by each matrix (count_by_threshold). The
# intersection is the geometric mean of the two values returned by
# #counts_for_two_matrices, and the union is f + s - intersection.
#
# Returns a Hash with :similarity, :tanimoto (1.0 - similarity),
# :recognized_by_both, :recognized_by_first, :recognized_by_second,
# :real_pvalue_first and :real_pvalue_second. When either matrix recognizes
# nothing, :similarity and :tanimoto are -1, :recognized_by_both is 0 and the
# real_pvalue keys are absent.
def jaccard(first_threshold, second_threshold)
  f = first.count_by_threshold(first_threshold)
  s = second.count_by_threshold(second_threshold)
  if f == 0 || s == 0
    return { similarity: -1, tanimoto: -1, recognized_by_both: 0,
             recognized_by_first: f, recognized_by_second: s }
  end

  intersect = counts_for_two_matrices(first_threshold, second_threshold)
  intersect = Math.sqrt(intersect[0] * intersect[1])
  union = f + s - intersect
  similarity = intersect.to_f / union
  {
    similarity: similarity,
    tanimoto: 1.0 - similarity,
    recognized_by_both: intersect,
    recognized_by_first: f,
    recognized_by_second: s,
    # to_f keeps the ratio fractional even when both the count and the
    # vocabulary volume are Integers (Integer#/ would truncate).
    real_pvalue_first: f.to_f / first.vocabulary_volume,
    real_pvalue_second: s.to_f / second.vocabulary_volume
  }
end
#jaccard_by_pvalue(pvalue) ⇒ Object
119 120 121 122 123 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 119
#
# Runs #jaccard with thresholds obtained from each matrix's #threshold for
# the given pvalue.
def jaccard_by_pvalue(pvalue)
  jaccard(first.threshold(pvalue), second.threshold(pvalue))
end
#jaccard_by_weak_pvalue(pvalue) ⇒ Object
125 126 127 128 129 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 125
#
# Runs #jaccard with thresholds obtained from each matrix's #weak_threshold
# for the given pvalue.
def jaccard_by_weak_pvalue(pvalue)
  jaccard(first.weak_threshold(pvalue), second.weak_threshold(pvalue))
end
#overlap ⇒ Object
49 50 51 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 49
#
# Number of alignment positions covered by both matrices.
def overlap
  (0...length).count do |pos|
    first_overlaps?(pos) && second_overlaps?(pos)
  end
end
#recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second) ⇒ Object
wouldn’t work without count_contribution_block
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/macroape/aligned_pair_intersection.rb', line 41
#
# Extends every (score_first, score_second) => count entry of scores by one
# column. For each of the 4 letters the column values are added to the two
# scores; the pair is kept only if both new scores reach their
# least_sufficient_* bound. The kept pair's count is increased by the value
# the block returns for (count, letter).
#
# Requires a block; it would not work without one.
# Returns a new nested hash { new_score_first => { new_score_second => count } }.
def recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second)
  extended = Hash.new { |outer, key| outer[key] = Hash.new(0) }
  scores.each do |prefix_first, by_second|
    by_second.each do |prefix_second, count|
      (0...4).each do |letter|
        total_first = prefix_first + first_column[letter]
        next unless total_first >= least_sufficient_first

        total_second = prefix_second + second_column[letter]
        next unless total_second >= least_sufficient_second

        extended[total_first][total_second] += yield(count, letter)
      end
    end
  end
  extended
end
#revcomp? ⇒ Boolean
45 46 47 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 45
#
# True when the alignment orientation is :revcomp.
def revcomp?
  orientation == :revcomp
end
#second_overlaps?(pos) ⇒ Boolean
91 92 93 94 95 96 97 98 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 91
#
# Whether the second matrix covers position pos of the alignment.
#
# Positions outside 0...length are never covered. With a positive shift the
# second matrix starts at position shift, otherwise at position 0; in both
# cases it spans second_length positions.
def second_overlaps?(pos)
  return false unless pos >= 0 && pos < length

  start = shift > 0 ? shift : 0
  pos >= start && pos < start + second_length
end
#second_pwm_alignment ⇒ Object
63 64 65 66 67 68 69 70 71 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 63
#
# One-line picture of where the second matrix lies in the alignment:
# '>' (direct orientation) or '<' (any other orientation) for every position
# it covers and '.' for every position it does not.
def second_pwm_alignment
  marker = direct? ? '>' : '<'
  Array.new(length) { |pos| second_overlaps?(pos) ? marker : '.' }.join
end