Class: Macroape::PWMCompareAligned

Inherits:
Object
  • Object
show all
Defined in:
lib/macroape/pwm_compare_aligned.rb,
lib/macroape/aligned_pair_intersection.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(first_unaligned, second_unaligned, shift, orientation) ⇒ PWMCompareAligned

first_unaligned and second_unaligned - PWMCounting objects, not PWMs



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/macroape/pwm_compare_aligned.rb', line 23

# Builds an aligned pair out of two unaligned matrices.
#
# The second matrix is reverse-complemented when the orientation is :revcomp.
# Then one matrix is left-augmented by the absolute shift (the second one for a
# positive shift, the first one otherwise) and both are right-augmented up to
# the common alignment length.
#
# @param first_unaligned  first matrix (must respond to #length,
#   #left_augmented and #right_augmented)
# @param second_unaligned second matrix (same interface, plus
#   #reverse_complemented)
# @param shift [Integer] offset of the second matrix relative to the first
# @param orientation [Symbol] :direct or :revcomp
def initialize(first_unaligned, second_unaligned, shift, orientation)
  @shift = shift
  @orientation = orientation

  @first_length = first_unaligned.length
  @second_length = second_unaligned.length
  @length = self.class.calculate_alignment_length(@first_length, @second_length, @shift)

  padded_first = first_unaligned
  padded_second = revcomp? ? second_unaligned.reverse_complemented : second_unaligned

  # Only one of the matrices gets a left margin, depending on the shift sign.
  if shift > 0
    padded_second = padded_second.left_augmented(shift)
  else
    padded_first = padded_first.left_augmented(-shift)
  end

  # Fill both matrices on the right so that they span the whole alignment.
  @first = padded_first.right_augmented(@length - padded_first.length)
  @second = padded_second.right_augmented(@length - padded_second.length)
end

Instance Attribute Details

#firstObject (readonly)

Returns the value of attribute first.



20
21
22
# File 'lib/macroape/pwm_compare_aligned.rb', line 20

# Reader for @first: the first matrix after the constructor has padded it
# (left/right augmented) to the full alignment length.
def first; @first; end

#first_lengthObject (readonly)

Returns the value of attribute first_length.



20
21
22
# File 'lib/macroape/pwm_compare_aligned.rb', line 20

# Reader for @first_length: the length of the first matrix as passed to the
# constructor, i.e. before any alignment padding.
def first_length; @first_length; end

#lengthObject (readonly)

Returns the value of attribute length.



20
21
22
# File 'lib/macroape/pwm_compare_aligned.rb', line 20

# Reader for @length: the alignment length the constructor obtains from
# calculate_alignment_length.
def length; @length; end

#max_pair_hash_sizeObject

Sets or gets the limit on the total size of the calculation hash. It is a safeguard against excessive CPU usage caused by inappropriate input data.



18
19
20
# File 'lib/macroape/pwm_compare_aligned.rb', line 18

# Reader for @max_pair_hash_size: the limit get_counts compares its score
# hash against. A nil (or false) value disables that check.
def max_pair_hash_size; @max_pair_hash_size; end

#orientationObject (readonly)

Returns the value of attribute orientation.



20
21
22
# File 'lib/macroape/pwm_compare_aligned.rb', line 20

# Reader for @orientation: the orientation given to the constructor; the
# predicates direct? and revcomp? compare it against :direct and :revcomp.
def orientation; @orientation; end

#secondObject (readonly)

Returns the value of attribute second.



20
21
22
# File 'lib/macroape/pwm_compare_aligned.rb', line 20

# Reader for @second: the second matrix after the constructor has optionally
# reverse-complemented it and padded it to the full alignment length.
def second; @second; end

#second_lengthObject (readonly)

Returns the value of attribute second_length.



20
21
22
# File 'lib/macroape/pwm_compare_aligned.rb', line 20

# Reader for @second_length: the length of the second matrix as passed to the
# constructor, i.e. before any alignment padding.
def second_length; @second_length; end

#shiftObject (readonly)

Returns the value of attribute shift.



20
21
22
# File 'lib/macroape/pwm_compare_aligned.rb', line 20

# Reader for @shift: the shift given to the constructor. A positive value
# left-pads the second matrix, any other value left-pads the first one.
def shift; @shift; end

Class Method Details

.calculate_alignment_length(first_len, second_len, shift) ⇒ Object



131
132
133
134
135
136
137
# File 'lib/macroape/pwm_compare_aligned.rb', line 131

# Length of the alignment that contains both matrices.
#
# For a positive shift the second matrix ends at second_len + shift; otherwise
# the first matrix ends at first_len - shift. The alignment is as long as the
# matrix that ends later.
#
# @param first_len [Integer] length of the first matrix
# @param second_len [Integer] length of the second matrix
# @param shift [Integer] offset of the second matrix relative to the first
# @return [Integer] total number of alignment columns
def self.calculate_alignment_length(first_len, second_len, shift)
  return [first_len, second_len + shift].max if shift > 0

  [first_len - shift, second_len].max
end

Instance Method Details

#alignment_infosObject



73
74
75
76
77
78
79
# File 'lib/macroape/pwm_compare_aligned.rb', line 73

# Summary of the alignment as a Hash.
#
# @return [Hash] with keys :shift, :orientation, :text (the two alignment
#   strings joined by a newline), :overlap and :alignment_length
def alignment_infos
  infos = {}
  infos[:shift] = shift
  infos[:orientation] = orientation
  infos[:text] = "#{first_pwm_alignment}\n#{second_pwm_alignment}"
  infos[:overlap] = overlap
  infos[:alignment_length] = length
  infos
end

#counts_for_two_matrices(threshold_first, threshold_second) ⇒ Object

unoptimized version of this and related methods



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/macroape/aligned_pair_intersection.rb', line 4

# Counts produced by get_counts for both matrices, weighted by each matrix's
# background.
#
# When the two backgrounds differ, get_counts is run twice, once weighting by
# each background. When they are equal, it is run once and the result is
# reported for both; a wordwise background uses the raw count with no
# per-letter weighting.
#
# @param threshold_first threshold passed to get_counts for the first matrix
# @param threshold_second threshold passed to get_counts for the second matrix
# @return [Array] two-element array: [count for first, count for second]
def counts_for_two_matrices(threshold_first, threshold_second)
  # Fetch the background counts once instead of on every block invocation.
  first_background = first.background.counts
  second_background = second.background.counts

  if first_background == second_background
    shared =
      if first.background.wordwise?
        get_counts(threshold_first, threshold_second) { |count, _letter| count }
      else
        get_counts(threshold_first, threshold_second) { |count, letter| first_background[letter] * count }
      end
    [shared, shared]
  else
    by_first = get_counts(threshold_first, threshold_second) { |count, letter| first_background[letter] * count }
    by_second = get_counts(threshold_first, threshold_second) { |count, letter| second_background[letter] * count }
    [by_first, by_second]
  end
end

#direct?Boolean

Returns:

  • (Boolean)


42
43
44
# File 'lib/macroape/pwm_compare_aligned.rb', line 42

# Whether the orientation of the alignment is :direct.
#
# @return [Boolean]
def direct?; orientation == :direct; end

#first_overlaps?(pos) ⇒ Boolean

Whether the first matrix overlaps the specified position of the alignment.

Returns:

  • (Boolean)


82
83
84
85
86
87
88
89
# File 'lib/macroape/pwm_compare_aligned.rb', line 82

# Whether the first matrix covers the given alignment position.
#
# Positions outside 0...length never overlap. For a positive shift the first
# matrix starts at column 0; otherwise it starts at column -shift.
#
# @param pos [Integer] zero-based alignment column
# @return [Boolean]
def first_overlaps?(pos)
  within_alignment = pos >= 0 && pos < length
  return false unless within_alignment
  return pos < first_length if shift > 0

  start = -shift
  pos >= start && pos < start + first_length
end

#first_pwm_alignmentObject



53
54
55
56
57
58
59
60
61
# File 'lib/macroape/pwm_compare_aligned.rb', line 53

# Text picture of where the first matrix lies in the alignment: one character
# per alignment column, '>' where the first matrix is present and '.'
# elsewhere.
#
# @return [String]
def first_pwm_alignment
  marks = length.times.map { |pos| first_overlaps?(pos) ? '>' : '.' }
  marks.join
end

#get_counts(threshold_first, threshold_second, &count_contribution_block) ⇒ Object

The block has the form {|score, letter| ... } and returns the contribution to the count made by `letter` given `score`.



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/macroape/aligned_pair_intersection.rb', line 24

# Sums the contributions of all words whose running scores stay above both
# thresholds on every column of the alignment.
#
# The intermediate state is a two-level hash:
#   score on first matrix => { score on second matrix => count }.
# After each column the state is rebuilt by recalc_score_hash, which drops
# score pairs below `threshold - best_suffix(column + 1)` (best_suffix is
# presumably the best score attainable on the remaining columns — confirm
# against the matrix class).
#
# If max_pair_hash_size is set, the total number of stored
# (first score, second score) entries is compared against it after every
# column and an error is raised when the limit is exceeded.
#
# @param threshold_first score threshold for the first matrix
# @param threshold_second score threshold for the second matrix
# @yield [count, letter] block forwarded to recalc_score_hash; returns the
#   contribution of `letter` given the accumulated `count`
# @return [Float] sum of all counts left after the last column
# @raise [RuntimeError] when the state grows beyond max_pair_hash_size
def get_counts(threshold_first, threshold_second, &count_contribution_block)
  # scores_on_first_pwm, scores_on_second_pwm --> count
  scores = { 0 => { 0 => 1 } }
  length.times do |column|
    scores = recalc_score_hash(scores,
                               first.matrix[column], second.matrix[column],
                               threshold_first - first.best_suffix(column + 1),
                               threshold_second - second.best_suffix(column + 1),
                               &count_contribution_block)
    next unless max_pair_hash_size

    # Count the inner entries. Iterating the outer hash directly yields
    # [key, value] pairs whose size is always 2, which made the limit
    # meaningless.
    total_entries = scores.each_value.inject(0) { |total, second_scores| total + second_scores.size }
    if total_entries > max_pair_hash_size
      raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities'
    end
  end

  scores.each_value.inject(0.0) do |total, second_scores|
    total + second_scores.each_value.inject(0.0) { |subtotal, count| subtotal + count }
  end
end

#jaccard(first_threshold, second_threshold) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/macroape/pwm_compare_aligned.rb', line 100

# Jaccard-style similarity of the word sets recognized by the two aligned
# matrices at the given thresholds.
#
# The intersection size is taken as the geometric mean of the two counts
# returned by counts_for_two_matrices, and the union as f + s - intersection,
# where f and s are the numbers of words recognized by each matrix alone.
#
# When either matrix recognizes nothing, a sentinel result with
# similarity and tanimoto equal to -1 is returned (without the real_pvalue_*
# keys).
#
# @param first_threshold threshold for the first matrix
# @param second_threshold threshold for the second matrix
# @return [Hash] :similarity, :tanimoto (1.0 - similarity),
#   :recognized_by_both, :recognized_by_first, :recognized_by_second and,
#   for the non-degenerate case, :real_pvalue_first / :real_pvalue_second
def jaccard(first_threshold, second_threshold)
  f = first.count_by_threshold(first_threshold)
  s = second.count_by_threshold(second_threshold)
  if f == 0 || s == 0
    return { similarity: -1, tanimoto: -1, recognized_by_both: 0,
             recognized_by_first: f,
             recognized_by_second: s }
  end

  counts_first, counts_second = counts_for_two_matrices(first_threshold, second_threshold)
  intersect = Math.sqrt(counts_first * counts_second)
  union = f + s - intersect
  similarity = intersect.to_f / union
  # Coerce to Float before dividing: with Integer counts and an Integer
  # vocabulary volume the quotient would otherwise be truncated to 0.
  { similarity: similarity, tanimoto: 1.0 - similarity, recognized_by_both: intersect,
    recognized_by_first: f, recognized_by_second: s,
    real_pvalue_first: f.to_f / first.vocabulary_volume,
    real_pvalue_second: s.to_f / second.vocabulary_volume }
end

#jaccard_by_pvalue(pvalue) ⇒ Object



119
120
121
122
123
# File 'lib/macroape/pwm_compare_aligned.rb', line 119

# Runs jaccard with the thresholds each matrix reports (via #threshold) for
# the given P-value.
#
# @param pvalue P-value passed to both matrices' #threshold
# @return [Hash] the result of jaccard
def jaccard_by_pvalue(pvalue)
  jaccard(first.threshold(pvalue), second.threshold(pvalue))
end

#jaccard_by_weak_pvalue(pvalue) ⇒ Object



125
126
127
128
129
# File 'lib/macroape/pwm_compare_aligned.rb', line 125

# Runs jaccard with the thresholds each matrix reports (via #weak_threshold)
# for the given P-value.
#
# @param pvalue P-value passed to both matrices' #weak_threshold
# @return [Hash] the result of jaccard
def jaccard_by_weak_pvalue(pvalue)
  jaccard(first.weak_threshold(pvalue), second.weak_threshold(pvalue))
end

#overlapObject



49
50
51
# File 'lib/macroape/pwm_compare_aligned.rb', line 49

# Number of alignment columns covered by both matrices at once.
#
# @return [Integer]
def overlap
  length.times.inject(0) do |shared, pos|
    (first_overlaps?(pos) && second_overlaps?(pos)) ? shared + 1 : shared
  end
end

#recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second) ⇒ Object

wouldn’t work without count_contribution_block



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/macroape/aligned_pair_intersection.rb', line 41

# Advances the two-level score hash by one alignment column.
#
# For every stored (first score, second score, count) triple and each of the
# four letters, the letter's column values are added to both scores. A pair is
# kept only if both new scores reach their least sufficient value; its count
# in the result grows by whatever the block returns for (count, letter).
#
# A block is mandatory: the method yields unconditionally for every kept pair.
#
# @param scores [Hash] first score => { second score => count }
# @param first_column letter-indexed scores of the first matrix column
# @param second_column letter-indexed scores of the second matrix column
# @param least_sufficient_first minimal first score worth keeping
# @param least_sufficient_second minimal second score worth keeping
# @yield [count, letter] contribution of `letter` for the given `count`
# @return [Hash] new two-level hash of the same shape as `scores`
def recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second)
  updated = Hash.new { |hash, key| hash[key] = Hash.new(0) }

  scores.each do |score_first, second_scores|
    second_scores.each do |score_second, count|
      4.times do |letter|
        candidate_first = score_first + first_column[letter]
        next unless candidate_first >= least_sufficient_first

        candidate_second = score_second + second_column[letter]
        next unless candidate_second >= least_sufficient_second

        updated[candidate_first][candidate_second] += yield(count, letter)
      end
    end
  end

  updated
end

#revcomp?Boolean

Returns:

  • (Boolean)


45
46
47
# File 'lib/macroape/pwm_compare_aligned.rb', line 45

# Whether the orientation of the alignment is :revcomp.
#
# @return [Boolean]
def revcomp?; orientation == :revcomp; end

#second_overlaps?(pos) ⇒ Boolean

Returns:

  • (Boolean)


91
92
93
94
95
96
97
98
# File 'lib/macroape/pwm_compare_aligned.rb', line 91

# Whether the second matrix covers the given alignment position.
#
# Positions outside 0...length never overlap. For a positive shift the second
# matrix starts at column `shift`; otherwise it starts at column 0.
#
# @param pos [Integer] zero-based alignment column
# @return [Boolean]
def second_overlaps?(pos)
  within_alignment = pos >= 0 && pos < length
  return false unless within_alignment
  return pos < second_length unless shift > 0

  start = shift
  pos >= start && pos < start + second_length
end

#second_pwm_alignmentObject



63
64
65
66
67
68
69
70
71
# File 'lib/macroape/pwm_compare_aligned.rb', line 63

# Text picture of where the second matrix lies in the alignment: one
# character per alignment column, '.' where the second matrix is absent and an
# arrow where it is present ('>' for the direct orientation, '<' otherwise).
#
# @return [String]
def second_pwm_alignment
  arrow = direct? ? '>' : '<'
  marks = length.times.map { |pos| second_overlaps?(pos) ? arrow : '.' }
  marks.join
end