Class: Ytilib::PM
- Defined in:
- lib/sequence_logo/ytilib/pm.rb,
lib/sequence_logo/ytilib/pmsd.rb,
lib/sequence_logo/ytilib/infocod.rb
Direct Known Subclasses
Constant Summary collapse
- IUPAC_LS =
(IUPAC::CODE.keys - ['A','C','G','T']).collect { |iul| [IUPAC::CODE[iul], iul.split(//)] }
Instance Attribute Summary collapse
-
#matrix ⇒ Object
readonly
Returns the value of attribute matrix.
-
#size ⇒ Object
(also: #length)
readonly
Returns the value of attribute size.
-
#words_count ⇒ Object
Returns the value of attribute words_count.
Class Method Summary collapse
- .col_sum(matrix, index = 0) ⇒ Object
- .from_bismark(b, iupacomp = false) ⇒ Object
- .load(filename) ⇒ Object
- .new_pcm(words, iupacomp = false) ⇒ Object
- .new_pwm(words) ⇒ Object
Instance Method Summary collapse
- #best_hit(s, use2strands = true) ⇒ Object
- #best_score ⇒ Object
- #best_word ⇒ Object
- #col_sum(index = 0, letset = ['A','C','G','T']) ⇒ Object
- #collect_hits(s, score_g, use2strands = true) ⇒ Object
- #consensus ⇒ Object
- #consensus_string(beautiful = false) ⇒ Object
- #dup ⇒ Object
- #each_position(&block) ⇒ Object
- #each_position_index(&block) ⇒ Object
- #eql?(pm) ⇒ Boolean
- #fast_score_sigma ⇒ Object
- #fast_score_sigma_precise ⇒ Object
- #find_hit(s, score_g, use2strands = true) ⇒ Object
- #find_hits(s, score_g, use2strands = true) ⇒ Object
- #fixwc ⇒ Object
- #flexeql?(pm) ⇒ Boolean
- #get_ppm(words_count = nil) ⇒ Object (also: #to_ppm)
- #get_pwm(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1) ⇒ Object (also: #to_pwm)
- #icd2of4(floor = false) ⇒ Object
- #icd3of4(floor = false) ⇒ Object
- #icd4of4(floor = false) ⇒ Object
- #icdThc ⇒ Object
- #icdTlc ⇒ Object
- #infocod(position = nil) ⇒ Object (also: #icd)
-
#initialize(size, matrix = nil, words_count = nil) ⇒ PM
constructor
A new instance of PM.
- #iupacomp! ⇒ Object
- #m3sd(bckgr = Randoom::DEF_PROBS) ⇒ Object
- #p_value(threshold, mean = nil, variance = nil) ⇒ Object
- #positiv! ⇒ Object
- #revcomp! ⇒ Object
- #save(filename) ⇒ Object
- #score(word) ⇒ Object
- #score_mean(bckgr = Randoom::DEF_PROBS) ⇒ Object
- #score_sigma(trycount = 4**10, approx = false, bg = nil) ⇒ Object
- #score_variance(bckgr = Randoom::DEF_PROBS) ⇒ Object
- #strict_consensus ⇒ Object
- #to_bismark(b) ⇒ Object
- #to_pwm!(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1) ⇒ Object
- #worst_score ⇒ Object
Constructor Details
#initialize(size, matrix = nil, words_count = nil) ⇒ PM
Returns a new instance of PM.
161 162 163 164 165 166 167 168 169 170 171 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 161
#
# Builds a position matrix with +size+ columns.
#
# size        - number of positions (columns).
# matrix      - Hash of letter => per-position Array; when nil a fresh matrix
#               is requested from PM.new_matrix(size).
# words_count - words count to store; when nil or not positive it is derived
#               from the sum of column 0 and kept only if it rounds to >= 2
#               (otherwise @words_count is nil).
#
# checkerr is defined elsewhere; presumably it raises when its block is true.
def initialize(size, matrix = nil, words_count = nil)
  # The mismatch message interpolates matrix['A'].size, so it may only be
  # built when a matrix was actually supplied (the default is nil).
  unless matrix.nil?
    actual = matrix['A'].size
    checkerr("matrix['A'].size != size, #{actual} != #{size}") { size != actual }
  end

  @size = size
  @matrix = matrix.nil? ? PM.new_matrix(size) : matrix

  if words_count && words_count > 0
    @words_count = words_count
  else
    derived = col_sum(0).round
    @words_count = derived >= 2 ? derived : nil
  end
end
Instance Attribute Details
#matrix ⇒ Object (readonly)
Returns the value of attribute matrix.
4 5 6 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 4
#
# Reader for @matrix (the letter => per-position values Hash).
def matrix
  @matrix
end
#size ⇒ Object (readonly) Also known as: length
Returns the value of attribute size.
4 5 6 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 4
#
# Reader for @size (also exposed as #length according to this page).
def size
  @size
end
#words_count ⇒ Object
Returns the value of attribute words_count.
5 6 7 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 5
#
# Reader for @words_count.
def words_count
  @words_count
end
Class Method Details
.col_sum(matrix, index = 0) ⇒ Object
177 178 179 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 177
#
# Adds the A, C, G and T entries of +matrix+ at column +index+ (in that order).
def self.col_sum(matrix, index = 0)
  column = %w[A C G T].map { |letter| matrix[letter][index] }
  column.inject(:+)
end
.from_bismark(b, iupacomp = false) ⇒ Object
347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 347
#
# Builds a matrix object from a small-BiSMark element +b+.
#
# b        - element responding to #name, #attributes and #elements
#            (presumably a REXML element -- confirm against callers).
# iupacomp - when true, iupacomp! is applied to PPM and PCM results; a PWM
#            cannot be made IUPAC compatible and raises instead.
#
# Cell values are parsed as floats for PPM, PWM and WPCM and as integers
# otherwise. Returns a PPM for "PPM" elements and a PM for everything else.
#
# The previous listing also assigned @words_count = col_sum(matrix) in the PCM
# branch. Inside a class method that only set an instance variable on the
# class itself and never reached the new object, so it has been removed; the
# words count is still taken from the "words-count" attribute when present.
def self.from_bismark(b, iupacomp = false)
  checkerr("empty small-BiSMark file?") { !b }

  kind = b.name
  float_m = %w[PPM PWM WPCM].include?(kind)
  wc_attr = b.attributes["words-count"]
  words_count = wc_attr ? wc_attr.to_f : nil

  matrix = { "A" => [], "C" => [], "G" => [], "T" => [] }
  b.elements.each("pm-column") do |pmc|
    position = pmc.attributes["position"].to_i
    %w[A C G T].each do |l|
      text = pmc.elements[l.downcase].get_text.to_s
      # "position" is 1-based in the file, arrays are 0-based.
      matrix[l][position - 1] = float_m ? text.to_f : text.to_i
    end
  end

  raise "cannot force IUPAC compatible PWM" if kind == "PWM" && iupacomp

  pm = (kind == "PPM" ? PPM : PM).new(matrix['A'].size, matrix, words_count)
  pm.iupacomp! if iupacomp && (kind == "PPM" || kind == "PCM")
  pm
end
.load(filename) ⇒ Object
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 273
#
# Loads a matrix from a whitespace-separated text file, supporting the pat and
# pwm formats (letter-column and letter-row layout).
#
# Lines containing any token that Float() rejects (e.g. a name/header line)
# are skipped. Blank lines are skipped as well: previously they were pushed as
# empty rows, which left nil cells in the matrix or broke the transpose.
# A table with exactly 4 rows is treated as letter-row layout and transposed.
#
# Returns a PPM when every column sum rounds to 1, otherwise a PM.
def self.load(filename)
  rows = []
  IO.read(filename).each_line do |line|
    fields = line.split
    next if fields.empty?
    begin
      rows << fields.collect { |field| Float(field) }
    rescue ArgumentError
      next # not a purely numeric line
    end
  end

  rows = rows.transpose if rows.size == 4
  matrix = PM.new_matrix(rows.size)
  rows.each_with_index do |row, i|
    %w[A C G T].each_with_index { |l, j| matrix[l][i] = row[j] }
  end

  ppm_mode = (0...rows.size).all? { |i| col_sum(matrix, i).round == 1 }
  ppm_mode ? PPM.new(rows.size, matrix) : PM.new(rows.size, matrix)
end
.new_pcm(words, iupacomp = false) ⇒ Object
247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 247
#
# Counts letters per position over +words+ and wraps the counts in a PM.
# The width is taken from the first word. Letters are upcased; 'N' adds 0.25
# to each of A, C, G and T, any letter outside ACGTN is reported via checkerr.
# iupacomp! is applied to the result when +iupacomp+ is true.
def self.new_pcm(words, iupacomp = false)
  size = words[0].size
  counts = PM.new_matrix(size)
  counts.each_value { |column| column.fill(0) }

  words.each do |word|
    size.times do |i|
      letter = word[i, 1].upcase
      checkerr("unknown letter #{letter}") { !['A', 'C', 'G', 'T', 'N'].include?(letter) }
      if letter == 'N'
        %w[A C G T].each { |l| counts[l][i] += 0.25 }
      else
        counts[letter][i] += 1
      end
    end
  end

  pcm = PM.new(size, counts, words.size)
  pcm.iupacomp! if iupacomp
  pcm
end
.new_pwm(words) ⇒ Object
267 268 269 270 271 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 267
#
# Builds a count matrix from +words+ via PM.new_pcm, converts it in place with
# to_pwm! and returns that same object.
def self.new_pwm(words)
  PM.new_pcm(words).tap { |pcm| pcm.to_pwm! }
end
Instance Method Details
#best_hit(s, use2strands = true) ⇒ Object
134 135 136 137 138 139 140 141 142 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 134
#
# Returns the highest score over all windows of length @size in +s+.
#
# s           - sequence to scan; reported via checkerr when shorter than @size.
# use2strands - when true, each window is also scored after String#revcomp!
#               (defined elsewhere in the project).
#
# The reverse complement is now built and scored only when use2strands is
# true; before, it was computed for every window and then discarded.
def best_hit(s, use2strands = true)
  checkerr("too short sequence") { s.size < @size }

  best = -Float::MAX
  (0..(s.size - @size)).each do |i|
    best = [best, score(s[i, @size])].max
    best = [best, score(s[i, @size].revcomp!)].max if use2strands
  end
  best
end
#best_score ⇒ Object
229 230 231 232 233 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 229
#
# Sum over all positions of the largest A/C/G/T value in that column.
def best_score
  total = 0
  (0...size).each do |i|
    total += %w[A C G T].map { |l| @matrix[l][i] }.max
  end
  total
end
#best_word ⇒ Object
39 40 41 42 43 44 45 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 39
#
# Builds the word made of the top-valued letter of every column; a column
# whose maximum is shared by several letters contributes "N".
def best_word
  letters = (0...size).map do |i|
    top = %w[A C G T].map { |l| @matrix[l][i] }.max
    winners = %w[A C G T].select { |l| @matrix[l][i] == top }
    winners.size == 1 ? winners.first : "N"
  end
  letters.join
end
#col_sum(index = 0, letset = ['A','C','G','T']) ⇒ Object
173 174 175 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 173
#
# Sums the entries of column +index+ over the letters in +letset+
# (A, C, G, T by default), starting from 0.
def col_sum(index = 0, letset = ['A','C','G','T'])
  total = 0
  letset.each { |letter| total += @matrix[letter][index] }
  total
end
#collect_hits(s, score_g, use2strands = true) ⇒ Object
123 124 125 126 127 128 129 130 131 132 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 123
#
# Collects every window of +s+ whose score is at least +score_g+.
#
# Each hit is [score, window, reverse_strand?, offset]. Windows are upcased
# for scoring only; the stored window keeps its original case.
#
# use2strands used to be ignored, so reverse-strand hits were reported even
# when the caller asked for a single strand. They are now collected only when
# use2strands is true (the default, so default behaviour is unchanged).
def collect_hits(s, score_g, use2strands = true)
  result = []
  (0..(s.size - @size)).each do |i|
    seq = s[i, @size]
    score_p = score(seq.upcase)
    result << [score_p, seq, false, i] if score_p >= score_g
    next unless use2strands

    seq_rc = s[i, @size].revcomp!
    score_rc = score(seq_rc.upcase)
    result << [score_rc, seq_rc, true, i] if score_rc >= score_g
  end
  result
end
#consensus ⇒ Object
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 81
#
# Builds an IUPAC consensus word. For each column the distinct A/C/G/T values
# are ranked in descending order and, depending on how the column's infocod
# value compares with icd2of4 / icdThc / icdTlc, only the top 1, 2 or 3
# values (or all of them) are kept; the letters holding a kept value are
# looked up in IUPAC::CODE. Requires @words_count (checked via checkerr).
def consensus
  checkerr("words count is undefined") { !@words_count }
  i2o4, thc, tlc = icd2of4, icdThc, icdTlc
  icd = infocod

  word = ""
  (0...size).each do |i|
    ranked = %w[A C G T].map { |l| @matrix[l][i] }.uniq.sort.reverse
    kept = if icd[i] > i2o4
             [ranked.first]
           elsif icd[i] > thc
             ranked[0..1]
           elsif icd[i] > tlc
             ranked[0..2]
           else
             ranked
           end
    letters = %w[A C G T].select { |l| kept.include?(@matrix[l][i]) }.join
    word += IUPAC::CODE[letters]
  end
  IUPAC.new(word)
end
#consensus_string(beautiful = false) ⇒ Object
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 55
#
# Same column-by-column selection as #consensus, but returns a plain String.
# With +beautiful+ set, codes standing for more than two letters are
# downcased. Requires @words_count (checked via checkerr).
def consensus_string(beautiful = false)
  checkerr("words count is undefined") { !@words_count }
  i2o4, thc, tlc = icd2of4, icdThc, icdTlc
  icd = infocod

  word = ""
  (0...size).each do |i|
    ranked = %w[A C G T].map { |l| @matrix[l][i] }.uniq.sort.reverse
    kept = if icd[i] > i2o4
             [ranked.first]
           elsif icd[i] > thc
             ranked[0..1]
           elsif icd[i] > tlc
             ranked[0..2]
           else
             ranked
           end
    letters = %w[A C G T].select { |l| kept.include?(@matrix[l][i]) }.join
    code = IUPAC::CODE[letters]
    code = code.downcase if beautiful && letters.size > 2
    word += code
  end
  String.new(word)
end
#dup ⇒ Object
241 242 243 244 245 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 241
#
# Returns a new PM with the same size and words count whose per-letter arrays
# are copies of this matrix's arrays.
def dup
  copy = {}
  @matrix.each { |letter, values| copy[letter] = values.dup }
  PM.new(@size, copy, @words_count)
end
#each_position(&block) ⇒ Object
13 14 15 16 17 18 19 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 13
#
# Yields, for every column, the Array [A, C, G, T] of that column's values.
# Without a block an Enumerator over the same sequence is returned.
def each_position(&block)
  return to_enum(:each_position) unless block

  @matrix['A'].each_index do |i|
    block.call(%w[A C G T].map { |letter| @matrix[letter][i] })
  end
end
#each_position_index(&block) ⇒ Object
9 10 11 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 9
#
# Iterates the column indices by delegating to Array#each_index on the 'A' row.
def each_position_index(&block)
  a_row = @matrix['A']
  a_row.each_index(&block)
end
#eql?(pm) ⇒ Boolean
144 145 146 147 148 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 144
#
# True when the A, C, G and T rows are all eql? to the corresponding rows of
# +pm+ (other keys of the matrices are not compared).
def eql?(pm)
  %w[A C G T].all? { |letter| @matrix[letter].eql?(pm.matrix[letter]) }
end
#fast_score_sigma ⇒ Object
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/sequence_logo/ytilib/pmsd.rb', line 36
#
# Single-pass running mean/variance over the values that recursive_walk
# (defined elsewhere) yields for the A, C, G, T rows; returns the sample
# standard deviation. When a block is given it is also called with
# (sigma, mean).
#
# Fixes: sigma used to be assigned only inside the block_given? branch, so a
# call without a block returned nil. The accumulators now start as floats so
# integer values no longer cause integer division in the mean update.
# With fewer than two values the result is not meaningful (division by n - 1).
def fast_score_sigma
  n = 0
  mean = 0.0
  m2 = 0.0
  recursive_walk([matrix['A'], matrix['C'], matrix['G'], matrix['T']], 0, 0) do |x|
    n += 1
    delta = x - mean
    mean += delta / n
    m2 += delta * (x - mean)
  end

  sigma = Math.sqrt(m2 / (n - 1))
  yield(sigma, mean) if block_given?
  sigma
end
#fast_score_sigma_precise ⇒ Object
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/sequence_logo/ytilib/pmsd.rb', line 55
#
# Two-pass variant of #fast_score_sigma: the first walk computes the mean, the
# second accumulates squared deviations from that final mean. Returns the
# sample standard deviation and, when a block is given, also yields
# (sigma, mean).
#
# Fixes: sigma used to be assigned only inside the block_given? branch, so a
# call without a block returned nil. The accumulators now start as floats so
# integer values no longer cause integer division in the mean update.
def fast_score_sigma_precise
  n = 0
  mean = 0.0
  recursive_walk([matrix['A'], matrix['C'], matrix['G'], matrix['T']], 0, 0) do |x|
    n += 1
    mean += (x - mean) / n
  end

  n = 0
  m2 = 0.0
  recursive_walk([matrix['A'], matrix['C'], matrix['G'], matrix['T']], 0, 0) do |x|
    n += 1
    delta = x - mean
    m2 += delta * delta
  end

  sigma = Math.sqrt(m2 / (n - 1))
  yield(sigma, mean) if block_given?
  sigma
end
#find_hit(s, score_g, use2strands = true) ⇒ Object
104 105 106 107 108 109 110 111 112 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 104
#
# Returns the offset of the first window of +s+ whose score reaches +score_g+,
# or nil when there is none. With use2strands the better of the window's score
# and its reverse complement's score (String#revcomp!, defined elsewhere) is
# used.
#
# The reverse complement is now built and scored only when use2strands is
# true; before, it was computed for every window and then discarded.
def find_hit(s, score_g, use2strands = true)
  (0..(s.size - @size)).each do |i|
    best = score(s[i, @size])
    best = [best, score(s[i, @size].revcomp!)].max if use2strands
    return i if best >= score_g
  end
  nil
end
#find_hits(s, score_g, use2strands = true) ⇒ Object
114 115 116 117 118 119 120 121 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 114
#
# Returns the offsets of all windows of +s+ whose score reaches +score_g+.
# With use2strands the better of the window's score and its reverse
# complement's score (String#revcomp!, defined elsewhere) is used.
#
# The reverse complement is now built and scored only when use2strands is
# true. The block simply returns the comparison: the old `? i : nil` plus
# `compact` had no effect on the selected offsets.
def find_hits(s, score_g, use2strands = true)
  (0..(s.size - @size)).select do |i|
    best = score(s[i, @size])
    best = [best, score(s[i, @size].revcomp!)].max if use2strands
    best >= score_g
  end
end
#fixwc ⇒ Object
399 400 401 402 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 399
#
# When a words count is already set, replaces it with the largest column sum;
# does nothing (and returns nil) otherwise.
def fixwc
  if @words_count
    column_totals = (0...@size).map { |i| col_sum(i) }
    @words_count = column_totals.max
  end
end
#flexeql?(pm) ⇒ Boolean
150 151 152 153 154 155 156 157 158 159 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 150
#
# Tolerant comparison: true when every A/C/G/T cell differs from the matching
# cell of +pm+ by less than 10**-11.
#
# NOTE(review): the leading checkerr block is always true, so if checkerr
# raises on a true block (it is defined elsewhere) this method can never
# return -- it looks deliberately disabled; confirm before relying on it.
def flexeql?(pm)
  checkerr("for what?") { true }
  tolerance = 10**-11
  %w[A C G T].all? do |letter|
    (0...@size).all? do |position|
      (@matrix[letter][position] - pm.matrix[letter][position]).abs < tolerance
    end
  end
end
#get_ppm(words_count = nil) ⇒ Object Also known as: to_ppm
206 207 208 209 210 211 212 213 214 215 216 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 206
#
# Returns a PPM whose cells are this matrix's cells divided by +words_count+
# (defaults to @words_count; a missing or non-positive value is reported via
# checkerr). An IUPAC-sized target matrix is used when this matrix has an 'N'
# row. Also known as #to_ppm.
def get_ppm(words_count = nil)
  words_count ||= @words_count
  checkerr("undefined words count") { !words_count || words_count <= 0 }

  ppm = @matrix['N'] ? PM.new_matrix_iupac(@size) : PM.new_matrix(@size)
  @matrix.each_key do |letter|
    @size.times { |i| ppm[letter][i] = @matrix[letter][i].to_f / words_count }
  end
  PPM.new(@size, ppm, words_count)
end
#get_pwm(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1) ⇒ Object Also known as: to_pwm
201 202 203 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 201
#
# Non-destructive counterpart of #to_pwm!: converts a copy obtained from #dup
# and returns whatever to_pwm! returns for it. Also known as #to_pwm.
def get_pwm(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1)
  copy = dup
  copy.to_pwm!(words_count, probs, pseudocount)
end
#icd2of4(floor = false) ⇒ Object
50 51 52 53 54 55 |
# File 'lib/sequence_logo/ytilib/infocod.rb', line 50
#
# Evaluates (sum of log_fact over [w/2, w/2, 0, 0] - log_fact(w)) / w for
# w = @words_count; +floor+ rounds w/2 down first. log_fact is defined
# elsewhere on numbers.
def icd2of4(floor = false)
  half = @words_count / 2.0
  half = half.floor if floor
  # 0 is equal to @words_count % 2, because 0! = 1!
  log_sum = [half, half, 0, 0].inject(0.0) { |acc, k_i| acc + k_i.log_fact }
  (log_sum - @words_count.log_fact) / @words_count
end
#icd3of4(floor = false) ⇒ Object
57 58 59 60 61 62 |
# File 'lib/sequence_logo/ytilib/infocod.rb', line 57
#
# Evaluates (sum of log_fact over [w/3, w/3, w/3, addon] - log_fact(w)) / w
# for w = @words_count. With +floor+, w/3 is rounded down and addon is w % 3;
# otherwise addon is 0. log_fact is defined elsewhere on numbers.
def icd3of4(floor = false)
  third = @words_count / 3.0
  third = third.floor if floor
  addon = floor ? @words_count % 3 : 0
  log_sum = [third, third, third, addon].inject(0.0) { |acc, k_i| acc + k_i.log_fact }
  (log_sum - @words_count.log_fact) / @words_count
end
#icd4of4(floor = false) ⇒ Object
73 74 75 76 77 |
# File 'lib/sequence_logo/ytilib/infocod.rb', line 73
#
# Evaluates (4 terms of log_fact(w/4) summed - log_fact(w)) / w for
# w = @words_count; +floor+ rounds w/4 down first. log_fact is defined
# elsewhere on numbers.
def icd4of4(floor = false)
  quarter = @words_count / 4.0
  quarter = quarter.floor if floor
  log_sum = [quarter, quarter, quarter, quarter].inject(0.0) { |acc, k_i| acc + k_i.log_fact }
  (log_sum - @words_count.log_fact) / @words_count
end
#icdThc ⇒ Object
64 65 66 |
# File 'lib/sequence_logo/ytilib/infocod.rb', line 64
#
# Threshold used by the consensus methods; simply icd3of4 with its default
# (non-floored) argument.
def icdThc
  icd3of4
end
#icdTlc ⇒ Object
68 69 70 71 |
# File 'lib/sequence_logo/ytilib/infocod.rb', line 68
#
# Evaluates (sum of log_fact over [w/3, w/3, w/6, w/6] - log_fact(w)) / w for
# w = @words_count. log_fact is defined elsewhere on numbers.
def icdTlc
  sixth = @words_count / 6.0
  log_sum = [2*sixth, 2*sixth, sixth, sixth].inject(0.0) { |acc, k_i| acc + k_i.log_fact }
  (log_sum - @words_count.log_fact) / @words_count
end
#infocod(position = nil) ⇒ Object Also known as: icd
44 45 46 47 |
# File 'lib/sequence_logo/ytilib/infocod.rb', line 44
#
# With +position+ returns infocod_private(position); without it returns the
# Array of infocod_private(i) for every column. Also known as #icd.
def infocod(position = nil)
  if position
    infocod_private(position)
  else
    (0...@size).map { |i| infocod_private(i) }
  end
end
#iupacomp! ⇒ Object
376 377 378 379 380 381 382 383 384 385 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 376
#
# Adds one row per IUPAC_LS entry: each cell is the column sum over that
# code's letters divided by the number of letters. When no words count is set
# yet it is taken as the largest column sum. Returns self.
#
# NOTE(review): with Integer counts the division is integer division, so the
# averages are truncated -- confirm whether that is intended.
def iupacomp!
  # for unbalanced matrices (Genomatix has some)
  @words_count ||= (0...@size).collect { |i| col_sum(i) }.max
  # @words_count = @words_count.round < 2.0 ? nil : @words_count.round

  IUPAC_LS.each do |code, letters|
    @matrix[code] = (0...@size).collect { |i| col_sum(i, letters) / letters.size }
  end
  self
end
#m3sd(bckgr = Randoom::DEF_PROBS) ⇒ Object
387 388 389 390 391 392 393 394 395 396 397 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 387
#
# Returns mean + 3 * sigma, where mean and variance are accumulated per column
# from the A/C/G/T cells weighted by +bckgr+ (letter => weight):
# mean adds sum(m * p); variance adds sum(m**2 * p) - sum(m * p)**2.
def m3sd(bckgr = Randoom::DEF_PROBS)
  letters = %w[A C G T]
  mean = 0.0
  dev = 0.0
  (0...@size).each do |i|
    mean += letters.inject(0.0) { |acc, l| acc + @matrix[l][i] * bckgr[l] }
    second_moment = letters.inject(0.0) { |acc, l| acc + @matrix[l][i]**2 * bckgr[l] }
    first_moment = letters.inject(0.0) { |acc, l| acc + matrix[l][i] * bckgr[l] }
    dev += second_moment - first_moment**2
  end
  mean + 3 * Math.sqrt(dev)
end
#p_value(threshold, mean = nil, variance = nil) ⇒ Object
32 33 34 35 36 37 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 32
#
# Returns (1 - erf2(z / sqrt(2))) / 2 with z = (threshold - mean) / sqrt(variance).
# +mean+ and +variance+ default to score_mean and score_variance.
# Math.erf2 is defined elsewhere in the project.
def p_value(threshold, mean = nil, variance = nil)
  mean ||= score_mean
  variance ||= score_variance
  z = (threshold - mean) / Math.sqrt(variance)
  (1 - Math.erf2(z / Math.sqrt(2))) / 2.0
end
#positiv! ⇒ Object
321 322 323 324 325 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 321
#
# Adds the absolute value of the smallest cell to every cell, in place, and
# returns self.
def positiv!
  shift = @matrix.values.map { |row| row.min }.min.abs
  @matrix.each_value do |row|
    row.map! { |value| value + shift }
  end
  self
end
#revcomp! ⇒ Object
327 328 329 330 331 332 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 327
#
# Reverse-complements the matrix in place: swaps the A/T rows and the C/G
# rows, then reverses every row. Returns self.
#
# NOTE(review): any other rows (e.g. those added by iupacomp!) are reversed
# but not swapped with a complementary code -- confirm whether that matters.
def revcomp!
  [%w[A T], %w[C G]].each do |left, right|
    @matrix[left], @matrix[right] = @matrix[right], @matrix[left]
  end
  @matrix.each_value(&:reverse!)
  self
end
#save(filename) ⇒ Object
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 295
#
# Writes the A/C/G/T rows to +filename+; the format is chosen from the file
# extension (File.ext_wo_name / File.name_wo_ext are defined elsewhere):
#   "pwm" - one line per letter, values separated by a space
#   "pat" - the file name without extension, then one line per position
# "xml" and unknown extensions are reported via checkerr.
#
# The format is now validated before the file is opened: previously the file
# was opened with "w" first, so an unsupported extension still created or
# truncated the target file.
def save(filename)
  format = File.ext_wo_name(filename)
  unless %w[pwm pat].include?(format)
    if format == "xml"
      checkerr("small-BiSMark is not supported at this moment")
    else
      checkerr("unknown motif file format specified")
    end
    return # in case checkerr only reports; nothing is written either way
  end

  File.open(filename, "w") do |out_f|
    if format == "pwm"
      %w[A C G T].each do |letter|
        @matrix[letter].each { |e| out_f << "#{e} " }
        out_f << $/
      end
    else
      out_f.puts File.name_wo_ext(filename)
      (0...@size).each do |i|
        %w[A C G T].each { |letter| out_f << "#{@matrix[letter][i]} " }
        out_f << $/
      end
    end
  end
end
#score(word) ⇒ Object
219 220 221 222 223 224 225 226 227 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 219
#
# Sums, over all positions, the matrix cell of the letter found at that
# position of +word+. The word must be @size long and contain only ACGT, or
# the IUPAC letters as well when the matrix has an 'N' row; violations are
# reported via checkerr.
def score(word)
  checkerr("word size != pwm.size") { @size != word.size }
  checkerr("word #{word} has strange characters") do
    alphabet = @matrix.key?('N') ? 'ACGTRYKMSWBDHVN' : 'ACGT'
    word.tr(alphabet, '').size > 0
  end

  total = 0
  (0...@size).each { |i| total += @matrix[word[i, 1]][i] }
  total
end
#score_mean(bckgr = Randoom::DEF_PROBS) ⇒ Object
21 22 23 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 21
#
# Sum over all columns of sum(cell * bckgr[letter]) for the letters A, C, G, T.
def score_mean(bckgr = Randoom::DEF_PROBS)
  mean = 0.0
  (0...@size).each do |i|
    mean += %w[A C G T].inject(0.0) { |acc, l| acc + @matrix[l][i] * bckgr[l] }
  end
  mean
end
#score_sigma(trycount = 4**10, approx = false, bg = nil) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/sequence_logo/ytilib/pmsd.rb', line 4
#
# Estimates the standard deviation of word scores.
#
# For @size <= 10 (unless +approx+) every word over ACGT is scored; otherwise
# +trycount+ random words from Randoom.rand_seq (defined elsewhere, optionally
# given +bg+) are scored. Returns sigma and, when a block is given, also
# yields (sigma, mean).
#
# Fix: the mean and the variance are now computed in floating point. With
# Integer scores the old `sum / size` and `sumc**2 / size` used integer
# division and produced a wrong mean and sigma. Float scores give the same
# result as before.
def score_sigma(trycount = 4**10, approx = false, bg = nil)
  scores = []
  if @size <= 10 && !approx
    (0...4**@size).each do |i|
      word = i.to_s(4).rjust(@size, "0").tr("0123", "ACGT")
      scores << score(word)
    end
  else
    trycount.times do
      word = bg ? Randoom.rand_seq(@size, bg) : Randoom.rand_seq(@size)
      scores << score(word)
    end
  end

  count = scores.size
  mean = scores.inject(0) { |sum, s| sum + s }.to_f / count

  sum2 = 0.0
  sumc = 0.0
  scores.each do |s|
    deviation = s - mean
    sum2 += deviation**2
    sumc += deviation
  end
  # Corrected two-pass formula: sumc compensates rounding error in the mean.
  variance = (sum2 - sumc**2 / count) / (count - 1)
  sigma = Math.sqrt(variance)

  yield(sigma, mean) if block_given?
  sigma
end
#score_variance(bckgr = Randoom::DEF_PROBS) ⇒ Object
25 26 27 28 29 30 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 25
#
# Sum over all columns of sum(cell**2 * p) - sum(cell * p)**2, with
# p = bckgr[letter] for the letters A, C, G, T.
def score_variance(bckgr = Randoom::DEF_PROBS)
  variance = 0.0
  (0...@size).each do |i|
    second_moment = %w[A C G T].inject(0.0) { |acc, l| acc + @matrix[l][i]**2 * bckgr[l] }
    first_moment = %w[A C G T].inject(0.0) { |acc, l| acc + matrix[l][i] * bckgr[l] }
    variance += second_moment - first_moment**2
  end
  variance
end
#strict_consensus ⇒ Object
47 48 49 50 51 52 53 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 47
#
# Builds an IUPAC word in which every column is represented by the
# IUPAC::CODE entry for the set of letters sharing that column's maximum.
def strict_consensus
  word = ""
  (0...size).each do |i|
    top = %w[A C G T].map { |l| @matrix[l][i] }.max
    tied = %w[A C G T].select { |l| @matrix[l][i] == top }.join
    word += IUPAC::CODE[tied]
  end
  IUPAC.new(word)
end
#to_bismark(b) ⇒ Object
334 335 336 337 338 339 340 341 342 343 344 345 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 334
#
# Appends this matrix to element +b+ (anything responding to add_element;
# presumably a REXML element -- confirm). The child is named "PWM" when the
# first 'A' cell is a Float and "PCM" otherwise, carries "length" and, when
# positive, "words-count", and gets one "pm-column" per position (1-based
# "position" attribute) with a/c/g/t children holding the cell text.
def to_bismark(b)
  attributes = {"length" => @size}
  attributes["words-count"] = @words_count if @words_count && @words_count > 0

  tag = @matrix['A'][0].is_a?(Float) ? "PWM" : "PCM"
  pe = b.add_element(tag, attributes)

  each_position_index do |i|
    column = pe.add_element("pm-column", {"position" => i+1})
    %w[A C G T].each do |l|
      column.add_element(l.downcase).add_text(@matrix[l][i].to_s)
    end
  end
end
#to_pwm!(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1) ⇒ Object
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 181
#
# Converts the matrix in place: every cell c of row L becomes
#   log((c + probs[L] * pseudocount) / ((@words_count + pseudocount) * probs[L]))
# A positive +words_count+ argument replaces @words_count first.
# Returns self.
def to_pwm!(words_count = nil, probs = Randoom::DEF_PROBS, pseudocount = 1)
  @words_count = words_count if words_count && words_count > 0

  @matrix.each_key do |letter|
    row = @matrix[letter]
    @size.times do |pos|
      numerator = row[pos] + (probs[letter] * pseudocount)
      denominator = (@words_count + pseudocount) * probs[letter]
      row[pos] = Math::log(numerator / denominator)
    end
  end
  self
end
#worst_score ⇒ Object
235 236 237 238 239 |
# File 'lib/sequence_logo/ytilib/pm.rb', line 235
#
# Sum over all positions of the smallest A/C/G/T value in that column.
def worst_score
  total = 0
  (0...size).each do |i|
    total += %w[A C G T].map { |l| @matrix[l][i] }.min
  end
  total
end