Class: SpecID::AAFreqs

Inherits:
Object
  • Object
show all
Defined in:
lib/spec_id/aa_freqs.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(fasta_file = nil) ⇒ AAFreqs

Returns a new instance of AAFreqs.



12
13
14
15
16
17
# File 'lib/spec_id/aa_freqs.rb', line 12

# Creates a new AAFreqs, optionally seeded from a fasta file.
#
# When fasta_file is given, it is read into @fasta and the amino acid
# frequencies computed from it are stored in @aafreqs. When it is omitted
# (or nil), neither instance variable is set.
def initialize(fasta_file = nil)
  return unless fasta_file

  @fasta = Fasta.new.read_file(fasta_file)
  @aafreqs = calculate_frequencies(@fasta)
end

Instance Attribute Details

#aafreqs ⇒ Object

A hash keyed by capital one-letter amino acid symbols, giving the frequency of seeing each amino acid. Frequencies should sum to 1.



10
11
12
# File 'lib/spec_id/aa_freqs.rb', line 10

# Reader for the amino acid frequency table held in @aafreqs.
#
# Returns whatever @aafreqs currently holds: a hash keyed by one-letter
# amino acid symbols once frequencies have been calculated or assigned,
# or nil if nothing has set it yet.
def aafreqs
  @aafreqs
end

#fasta ⇒ Object

a fasta object



7
8
9
# File 'lib/spec_id/aa_freqs.rb', line 7

# Reader for the fasta object held in @fasta.
#
# Returns whatever @fasta currently holds; nil if the instance was
# created without a fasta file.
def fasta
  @fasta
end

Class Method Details

.probability_of_length_table(frequency, max_length) ⇒ Object

The expected probability of seeing an amino acid in a peptide of a given length. This calculates a lookup table (array), indexed from 0 to max_length, of the probability of seeing at least one occurrence of the amino acid (given its frequency, where frequency is from 0 to 1).



49
50
51
52
53
54
55
56
# File 'lib/spec_id/aa_freqs.rb', line 49

# Builds a lookup table of the probability of seeing an amino acid at
# least once in a sequence of a given length.
#
# frequency::  per-residue frequency of the amino acid (converted with to_f)
# max_length:: largest length to tabulate
#
# Returns an array of max_length + 1 Floats; the entry at index len is
# 1 - (1 - frequency)**len.
def self.probability_of_length_table(frequency, max_length)
  miss_prob = 1.0 - frequency.to_f
  Array.new(max_length + 1) { |length| 1.0 - (miss_prob**length) }
end

Instance Method Details

#actual_and_expected_number(peptide_aaseqs, amino_acid = :C, at_least = 1) ⇒ Object

Takes an array of peptide strings. Gives the actual number of peptides containing at least one occurrence of the amino acid, and the expected number of such peptides given the same per-length probabilities as the length lookup table. Currently ONLY supports at_least = 1. Depends on @aafreqs. Returns two numbers in an array, [actual, expected]; expected is a Float!



66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/spec_id/aa_freqs.rb', line 66

# Counts how many peptides actually contain an amino acid and how many
# would be expected to, given that amino acid's frequency in @aafreqs.
#
# peptide_aaseqs:: collection of peptide sequence strings (iterated once with each)
# amino_acid::     one-letter amino acid as a symbol or string; looked up
#                  in @aafreqs via to_sym
# at_least::       accepted for interface compatibility but currently
#                  ignored; the calculation is always "at least one"
#
# Depends on @aafreqs holding a frequency for the requested amino acid.
# Returns [actual, expected], where actual is an Integer count and
# expected is a Float.
def actual_and_expected_number(peptide_aaseqs, amino_acid=:C, at_least=1)
  # probability that any single residue is NOT the amino acid of interest
  one_minus_freq = 1.0 - @aafreqs[amino_acid.to_sym]
  amino_acid_as_st = amino_acid.to_s
  actual = 0
  expected = 0.0
  peptide_aaseqs.each do |pep|
    # probability of at least one occurrence in a peptide of this length
    expected += (1.0 - (one_minus_freq**pep.size))
    actual += 1 if pep.include?(amino_acid_as_st)
  end
  [actual, expected]
end

#actual_and_expected_number_containing_cysteines(pep_objs, cyst_freq) ⇒ Object

pep_objs is a collection of objects that respond to aaseq. Also accepts a hash of peptides keyed by amino acid sequence (aaseq).



83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/spec_id/aa_freqs.rb', line 83

# Convenience wrapper: actual and expected number of peptides containing
# at least one cysteine.
#
# pep_objs::  either a Hash whose keys are the peptide sequences, or a
#             collection of objects responding to aaseq
# cyst_freq:: cysteine frequency to use
#
# Side effect: stores cyst_freq under :C in @aafreqs (creating the hash
# if it does not exist yet), overwriting any previous :C value.
# Returns the result of actual_and_expected_number(seqs, :C, 1).
def actual_and_expected_number_containing_cysteines(pep_objs, cyst_freq)
  sequences = pep_objs.is_a?(Hash) ? pep_objs.keys : pep_objs.map { |pep| pep.aaseq }
  (@aafreqs ||= {})[:C] = cyst_freq
  actual_and_expected_number(sequences, :C, 1)
end

#calculate_frequencies(fasta) ⇒ Object

creates an aafreqs hash based on fasta object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/spec_id/aa_freqs.rb', line 20

# Creates an amino acid frequency hash from a fasta object.
#
# fasta:: object responding to prots, each of which responds to aaseq
#         (the protein's sequence string)
#
# Returns a hash keyed by one-character symbols. 'A'..'Z' and '*' are
# always present; any other character found in a sequence is counted
# under its own key rather than raising. Each value is that character's
# count divided by the total number of residues, so the values sum to 1.
# If there are no residues at all, every frequency is 0.0 (not NaN).
def calculate_frequencies(fasta)
  # default of 0 lets characters outside the pre-seeded alphabet be counted
  counts = Hash.new(0)
  ('A'..'Z').each { |aa| counts[aa] = 0 }
  counts['*'] = 0

  total_aas = 0
  fasta.prots.each do |prot|
    aaseq = prot.aaseq
    total_aas += aaseq.size
    aaseq.each_char { |aa| counts[aa] += 1 }
  end

  # Normalize into a fresh symbol-keyed hash: inserting new keys into a
  # hash while iterating over it raises RuntimeError on Ruby >= 1.9.
  freqs = {}
  counts.each do |aa, count|
    freqs[aa.to_sym] = total_aas.zero? ? 0.0 : count.to_f / total_aas
  end
  freqs
end