Class: Peptide
- Inherits:
-
Object
- Object
- Peptide
- Defined in:
- lib/protk/peptide.rb
Instance Attribute Summary collapse
-
#charge ⇒ Object
Returns the value of attribute charge.
-
#nsp_adjusted_probability ⇒ Object
Returns the value of attribute nsp_adjusted_probability.
-
#protein_name ⇒ Object
Returns the value of attribute protein_name.
-
#sequence ⇒ Object
Returns the value of attribute sequence.
Class Method Summary collapse
Instance Method Summary collapse
-
#coords_in_protein(prot_seq, reverse = false) ⇒ Object
Expects prot_seq not to contain an explicit stop codon (i.e. * at the end). AA coords are 0-based, unlike genomic coords, which are 1-based.
- #gff_record_for_peptide_fragment(start_i, end_i, parent_record) ⇒ Object
-
#initialize ⇒ Peptide
constructor
A new instance of Peptide.
-
#to_gff3_records(prot_seq, parent_record, cds_records) ⇒ Object
Returns a list of fragments (hashes with start and end) in GFF style (1 based) genomic coordinates.
Constructor Details
#initialize ⇒ Peptide
Returns a new instance of Peptide.
38 39 40 |
# File 'lib/protk/peptide.rb', line 38
# Creates an empty Peptide. The constructor itself assigns no attributes.
#
# @return [Peptide] a new instance of Peptide
def initialize; end
Instance Attribute Details
#charge ⇒ Object
Returns the value of attribute charge.
15 16 17 |
# File 'lib/protk/peptide.rb', line 15
# Reader for the charge attribute.
#
# @return [Object, nil] whatever is currently stored in @charge
def charge
  @charge
end
#nsp_adjusted_probability ⇒ Object
Returns the value of attribute nsp_adjusted_probability.
16 17 18 |
# File 'lib/protk/peptide.rb', line 16
# Reader for the nsp_adjusted_probability attribute.
#
# @return [Object, nil] whatever is currently stored in @nsp_adjusted_probability
def nsp_adjusted_probability
  @nsp_adjusted_probability
end
#protein_name ⇒ Object
Returns the value of attribute protein_name.
14 15 16 |
# File 'lib/protk/peptide.rb', line 14
# Reader for the protein_name attribute.
#
# @return [Object, nil] whatever is currently stored in @protein_name
def protein_name
  @protein_name
end
#sequence ⇒ Object
Returns the value of attribute sequence.
13 14 15 |
# File 'lib/protk/peptide.rb', line 13
# Reader for the sequence attribute.
#
# @return [Object, nil] whatever is currently stored in @sequence
def sequence
  @sequence
end
Class Method Details
.from_protxml(xmlnode) ⇒ Object
21 22 23 24 25 26 27 |
# File 'lib/protk/peptide.rb', line 21
# Builds a peptide from a node that supports string-keyed lookup.
#
# Reads 'peptide_sequence' as-is, 'nsp_adjusted_probability' converted with
# to_f, and 'charge' converted with to_i.
#
# @param xmlnode [#[]] node to read from (presumably a protXML peptide element; verify against caller)
# @return [Peptide] the populated instance
def from_protxml(xmlnode)
  new.tap do |peptide|
    peptide.sequence = xmlnode['peptide_sequence']
    peptide.nsp_adjusted_probability = xmlnode['nsp_adjusted_probability'].to_f
    peptide.charge = xmlnode['charge'].to_i
  end
end
.from_sequence(seq, charge = nil) ⇒ Object
29 30 31 32 33 34 |
# File 'lib/protk/peptide.rb', line 29
# Builds a peptide directly from a sequence and an optional charge.
#
# @param seq [Object] value stored as the peptide's sequence
# @param charge [Object, nil] value stored as the peptide's charge (nil by default)
# @return [Peptide] the populated instance
def from_sequence(seq, charge = nil)
  new.tap do |peptide|
    peptide.sequence = seq
    peptide.charge = charge
  end
end
Instance Method Details
#coords_in_protein(prot_seq, reverse = false) ⇒ Object
Expects prot_seq not to contain an explicit stop codon (i.e. * at the end). AA coords are 0-based, unlike genomic coords, which are 1-based.
45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/protk/peptide.rb', line 45
# Locates this peptide's sequence inside a protein sequence.
#
# Expects prot_seq not to contain an explicit stop codon (i.e. * at the end).
# The returned amino-acid coordinates are 0-based; :end is :start plus the
# peptide length. When reverse is truthy, both strings are reversed before
# searching, so the coordinates refer to the reversed protein.
#
# @param prot_seq [String] protein sequence to search in
# @param reverse [Boolean] search the reversed protein for the reversed peptide
# @return [Hash] hash with :start and :end keys
# @raise [PeptideNotInProteinError] if the peptide does not occur in the protein
def coords_in_protein(prot_seq, reverse = false)
  needle = sequence
  haystack = prot_seq
  if reverse
    needle = needle.reverse
    haystack = haystack.reverse
  end

  first_aa = haystack.index(needle)
  if first_aa.nil?
    raise PeptideNotInProteinError, "Peptide #{sequence} not found in protein #{prot_seq} "
  end

  { start: first_aa, end: first_aa + needle.length }
end
#gff_record_for_peptide_fragment(start_i, end_i, parent_record) ⇒ Object
145 146 147 148 149 150 151 152 |
# File 'lib/protk/peptide.rb', line 145
# Builds one GFF3 'polypeptide' record (source 'MSMS') for a fragment of this
# peptide, parented to the given record.
#
# The record ID is "<parent id>.<sequence>", with ".<charge>" appended when a
# charge is set. The score column is nsp_adjusted_probability, or "." when
# that is nil. seqid and strand are copied from parent_record; phase is "0".
#
# @param start_i [Integer] value for the GFF start column
# @param end_i [Integer] value for the GFF end column
# @param parent_record [#id, #seqid, #strand] record this fragment belongs to
# @return [Bio::GFF::GFF3::Record] record parsed from the assembled GFF line
def gff_record_for_peptide_fragment(start_i, end_i, parent_record)
  parent_id = parent_record.id
  charge_suffix = charge.nil? ? '' : ".#{charge}"
  feature_id = "#{parent_id}.#{sequence}#{charge_suffix}"

  probability = nsp_adjusted_probability
  score_column = probability.nil? ? '.' : probability.to_s

  columns = [
    parent_record.seqid,
    'MSMS',
    'polypeptide',
    start_i,
    end_i,
    score_column,
    parent_record.strand,
    '0',
    "ID=#{feature_id};Parent=#{parent_id}"
  ]
  # map(&:to_s) mirrors string interpolation exactly for every column value.
  Bio::GFF::GFF3::Record.new(columns.map(&:to_s).join("\t"))
end
#to_gff3_records(prot_seq, parent_record, cds_records) ⇒ Object
Returns a list of fragments (hashes with start and end) in GFF style (1 based) genomic coordinates
Assumes that cds_records is inclusive of the entire protein sequence, including the start-met.
We assume that gff records conform to the spec
www.sequenceontology.org/gff3.shtml
This part of the spec is crucial
-
The START and STOP codons are included in the CDS.
-
That is, if the locations of the start and stop codons are known,
-
the first three base pairs of the CDS should correspond to the start codon
-
and the last three correspond to the stop codon.
We also assume that all the cds records provided actually form part of the protein (i.e. skipped exons should not be included).
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
# File 'lib/protk/peptide.rb', line 76
# Maps this peptide onto the CDS records of its protein and returns one GFF3
# record for every CDS the peptide overlaps, in GFF-style (1-based) genomic
# coordinates.
#
# Assumes the CDS records together cover the entire protein sequence,
# including the start methionine, and that every record supplied really is
# part of the protein (skipped exons must not be included).
#
# @param prot_seq [String] protein sequence, handed to coords_in_protein
# @param parent_record [Bio::GFF::GFF3::Record] record whose strand decides the walking direction
# @param cds_records [Array] sortable CDS records responding to start, end and length
# @return [Array] records produced by gff_record_for_peptide_fragment, in walking order
# @raise [ArgumentError] if parent_record is not a GFF3 record or cds_records is not an Array
def to_gff3_records(prot_seq, parent_record, cds_records)
  # `raise`, not `throw`: throw is catch/throw control flow and would surface
  # as an UncaughtThrowError with a mangled message.
  unless parent_record.is_a?(Bio::GFF::GFF3::Record)
    raise ArgumentError, "Expected GFF3 Record but got #{parent_record.class}"
  end
  raise ArgumentError, "Expected Array but got #{cds_records.class}" unless cds_records.is_a?(Array)

  on_reverse_strand = parent_record.strand == '-'

  # Always use forward protein coordinates
  aa_coords = coords_in_protein(prot_seq, false)

  # Walk the CDS records in translation order: ascending on the forward
  # strand, descending on the reverse strand.
  ordered_cds_records = cds_records.sort
  ordered_cds_records.reverse! if on_reverse_strand

  # Peptide extent in nucleic acids, counted from the start of translation.
  pep_start_i = aa_coords[:start] * 3
  pep_end_i = pep_start_i + sequence.length * 3

  i = 0 # Current protein position (in nucleic acids)
  fragments = []

  ordered_cds_records.each do |cds_record|
    cds_len = cds_record.length

    # Bases of this CDS that precede the peptide start (0 once we are past it).
    before_len = [pep_start_i - i, 0].max
    # Bases of the peptide that fall inside this CDS. Non-positive when the
    # CDS lies wholly before or after the peptide, or is itself empty.
    fragment_len = [cds_len - before_len, pep_end_i - i - before_len].min

    if fragment_len > 0
      if on_reverse_strand
        # Translation runs from the CDS end towards its start.
        fragment_end = cds_record.end - before_len
        fragment_start = fragment_end - fragment_len + 1
      else
        fragment_start = cds_record.start + before_len
        fragment_end = fragment_start + fragment_len - 1
      end
      fragments << gff_record_for_peptide_fragment(fragment_start, fragment_end, cds_record)
    end

    i += cds_len
  end

  fragments
end