Class: Peptide
- Inherits:
-
Object
- Object
- Peptide
- Defined in:
- lib/protk/peptide.rb
Instance Attribute Summary collapse
-
#charge ⇒ Object
Returns the value of attribute charge.
-
#nsp_adjusted_probability ⇒ Object
Returns the value of attribute nsp_adjusted_probability.
-
#protein_name ⇒ Object
Returns the value of attribute protein_name.
-
#sequence ⇒ Object
Returns the value of attribute sequence.
Class Method Summary collapse
Instance Method Summary collapse
-
#coords_in_protein(prot_seq, reverse = false) ⇒ Object
Expects prot_seq not to contain an explicit stop codon (i.e. * at the end). AA coords are 0-based, unlike genomic coords, which are 1-based.
- #gff_record_for_peptide_fragment(start_i, end_i, parent_record) ⇒ Object
-
#initialize ⇒ Peptide
constructor
A new instance of Peptide.
-
#to_gff3_records(prot_seq, parent_record, cds_records) ⇒ Object
Returns a list of GFF3 records, one per peptide fragment, in GFF style (1 based) genomic coordinates.
Constructor Details
#initialize ⇒ Peptide
Returns a new instance of Peptide.
36 37 38 |
# File 'lib/protk/peptide.rb', line 36
# Constructor. Takes no arguments and performs no setup of its own.
#
# @return [Peptide] a new instance of Peptide
def initialize; end
Instance Attribute Details
#charge ⇒ Object
Returns the value of attribute charge.
13 14 15 |
# File 'lib/protk/peptide.rb', line 13
# Reader for the charge attribute.
#
# @return [Object] the current value of @charge (nil when never assigned)
def charge
  @charge
end
#nsp_adjusted_probability ⇒ Object
Returns the value of attribute nsp_adjusted_probability.
14 15 16 |
# File 'lib/protk/peptide.rb', line 14
# Reader for the nsp_adjusted_probability attribute.
#
# @return [Object] the current value of @nsp_adjusted_probability (nil when never assigned)
def nsp_adjusted_probability
  @nsp_adjusted_probability
end
#protein_name ⇒ Object
Returns the value of attribute protein_name.
12 13 14 |
# File 'lib/protk/peptide.rb', line 12
# Reader for the protein_name attribute.
#
# @return [Object] the current value of @protein_name (nil when never assigned)
def protein_name
  @protein_name
end
#sequence ⇒ Object
Returns the value of attribute sequence.
11 12 13 |
# File 'lib/protk/peptide.rb', line 11
# Reader for the sequence attribute.
#
# @return [Object] the current value of @sequence (nil when never assigned)
def sequence
  @sequence
end
Class Method Details
.from_protxml(xmlnode) ⇒ Object
19 20 21 22 23 24 25 |
# File 'lib/protk/peptide.rb', line 19
# Builds a peptide from a protXML node.
#
# The node is read through string-keyed lookups: 'peptide_sequence' is stored
# as-is, 'nsp_adjusted_probability' is converted with #to_f and 'charge' with
# #to_i.
#
# @param xmlnode [#[]] node answering to the three keys above
# @return [Object] the freshly constructed, populated instance
def from_protxml(xmlnode)
  new.tap do |peptide|
    peptide.sequence = xmlnode['peptide_sequence']
    peptide.nsp_adjusted_probability = xmlnode['nsp_adjusted_probability'].to_f
    peptide.charge = xmlnode['charge'].to_i
  end
end
.from_sequence(seq, charge = nil) ⇒ Object
27 28 29 30 31 32 |
# File 'lib/protk/peptide.rb', line 27
# Builds a peptide directly from a sequence and an optional charge.
#
# @param seq [Object] value stored as the peptide sequence
# @param charge [Object, nil] value stored as the charge (nil by default)
# @return [Object] the freshly constructed, populated instance
def from_sequence(seq, charge = nil)
  new.tap do |peptide|
    peptide.sequence = seq
    peptide.charge = charge
  end
end
Instance Method Details
#coords_in_protein(prot_seq, reverse = false) ⇒ Object
Expects prot_seq not to contain an explicit stop codon (i.e. * at the end). AA coords are 0-based, unlike genomic coords, which are 1-based.
43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/protk/peptide.rb', line 43
# Locates this peptide's sequence inside a protein sequence.
#
# With reverse set, both strings are reversed before searching, so the
# returned start is the offset of the match in the reversed protein.
#
# @param prot_seq [String] protein sequence to search
# @param reverse [Boolean] search the reversed strings instead
# @return [Hash] :start is the 0-based index of the match, :end is :start
#   plus the peptide length
# @raise [PeptideNotInProteinError] when the peptide does not occur in prot_seq
def coords_in_protein(prot_seq, reverse = false)
  haystack, needle = reverse ? [prot_seq.reverse, sequence.reverse] : [prot_seq, sequence]
  first = haystack.index(needle)
  raise PeptideNotInProteinError if first.nil?

  { start: first, end: first + sequence.length }
end
#gff_record_for_peptide_fragment(start_i, end_i, parent_record) ⇒ Object
148 149 150 151 152 153 154 155 |
# File 'lib/protk/peptide.rb', line 148
# Builds a GFF3 'polypeptide' record (source 'MSMS') for one fragment of this
# peptide.
#
# The record ID is "<parent id>.<sequence>", with ".<charge>" appended when a
# charge is set. The score column is the nsp_adjusted_probability, or "." when
# that is nil. seqid and strand are copied from the parent record.
#
# @param start_i [Integer] value written to the GFF start column
# @param end_i [Integer] value written to the GFF end column
# @param parent_record [#id, #seqid, #strand] record used as the Parent
# @return [Bio::GFF::GFF3::Record] record parsed from the assembled line
def gff_record_for_peptide_fragment(start_i, end_i, parent_record)
  parent_id = parent_record.id
  charge_suffix = charge.nil? ? '' : ".#{charge}"
  record_id = "#{parent_id}.#{sequence}#{charge_suffix}"

  score =
    if nsp_adjusted_probability.nil?
      '.'
    else
      nsp_adjusted_probability.to_s
    end

  attributes = "ID=#{record_id};Parent=#{parent_id}"
  line = "#{parent_record.seqid}\tMSMS\tpolypeptide\t#{start_i}\t#{end_i}\t#{score}\t#{parent_record.strand}\t0\t#{attributes}"
  Bio::GFF::GFF3::Record.new(line)
end
#to_gff3_records(prot_seq, parent_record, cds_records) ⇒ Object
Returns a list of GFF3 records, one per peptide fragment, in GFF style (1 based) genomic coordinates
Assumes that cds_records is inclusive of the entire protein sequence including start-met
We assume that gff records conform to the spec
www.sequenceontology.org/gff3.shtml
This part of the spec is crucial
-
The START and STOP codons are included in the CDS.
-
That is, if the locations of the start and stop codons are known,
-
the first three base pairs of the CDS should correspond to the start codon
-
and the last three correspond to the stop codon.
We also assume that all the cds records provided, actually form part of the protein (ie skipped exons should not be included)
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/protk/peptide.rb', line 74
# Maps this peptide onto the genome and returns one GFF3 record per CDS that
# the peptide overlaps, in GFF-style (1-based, inclusive) coordinates.
#
# The CDS records are sorted (and reversed for a '-' strand parent) and then
# walked in translation order while tracking how many nucleotides of coding
# sequence precede each one. A record is emitted only for a CDS whose
# nucleotide span actually intersects the peptide's span; a CDS lying wholly
# before or after the peptide contributes nothing.
#
# Assumes the CDS records together cover the whole protein, start codon
# included, and contain no skipped exons.
#
# @param prot_seq [String] protein sequence, without a trailing stop symbol
# @param parent_record [Bio::GFF::GFF3::Record] record whose strand selects
#   the walking direction
# @param cds_records [Array] sortable CDS records responding to #start, #end
#   and #length
# @return [Array] records built by gff_record_for_peptide_fragment, in
#   translation order
# @raise [ArgumentError] when parent_record is not a GFF3 record or
#   cds_records is not an Array
# @raise [PeptideNotInProteinError] when the peptide is not found in prot_seq
def to_gff3_records(prot_seq, parent_record, cds_records)
  # raise, not throw: throw is for catch/throw control flow and would surface
  # as an UncaughtThrowError (itself an ArgumentError subclass).
  unless parent_record.is_a?(Bio::GFF::GFF3::Record)
    raise ArgumentError, "Expected GFF3 Record but got #{parent_record.class}"
  end
  raise ArgumentError, "Expected Array but got #{cds_records.class}" unless cds_records.is_a?(Array)

  on_reverse_strand = parent_record.strand == '-'

  # Always use forward protein coordinates
  aa_coords = coords_in_protein(prot_seq, false)

  ordered_cds_records = on_reverse_strand ? cds_records.sort.reverse : cds_records.sort

  # Peptide span in nucleotides from the start of translation (end exclusive)
  pep_start_i = aa_coords[:start] * 3
  pep_end_i = pep_start_i + sequence.length * 3

  cds_start_i = 0 # nucleotides of coding sequence before the current CDS
  fragments = []

  ordered_cds_records.each do |cds_record|
    cds_end_i = cds_start_i + cds_record.length

    overlap_start_i = [cds_start_i, pep_start_i].max
    overlap_end_i = [cds_end_i, pep_end_i].min

    # An empty or negative overlap means this CDS holds no part of the peptide
    if overlap_start_i < overlap_end_i
      offset = overlap_start_i - cds_start_i
      fragment_len = overlap_end_i - overlap_start_i

      if on_reverse_strand
        # Translation runs from the high genomic coordinate downwards
        fragment_end = cds_record.end - offset
        fragment_start = fragment_end - fragment_len + 1
      else
        fragment_start = cds_record.start + offset
        fragment_end = fragment_start + fragment_len - 1
      end

      fragments << gff_record_for_peptide_fragment(fragment_start, fragment_end, cds_record)
    end

    cds_start_i = cds_end_i
  end

  fragments
end