Class: Peptide

Inherits:
Object
  • Object
show all
Defined in:
lib/protk/peptide.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Peptide

Returns a new instance of Peptide.


36
37
38
# File 'lib/protk/peptide.rb', line 36

# Creates a peptide with no attributes assigned; the constructor takes no
# arguments and does no work of its own.
def initialize
end

Instance Attribute Details

#charge ⇒ Object

Returns the value of attribute charge


13
14
15
# File 'lib/protk/peptide.rb', line 13

# Reader for the charge attribute.
#
# Returns whatever is currently stored in @charge (nil if never assigned).
def charge
  @charge
end

#nsp_adjusted_probability ⇒ Object

Returns the value of attribute nsp_adjusted_probability


14
15
16
# File 'lib/protk/peptide.rb', line 14

# Reader for the nsp_adjusted_probability attribute.
#
# Returns whatever is currently stored in @nsp_adjusted_probability
# (nil if never assigned).
def nsp_adjusted_probability
  @nsp_adjusted_probability
end

#protein_name ⇒ Object

Returns the value of attribute protein_name


12
13
14
# File 'lib/protk/peptide.rb', line 12

# Reader for the protein_name attribute.
#
# Returns whatever is currently stored in @protein_name (nil if never assigned).
def protein_name
  @protein_name
end

#sequence ⇒ Object

Returns the value of attribute sequence


11
12
13
# File 'lib/protk/peptide.rb', line 11

# Reader for the sequence attribute.
#
# Returns whatever is currently stored in @sequence (nil if never assigned).
def sequence
  @sequence
end

Class Method Details

.from_protxml(xmlnode) ⇒ Object


19
20
21
22
23
24
25
# File 'lib/protk/peptide.rb', line 19

# Builds a new object from a protXML peptide node.
#
# xmlnode - any object that can be indexed with the string keys
#           'peptide_sequence', 'nsp_adjusted_probability' and 'charge'
#           (presumably an XML element exposing its attributes through []
#           -- confirm against callers).
#
# The sequence is stored as given; the probability is coerced with to_f and
# the charge with to_i.
#
# Returns the newly created object.
def from_protxml(xmlnode)
	new.tap do |peptide|
		peptide.sequence = xmlnode['peptide_sequence']
		peptide.nsp_adjusted_probability = xmlnode['nsp_adjusted_probability'].to_f
		peptide.charge = xmlnode['charge'].to_i
	end
end

.from_sequence(seq, charge = nil) ⇒ Object


27
28
29
30
31
32
# File 'lib/protk/peptide.rb', line 27

# Builds a new object directly from an amino-acid sequence.
#
# seq    - value stored as the sequence attribute.
# charge - value stored as the charge attribute (defaults to nil).
#
# Returns the newly created object.
def from_sequence(seq,charge=nil)
	new.tap do |peptide|
		peptide.sequence = seq
		peptide.charge = charge
	end
end

Instance Method Details

#coords_in_protein(prot_seq, reverse = false) ⇒ Object

Expects prot_seq not to contain an explicit stop codon (i.e. no * at the end). AA coords are 0-based, unlike genomic coords, which are 1-based.


43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/protk/peptide.rb', line 43

# Locates this peptide's sequence inside a protein sequence.
#
# prot_seq - protein sequence to search.
# reverse  - when truthy, both the protein and the peptide are reversed
#            before searching, so the offset is measured within the
#            reversed protein.
#
# Returns a Hash {:start => i, :end => i + peptide length} of 0-based
# offsets; :end is therefore one past the last matched residue.
# Raises PeptideNotInProteinError when the peptide does not occur.
def coords_in_protein(prot_seq,reverse=false)
	needle = sequence
	haystack = prot_seq
	if reverse
		haystack = haystack.reverse
		needle = needle.reverse
	end

	first = haystack.index(needle)
	raise PeptideNotInProteinError if first.nil?

	{:start => first, :end => first + sequence.length}
end

#gff_record_for_peptide_fragment(start_i, end_i, parent_record) ⇒ Object


148
149
150
151
152
153
154
155
# File 'lib/protk/peptide.rb', line 148

# Builds the GFF3 record describing one contiguous piece of this peptide.
#
# start_i       - value written to the GFF start column.
# end_i         - value written to the GFF end column.
# parent_record - record supplying seqid, strand and id; its id becomes the
#                 Parent attribute of the new record.
#
# The record ID is "<parent id>.<sequence>", with ".<charge>" appended when
# a charge is set. The score column is the nsp adjusted probability, or "."
# when none is set. Source is always "MSMS", type "polypeptide", phase "0".
#
# Returns a Bio::GFF::GFF3::Record constructed from the tab-separated line.
def gff_record_for_peptide_fragment(start_i,end_i,parent_record)
	parent_id = parent_record.id
	charge_suffix = charge.nil? ? "" : ".#{charge}"
	feature_id = "#{parent_id}.#{sequence}#{charge_suffix}"
	score_field = nsp_adjusted_probability.nil? ? "." : nsp_adjusted_probability.to_s

	columns = [
		parent_record.seqid,
		"MSMS",
		"polypeptide",
		start_i,
		end_i,
		score_field,
		parent_record.strand,
		"0",
		"ID=#{feature_id};Parent=#{parent_id}"
	]
	Bio::GFF::GFF3::Record.new(columns.map(&:to_s).join("\t"))
end

#to_gff3_records(prot_seq, parent_record, cds_records) ⇒ Object

Returns a list of GFF3 records, one per peptide fragment, with start and end in GFF style (1-based) genomic coordinates

Assumes that cds_records is inclusive of the entire protein sequence including start-met

We assume that gff records conform to the spec

www.sequenceontology.org/gff3.shtml

This part of the spec is crucial

  • The START and STOP codons are included in the CDS.

  • That is, if the locations of the start and stop codons are known,

  • the first three base pairs of the CDS should correspond to the start codon

  • and the last three correspond the stop codon.

We also assume that all the cds records provided actually form part of the protein (i.e. skipped exons should not be included)


74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/protk/peptide.rb', line 74

# Maps this peptide onto genomic coordinates, producing one GFF3 record for
# every CDS segment that the peptide overlaps.
#
# prot_seq      - protein sequence containing this peptide; handed to
#                 coords_in_protein to find the peptide's 0-based AA offset.
# parent_record - Bio::GFF::GFF3::Record whose strand ("-" means reverse)
#                 decides the order in which the CDS segments are walked.
# cds_records   - Array of CDS records that together encode prot_seq. Each
#                 must respond to start, end and length and be sortable
#                 (length and ordering are presumably provided elsewhere in
#                 the project -- confirm).
#
# Returns an Array of records built by gff_record_for_peptide_fragment, in
# translation order. CDS segments sharing no bases with the peptide
# contribute nothing.
# Raises ArgumentError when parent_record is not a GFF3 record or
# cds_records is not an Array; errors from coords_in_protein propagate.
def to_gff3_records(prot_seq,parent_record,cds_records)
	# raise (not throw): throw is for catch/throw control flow and would
	# surface as an UncaughtThrowError with a mangled message.
	raise ArgumentError, "Expected GFF3 Record but got #{parent_record.class}" unless parent_record.is_a?(Bio::GFF::GFF3::Record)
	raise ArgumentError, "Expected Array but got #{cds_records.class}" unless cds_records.is_a?(Array)

	on_reverse_strand = parent_record.strand=="-"
	aa_coords = coords_in_protein(prot_seq,false) # Always use forward protein coordinates

	# Translation order: ascending on the forward strand, descending on the reverse.
	ordered_cds_records = on_reverse_strand ? cds_records.sort.reverse : cds_records.sort

	# Peptide extent as a half-open interval, in nucleotides from the start of translation.
	pep_start_i = aa_coords[:start]*3
	pep_end_i = pep_start_i+sequence.length*3

	i = 0 # Nucleotides of coding sequence consumed before the current CDS segment
	fragments = []
	ordered_cds_records.each do |cds_record|
		cds_len = cds_record.length

		# Intersect this segment [i, i+cds_len) with the peptide [pep_start_i, pep_end_i).
		overlap_start = [i,pep_start_i].max
		overlap_end = [i+cds_len,pep_end_i].min
		fragment_len = overlap_end-overlap_start

		# Segments lying wholly before or after the peptide give a non-positive
		# length and must not produce a record.
		if fragment_len>0
			offset = overlap_start-i # Bases of this segment preceding the peptide
			if on_reverse_strand
				# Translation runs from the segment's end towards its start.
				fragment_end = cds_record.end-offset
				fragment_start = fragment_end-fragment_len+1
			else
				fragment_start = cds_record.start+offset
				fragment_end = fragment_start+fragment_len-1
			end
			fragments << gff_record_for_peptide_fragment(fragment_start,fragment_end,cds_record)
		end

		i += cds_len
	end
	fragments
end