Class: Peptide

Inherits:

Object

Object
Peptide

show all

Defined in: lib/protk/peptide.rb

Instance Attribute Summary collapse

#charge ⇒ Object

Returns the value of attribute charge.
#nsp_adjusted_probability ⇒ Object

Returns the value of attribute nsp_adjusted_probability.
#protein_name ⇒ Object

Returns the value of attribute protein_name.
#sequence ⇒ Object

Returns the value of attribute sequence.

Class Method Summary collapse

Instance Method Summary collapse

#coords_in_protein(prot_seq, reverse = false) ⇒ Object

Expects prot_seq not to contain an explicit stop codon (i.e. * at the end). AA coords are 0-based, unlike genomic coords, which are 1-based.
#gff_record_for_peptide_fragment(start_i, end_i, parent_record) ⇒ Object
#initialize ⇒ Peptide constructor

A new instance of Peptide.
#to_gff3_records(prot_seq, parent_record, cds_records) ⇒ Object

Returns a list of GFF3 records, one per peptide fragment, in GFF style (1-based) genomic coordinates.

Constructor Details

#initialize ⇒ `Peptide`

Returns a new instance of Peptide.



36
37
38

# File 'lib/protk/peptide.rb', line 36

# Creates a blank Peptide. No attributes are set here; the factory methods
# (from_protxml / from_sequence) assign them after construction.
def initialize; end

Instance Attribute Details

#charge ⇒ `Object`

Returns the value of attribute charge.



13
14
15

# File 'lib/protk/peptide.rb', line 13

# Reader for the charge attribute: returns the current value of @charge
# (nil if it has never been assigned).
def charge
  @charge
end

#nsp_adjusted_probability ⇒ `Object`

Returns the value of attribute nsp_adjusted_probability.



14
15
16

# File 'lib/protk/peptide.rb', line 14

# Reader for the nsp_adjusted_probability attribute: returns the current value
# of @nsp_adjusted_probability (nil if it has never been assigned).
def nsp_adjusted_probability
  @nsp_adjusted_probability
end

#protein_name ⇒ `Object`

Returns the value of attribute protein_name.



12
13
14

# File 'lib/protk/peptide.rb', line 12

# Reader for the protein_name attribute: returns the current value of
# @protein_name (nil if it has never been assigned).
def protein_name
  @protein_name
end

#sequence ⇒ `Object`

Returns the value of attribute sequence.



11
12
13

# File 'lib/protk/peptide.rb', line 11

# Reader for the sequence attribute: returns the current value of @sequence
# (nil if it has never been assigned).
def sequence
  @sequence
end

Class Method Details

.from_protxml(xmlnode) ⇒ `Object`

# File 'lib/protk/peptide.rb', line 19

# Builds a Peptide from a protXML peptide node.
#
# xmlnode - object indexable by attribute name; the 'peptide_sequence',
#           'nsp_adjusted_probability' and 'charge' entries are read.
#
# The probability is coerced with to_f and the charge with to_i, so a missing
# (nil) attribute ends up as 0.0 / 0 respectively.
#
# Returns the populated Peptide.
def from_protxml(xmlnode)
	new.tap do |peptide|
		peptide.sequence = xmlnode['peptide_sequence']
		peptide.nsp_adjusted_probability = xmlnode['nsp_adjusted_probability'].to_f
		peptide.charge = xmlnode['charge'].to_i
	end
end

.from_sequence(seq, charge = nil) ⇒ `Object`

# File 'lib/protk/peptide.rb', line 27

# Builds a Peptide directly from an amino-acid sequence.
#
# seq    - value stored as the peptide's sequence
# charge - optional value stored as the peptide's charge (defaults to nil)
#
# Returns the populated Peptide.
def from_sequence(seq,charge=nil)
	new.tap do |peptide|
		peptide.sequence = seq
		peptide.charge = charge
	end
end

Instance Method Details

#coords_in_protein(prot_seq, reverse = false) ⇒ `Object`

Expects prot_seq not to contain an explicit stop codon (i.e. * at the end). AA coords are 0-based, unlike genomic coords, which are 1-based.

# File 'lib/protk/peptide.rb', line 43

# Locates this peptide's sequence inside prot_seq.
#
# prot_seq - protein sequence to search (expected without a trailing "*")
# reverse  - when truthy, both the protein and the peptide are reversed before
#            searching, so the result is an offset into the reversed protein
#
# Returns {:start, :end} as 0-based amino-acid offsets, where :end is
# :start plus the peptide length (i.e. exclusive).
# Raises PeptideNotInProteinError when the peptide does not occur in prot_seq.
def coords_in_protein(prot_seq,reverse=false)
	peptide_seq = self.sequence
	haystack, needle = reverse ? [prot_seq.reverse, peptide_seq.reverse] : [prot_seq, peptide_seq]

	first_residue = haystack.index(needle)
	raise PeptideNotInProteinError if first_residue.nil?

	{:start => first_residue, :end => first_residue + peptide_seq.length}
end

#gff_record_for_peptide_fragment(start_i, end_i, parent_record) ⇒ `Object`

# File 'lib/protk/peptide.rb', line 148

# Builds the GFF3 record for one genomic fragment of this peptide.
#
# start_i       - fragment start, written verbatim into the GFF start column
# end_i         - fragment end, written verbatim into the GFF end column
# parent_record - record supplying seqid, strand and id; its id becomes the
#                 Parent attribute and the prefix of the new record's ID
#
# The ID is "<parent id>.<sequence>", with ".<charge>" appended when a charge
# is set. The score column is the nsp adjusted probability, or "." when unset.
# Source is always "MSMS", type "polypeptide" and phase "0".
#
# Returns a Bio::GFF::GFF3::Record parsed from the assembled line.
def gff_record_for_peptide_fragment(start_i,end_i,parent_record)
	parent_id = parent_record.id

	id_parts = [parent_id, sequence]
	id_parts << charge unless charge.nil?
	feature_id = id_parts.map(&:to_s).join(".")

	probability = nsp_adjusted_probability
	score = probability.nil? ? "." : probability.to_s

	columns = [
		parent_record.seqid, "MSMS", "polypeptide",
		start_i, end_i, score, parent_record.strand, "0",
		"ID=#{feature_id};Parent=#{parent_id}"
	]
	Bio::GFF::GFF3::Record.new(columns.map(&:to_s).join("\t"))
end

#to_gff3_records(prot_seq, parent_record, cds_records) ⇒ `Object`

Returns a list of GFF3 records, one per peptide fragment, in GFF style (1-based) genomic coordinates

Assumes that cds_records covers the entire protein sequence, including the start Met

We assume that gff records conform to the spec

www.sequenceontology.org/gff3.shtml

This part of the spec is crucial

The START and STOP codons are included in the CDS.
That is, if the locations of the start and stop codons are known,
the first three base pairs of the CDS should correspond to the start codon
and the last three correspond the stop codon.

We also assume that all the CDS records provided actually form part of the protein (i.e. skipped exons should not be included)

# File 'lib/protk/peptide.rb', line 74

# Maps this peptide onto the genome, returning one GFF3 record for every CDS
# record that the peptide overlaps.
#
# The peptide is located in prot_seq (forward protein coordinates), converted
# to a half-open nucleotide interval measured from the start of translation,
# and intersected with each CDS in translation order (ascending sort order for
# "+" strand parents, descending for "-").
#
# Assumes the CDS records cover the whole protein including the start codon,
# and that every supplied CDS really is part of the protein (no skipped exons).
#
# prot_seq      - protein sequence, without a trailing stop ("*")
# parent_record - Bio::GFF::GFF3::Record; its strand decides the walk direction
# cds_records   - Array of CDS records; they must be sortable and respond to
#                 start, end and length
#
# Returns an Array of the records built by gff_record_for_peptide_fragment,
# in 1-based inclusive genomic coordinates. CDS records that do not overlap
# the peptide contribute nothing.
# Raises ArgumentError when parent_record or cds_records has the wrong type;
# errors from coords_in_protein (peptide not found) propagate unchanged.
def to_gff3_records(prot_seq,parent_record,cds_records)
	# raise rather than throw: throw is catch/throw control flow and would only
	# surface as an "uncaught throw" error instead of a deliberate argument error.
	raise ArgumentError, "Expected GFF3 Record but got #{parent_record.class}" unless parent_record.is_a?(Bio::GFF::GFF3::Record)
	raise ArgumentError, "Expected Array but got #{cds_records.class}" unless cds_records.is_a?(Array)

	on_reverse_strand = (parent_record.strand=="-")
	aa_coords = coords_in_protein(prot_seq,false) # Always use forward protein coordinates

	# Translation order: ascending on the forward strand, descending on the reverse
	ordered_cds_records = on_reverse_strand ? cds_records.sort.reverse : cds_records.sort

	# Peptide extent in nucleotides from the start of translation, [start, end)
	pep_start_i = aa_coords[:start]*3
	pep_end_i = pep_start_i+self.sequence.length*3

	i = 0 # Coding nucleotides consumed before the current CDS record
	fragments = []
	ordered_cds_records.each do |cds_record|
		cds_len = cds_record.length

		# Intersect this CDS, which spans [i, i+cds_len), with the peptide
		overlap_start = [i,pep_start_i].max
		overlap_end = [i+cds_len,pep_end_i].min

		# Emit only for a non-empty overlap, so CDS records lying wholly before
		# or after the peptide never yield a record with end < start.
		if overlap_end > overlap_start
			offset = overlap_start-i # Nucleotides into this CDS where the peptide begins
			fragment_len = overlap_end-overlap_start
			if on_reverse_strand
				# Translation runs from the high genomic coordinate downwards
				fragment_end = cds_record.end-offset
				fragment_start = fragment_end-fragment_len+1
			else
				fragment_start = cds_record.start+offset
				fragment_end = fragment_start+fragment_len-1
			end
			fragment = gff_record_for_peptide_fragment(fragment_start,fragment_end,cds_record)
			fragments << fragment unless fragment.nil?
		end

		i += cds_len
	end
	fragments
end

Class: Peptide

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Peptide

Instance Attribute Details

#charge ⇒ Object

#nsp_adjusted_probability ⇒ Object

#protein_name ⇒ Object

#sequence ⇒ Object