Class: Bioroebe::Protein
Overview
Constant Summary
collapse
- ARRAY_THESE_METHODS_ARE_NOT_IN_USE_FOR_THIS_CLASS =
#
ARRAY_THESE_METHODS_ARE_NOT_IN_USE_FOR_THIS_CLASS
All methods that should not be part of the class Protein can be listed in the following Array. They will be removed automatically as part of the clean-up that happens in the method reset().
#
%i(
n_uracil?
)
Constants inherited
from Sequence
Sequence::REMOVE_INVALID_CHARACTERS, Sequence::SHALL_WE_UPCASE
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from Sequence
[], #automatic_support_for_nucleotides, #description?, #index, #infer_type, #is_DNA?, #is_RNA?, #is_a_protein?, #is_a_protein_now, #map, #n_uracil?, #randomize, #remove_invalid_entries_from_the_dna_sequence, #remove_invalid_entries_from_the_dna_sequence!, #return_string_nucleotides_or_aminoacids, #sanitize_dataset, #sanitize_rna, #save_sequence_to_this_file, sequence_from_file, #set_description, #set_dna, #set_protein, #set_rna, #set_save_file, #set_sequence, #set_type, #shall_we_upcase?, #size?, #to_genbank, #to_regexp, #type?
Methods inherited from RawSequence
#+, #<<, #[]=, #calculate_levensthein_distance, #chars?, #complement, #composition?, #count, #delete, #delete!, #downcase, #each_char, #empty?, #find_substring_indices, #first_position=, #freeze, #gsub, #gsub!, #include?, #insert_at_this_position, #prepend, #remove_n_characters_from_the_left_side, #reverse, #reverse!, #reverse_complement, #scan, #set_raw_sequence, #shuffle, #size?, #split, #start_with?, #strip, #subseq, #to_s, #to_str, #tr!, #upcase!
Constructor Details
#initialize(this_sequence = 'ATCG') ⇒ Protein
38
39
40
41
42
43
44
45
46
|
# File 'lib/bioroebe/sequence/protein.rb', line 38
# Build a new Protein from +this_sequence+.
#
# The instance is first put into its default state via reset. The sequence
# is then handed to set_delegate_string, and the return value of that call
# is passed on to set_protein_type.
#
# NOTE(review): the nested call on a single line may be an artifact of how
# this listing was extracted - confirm against the real source file.
# NOTE(review): the default 'ATCG' looks like a nucleotide string rather
# than an aminoacid sequence - confirm that this default is intended.
def initialize(
this_sequence = 'ATCG'
)
reset
set_protein_type set_delegate_string(
this_sequence
)
end
|
Class Method Details
.name(this_aminoacid = 'A') ⇒ Object
306
307
308
|
# File 'lib/bioroebe/sequence/protein.rb', line 306
# Look up the long name of +this_aminoacid+ through
# Bioroebe.return_long_name_of_this_aminoacid and return it in lowercase.
def self.name(this_aminoacid = 'A')
  long_name = Bioroebe.return_long_name_of_this_aminoacid(this_aminoacid)
  long_name.downcase
end
|
.one_to_three(this_aminoacid = 'A') ⇒ Object
#
Bioroebe::Protein.one_to_three
Usage example:
Bioroebe::Protein.one_to_three('A')
#
291
292
293
294
295
296
|
# File 'lib/bioroebe/sequence/protein.rb', line 291
# Delegate to ::Bioroebe.one_to_three.
#
# An Array argument is first flattened into a single space-separated
# String (with surrounding whitespace stripped); any other argument is
# forwarded unchanged.
def self.one_to_three(this_aminoacid = 'A')
  this_aminoacid = this_aminoacid.join(' ').strip if this_aminoacid.is_a? Array
  ::Bioroebe.one_to_three(this_aminoacid)
end
|
.to_1(this_aminoacid = 'alanine') ⇒ Object
#
Bioroebe::Protein.to_1
This will convert from the long name of an aminoacid, such as ‘alanine’, to the short one-letter abbreviation.
Usage example:
Bioroebe::Protein.to_1('alanine')
#
321
322
323
324
325
326
327
328
|
# File 'lib/bioroebe/sequence/protein.rb', line 321
# Convert the long name of an aminoacid, such as 'alanine', into the value
# stored for it in the YAML file that
# Bioroebe.file_amino_acids_long_name_to_one_letter points at.
#
# An Array argument is joined with spaces and stripped first. The lookup
# key is the lowercased name. Returns whatever the loaded dataset holds for
# that key (nil for a Hash without such a key).
def self.to_1(this_aminoacid = 'alanine')
  this_aminoacid = this_aminoacid.join(' ').strip if this_aminoacid.is_a? Array
  # Use the non-destructive downcase: the caller's String must not be
  # modified, and frozen Strings (e. g. frozen literals) must be accepted.
  lookup_key = this_aminoacid.downcase
  dataset = YAML.load_file(Bioroebe.file_amino_acids_long_name_to_one_letter)
  dataset[lookup_key]
end
|
Instance Method Details
#aliphatic_index(a = 2.9, b = 3.9) ⇒ Object
#
aliphatic_index
This method will calculate the aliphatic index of an aminoacid sequence, aka a Protein.
Usage example:
require 'bioroebe'; Bioroebe::Protein.new('MVKSYDRYEYEDCLGIVNSKSSNCVFLNNA').aliphatic_index
#
268
269
270
271
272
273
274
275
276
277
278
279
280
281
|
# File 'lib/bioroebe/sequence/protein.rb', line 268
# Compute the aliphatic index of the sequence returned by sequence?.
#
# The residues are counted, and the result is
#   (count(A) + a * count(V) + b * (count(I) + count(L))) / length * 100
# evaluated in floating point.
#
# a:: weight applied to the valine count (default 2.9)
# b:: weight applied to the combined isoleucine + leucine count (default 3.9)
def aliphatic_index(
    a = 2.9,
    b = 3.9
  )
  counts = Hash.new(0)
  sequence?.chars.each { |residue| counts[residue] += 1 }
  alanine_part    = counts['A']
  valine_part     = a * counts['V'].to_f
  iso_leucine_sum = counts['I'] + counts['L']
  weighted_total  = alanine_part + valine_part + b * iso_leucine_sum
  weighted_total.to_f / length.to_f * 100
end
|
#can_be_N_glycosylated? ⇒ Boolean
#
can_be_N_glycosylated?
This method will return true if we can find a subsequence of N-X-S/T.
Usage example:
Bioroebe::Protein.new('MLKLKCNAS').can_be_N_glycosylated? Bioroebe::Protein.new('MLKLKCNAT').can_be_N_glycosylated? Bioroebe::Protein.new('MLKLKCNAC').can_be_N_glycosylated?
#
83
84
85
86
87
88
89
90
91
92
93
94
|
# File 'lib/bioroebe/sequence/protein.rb', line 83
# Return true if the sequence returned by sequence? contains an N-X-S/T
# motif: an N, followed by one residue from the accepted list below,
# followed by S or T. Returns false otherwise (including for nil).
#
# The last position is the character class [ST]. Writing it as [S|T] would
# also accept a literal "|" character, because "|" is not an alternation
# operator inside a character class.
#
# NOTE(review): the middle character class omits P, but also D and E (and
# B, J, X, Z). Whether excluding D and E is intended should be confirmed.
def can_be_N_glycosylated?
  # Regexp#match? returns a plain boolean and treats nil as "no match".
  /N[ACFGHIKLMNOQRSTUVWY][ST]/.match?(sequence?)
end
|
#can_be_stained_via_coomassie? ⇒ Boolean
#
can_be_stained_via_coomassie?
Determine whether a protein can be stained in a coomassie staining. This is just a simple, fairly dumb check and NOT guaranteed to be correct. If anyone has a better way to check for this let me know.
#
249
250
251
252
253
254
255
|
# File 'lib/bioroebe/sequence/protein.rb', line 249
# Return true if the sequence returned by sequence? contains at least one
# of the aminoacids K, H, R, W, F or Y; false otherwise.
#
# This is only a simple heuristic and is not guaranteed to be correct.
def can_be_stained_via_coomassie?
  stainable_aminoacids = %w( K H R W F Y)
  residues = sequence?.chars
  residues.any? { |residue| stainable_aminoacids.include?(residue) }
end
|
#
This method will return true if there are at least two cysteines in the aminoacid sequence of this protein.
If anyone knows of a better algorithm to determine whether a protein can REALLY create a disulfide bond let me know.
Usage example:
Bioroebe::Protein.new('MLKLKNASCCEEE').can_form_disulfide_bonds? Bioroebe::Protein.new('MLKLKNASCEEE').can_form_disulfide_bonds?
#
111
112
113
|
# File 'lib/bioroebe/sequence/protein.rb', line 111
# Return true if the sequence returned by sequence? contains at least two
# cysteines ('C'), false otherwise.
#
# The count itself is compared against 1. Calling .size on the count would
# yield the byte width of the Integer instead, which is always larger than
# 1 and would make this method return true for every sequence.
def can_form_disulfide_bonds?
  sequence?.count('C') > 1
end
|
#hydrophobic_amino_acids?(hydrophobic_aminoacids = %w(
A I L M V F W Y
)) ⇒ Boolean
#
hydrophobic_amino_acids?
This method will return an Array. This Array will contain all positions, as numbers, of aminoacids that are hydrophobic. These entries will start at position 1, so the first aminoacid, if it is hydrophobic, will be part of the Array, such as [1].
#
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
|
# File 'lib/bioroebe/sequence/protein.rb', line 225
# Return an Array with the 1-based positions of every residue in the
# sequence returned by sequence? that is listed in +hydrophobic_aminoacids+.
#
# hydrophobic_aminoacids:: Array of one-letter codes that count as
#                          hydrophobic (default: A I L M V F W Y)
def hydrophobic_amino_acids?(
    hydrophobic_aminoacids = %w(
      A I L M V F W Y
    )
  )
  residues = sequence?.chars
  matching_offsets = residues.each_index.select { |offset|
    hydrophobic_aminoacids.include?(residues[offset])
  }
  # Positions are reported starting at 1 rather than 0.
  matching_offsets.map { |offset| offset + 1 }
end
|
#is_glycosylated? ⇒ Boolean
Also known as:
glycosylated?
213
214
215
|
# File 'lib/bioroebe/sequence/protein.rb', line 213
# Reader for the @is_glycosylated instance variable (reset assigns it false).
# Returns the stored value as-is.
def is_glycosylated?
@is_glycosylated
end
|
#reset ⇒ Object
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
# File 'lib/bioroebe/sequence/protein.rb', line 51
# Put the instance back into its default state: run the parent class'
# reset (explicitly without arguments), then mark the protein as not
# glycosylated.
#
# NOTE(review): the class overview states that reset() also removes the
# methods listed in ARRAY_THESE_METHODS_ARE_NOT_IN_USE_FOR_THIS_CLASS; that
# step is not visible in this listing - confirm against the real source.
def reset
super()
@is_glycosylated = false
end
|
#reverse_translate ⇒ Object
Also known as:
revtrans
#
reverse_translate
Usage example:
x = Bioroebe::Protein.new('MSKADYEK'); puts x.reverse_translate; ''
#
#set_delegate_string(i) ⇒ Object
#
set_delegate_string
This method ultimately sets the main sequence for this class, but only after checking the input for invalid aminoacids.
#
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
# File 'lib/bioroebe/sequence/protein.rb', line 121
# Validate +i+ and hand it over to set_string.
#
# If +i+ is an Array only its first element is used. When the (truthy)
# input contains a "B" or a "J", the offending character is reported via
# e() and an InvalidAminoacid error is raised. Otherwise the result of
# set_string(i) is returned.
def set_delegate_string(i)
  i = i.first if i.is_a? Array
  if i && !i.scan(/(B|J)/).empty?
    # After the scan, $1 holds the captured character of the last match.
    offending_aminoacid = $1.to_s.dup
    e "Invalid aminoacid passed in: `#{offending_aminoacid}`"
    raise InvalidAminoacid.new(offending_aminoacid),
          'only valid aminoacids may be passed to this '\
          'method. Invalid aminoacids include "B" and "J".'
  end
  set_string(i)
end
|
#to_dna ⇒ Object
193
194
195
|
# File 'lib/bioroebe/sequence/protein.rb', line 193
# Return the result of to_rna with every 'U' replaced by 'T'.
def to_dna
  rna_sequence = to_rna
  rna_sequence.tr('U', 'T')
end
|
#to_rna ⇒ Object
Also known as:
aminoacid_to_codon
#
to_rna
Convert the main sequence to RNA and return that result.
Note that the current behaviour is to determine the “most likely codon sequence” for a given aminoacid sequence. Evidently this will NOT always be accurate for real-life organisms, so keep this in mind when using this method.
#
177
178
179
180
181
182
183
184
185
186
187
188
|
# File 'lib/bioroebe/sequence/protein.rb', line 177
# Convert the aminoacid sequence stored in @sequence into an RNA string.
#
# The codon sequence comes from
# Bioroebe.return_the_most_likely_codon_sequence_for_this_aminoacid_sequence;
# an Array result is joined into one String. Every 'T' is then replaced by
# 'U' and the resulting String is returned.
def to_rna
  # The required files are loaded lazily, on first use of this method.
  require 'bioroebe/codons/codons.rb'
  require 'bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb'
  codons = Bioroebe.return_the_most_likely_codon_sequence_for_this_aminoacid_sequence(
    @sequence
  )
  codons = codons.join if codons.is_a? Array
  codons.tr('T', 'U')
end
|
#weight? ⇒ Boolean
Also known as:
molecular_weight?
#
weight?
This method will return the total weight of this aminoacid sequence.
The standard weight assigned to each member of the 20-symbol amino acid alphabet is the “monoisotopic mass” of the corresponding amino acid.
#
151
152
153
154
155
156
157
158
|
# File 'lib/bioroebe/sequence/protein.rb', line 151
# Return the total weight of the sequence returned by string?.
#
# The weight of every single residue is obtained from weight_of and the
# values are added up from left to right, starting at 0. An empty sequence
# therefore yields 0.
def weight?
  string?.chars.inject(0) { |total, this_aminoacid|
    total + weight_of(this_aminoacid)
  }
end
|