Class: Bioroebe::Protein

Inherits:
Sequence show all
Defined in:
lib/bioroebe/sequence/protein.rb

Overview

Bioroebe::Protein

Constant Summary collapse

ARRAY_THESE_METHODS_ARE_NOT_IN_USE_FOR_THIS_CLASS =
#

ARRAY_THESE_METHODS_ARE_NOT_IN_USE_FOR_THIS_CLASS

All methods that should not be part of the class Protein can be listed in the following Array. They will be removed automatically as part of the clean-up that happens in the method reset().

#
%i(
  n_uracil?
)

Constants inherited from Sequence

Sequence::REMOVE_INVALID_CHARACTERS, Sequence::SHALL_WE_UPCASE

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Sequence

[], #automatic_support_for_nucleotides, #description?, #index, #infer_type, #is_DNA?, #is_RNA?, #is_a_protein?, #is_a_protein_now, #map, #n_uracil?, #randomize, #remove_invalid_entries_from_the_dna_sequence, #remove_invalid_entries_from_the_dna_sequence!, #return_string_nucleotides_or_aminoacids, #sanitize_dataset, #sanitize_rna, #save_sequence_to_this_file, sequence_from_file, #set_description, #set_dna, #set_protein, #set_rna, #set_save_file, #set_sequence, #set_type, #shall_we_upcase?, #size?, #to_genbank, #to_regexp, #type?

Methods inherited from RawSequence

#+, #<<, #[]=, #calculate_levensthein_distance, #chars?, #complement, #composition?, #count, #delete, #delete!, #downcase, #each_char, #empty?, #find_substring_indices, #first_position=, #freeze, #gsub, #gsub!, #include?, #insert_at_this_position, #prepend, #remove_n_characters_from_the_left_side, #reverse, #reverse!, #reverse_complement, #scan, #set_raw_sequence, #shuffle, #size?, #split, #start_with?, #strip, #subseq, #to_s, #to_str, #tr!, #upcase!

Constructor Details

#initialize(this_sequence = 'ATCG') ⇒ Protein

#

initialize

#


38
39
40
41
42
43
44
45
46
# File 'lib/bioroebe/sequence/protein.rb', line 38

# Build a new Protein from +this_sequence+ (defaults to 'ATCG').
#
# The three steps run in a fixed order: restore the default state, mark
# the object as a protein, and only then store the sequence through the
# validating setter.
def initialize(this_sequence = 'ATCG')
  reset
  set_protein_type # Make sure we have a protein sequence.
  set_delegate_string(this_sequence)
end

Class Method Details

.name(this_aminoacid = 'A') ⇒ Object

#

Bioroebe::Protein.name

Usage example:

Bioroebe::Protein.name('A') # => "alanine"
#


306
307
308
# File 'lib/bioroebe/sequence/protein.rb', line 306

# Return the lowercased long name for +this_aminoacid+ (default 'A').
#
# The lookup itself is delegated to
# Bioroebe.return_long_name_of_this_aminoacid; this method only
# downcases whatever that call returns.
def self.name(this_aminoacid = 'A')
  long_name = Bioroebe.return_long_name_of_this_aminoacid(this_aminoacid)
  long_name.downcase
end

.one_to_three(this_aminoacid = 'A') ⇒ Object

#

Bioroebe::Protein.one_to_three

Usage example:

Bioroebe::Protein.one_to_three('A') # => "Ala"
#


291
292
293
294
295
296
# File 'lib/bioroebe/sequence/protein.rb', line 291

# Forward +this_aminoacid+ to ::Bioroebe.one_to_three and return its result.
#
# An Array argument is first flattened into one space-separated,
# stripped String; any other argument is passed through untouched.
def self.one_to_three(this_aminoacid = 'A')
  query =
    if this_aminoacid.is_a?(Array)
      this_aminoacid.join(' ').strip
    else
      this_aminoacid
    end
  ::Bioroebe.one_to_three(query)
end

.to_1(this_aminoacid = 'alanine') ⇒ Object

#

Bioroebe::Protein.to_1

This will convert from the long name of an aminoacid, such as ‘alanine’, to the short one-letter abbreviation.

Usage example:

Bioroebe::Protein.to_1('alanine') # => "A"
#


321
322
323
324
325
326
327
328
# File 'lib/bioroebe/sequence/protein.rb', line 321

# Convert the long name of an aminoacid (such as 'alanine') into its
# one-letter abbreviation.
#
# this_aminoacid - a String, or an Array of Strings which is joined with
#                  spaces and stripped before the lookup.
#
# The lookup table is read from the YAML file whose path is returned by
# Bioroebe.file_amino_acids_long_name_to_one_letter. The key is the
# lowercased input; the value stored under that key is returned, or nil
# when the table has no such key.
#
# The argument is never modified: a non-destructive downcase is used so
# that frozen Strings (for instance literals under
# `# frozen_string_literal: true`) work and the caller's String keeps
# its original case.
def self.to_1(this_aminoacid = 'alanine')
  if this_aminoacid.is_a? Array
    this_aminoacid = this_aminoacid.join(' ').strip
  end
  key = this_aminoacid.downcase
  dataset = YAML.load_file(Bioroebe.file_amino_acids_long_name_to_one_letter)
  dataset[key]
end

Instance Method Details

#aliphatic_index(a = 2.9, b = 3.9) ⇒ Object

#

aliphatic_index

This method will calculate the aliphatic index of an aminoacid sequence, aka a Protein.

Usage example:

require 'bioroebe'; Bioroebe::Protein.new('MVKSYDRYEYEDCLGIVNSKSSNCVFLNNA').aliphatic_index # => 71.33333
#


268
269
270
271
272
273
274
275
276
277
278
279
280
281
# File 'lib/bioroebe/sequence/protein.rb', line 268

# Compute the aliphatic index of the sequence.
#
# a - weight applied to the valine count (default 2.9)
# b - weight applied to the combined isoleucine + leucine count
#     (default 3.9)
#
# The result is (A + a*V + b*(I + L)) / length * 100, where A, V, I and L
# are the number of occurrences of that letter in sequence?.
def aliphatic_index(
    a = 2.9,
    b = 3.9
  )
  # Per-residue tally; residues that never occur read back as 0.
  counts = Hash.new(0)
  sequence?.each_char { |residue| counts[residue] += 1 }
  weighted_sum =
    counts['A'] +
    a * counts['V'].to_f +
    b * (counts['I'] + counts['L'])
  weighted_sum.to_f / length.to_f * 100
end

#can_be_N_glycosylated?Boolean

#

can_be_N_glycosylated?

This method will return true if we can find a subsequence of N-X-S/T.

Usage example:

Bioroebe::Protein.new('MLKLKCNAS').can_be_N_glycosylated? # => true
Bioroebe::Protein.new('MLKLKCNAT').can_be_N_glycosylated? # => true
Bioroebe::Protein.new('MLKLKCNAC').can_be_N_glycosylated? # => false
#

Returns:

  • (Boolean)


83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/bioroebe/sequence/protein.rb', line 83

# Return true if sequence? contains an N-X-S/T subsequence, otherwise
# false.
#
# The third position uses the character class [ST]. The previous form
# [S|T] was a character class too, so it additionally accepted a literal
# "|" character, which is not an aminoacid.
#
# NOTE(review): the middle position does not accept D, E or P (nor B, J,
# X, Z). Excluding proline matches the usual N-X-S/T sequon rule; confirm
# whether leaving out D and E is intentional.
def can_be_N_glycosylated?
  # ======================================================================= #
  # See: https://rubular.com/r/cHCqqD1FxD0PJL
  # ======================================================================= #
  use_this_regex = /N[ACFGHIKLMNOQRSTUVWY][ST]/
  # =~ yields an index or nil; normalise that to a strict boolean.
  (sequence? =~ use_this_regex) ? true : false
end

#can_be_stained_via_coomassie?Boolean

#

can_be_stained_via_coomassie?

Determine whether a protein can be stained in a coomassie staining. This is just a simple, fairly dumb check and NOT guaranteed to be correct. If anyone has a better way to check for this let me know.

#

Returns:

  • (Boolean)


249
250
251
252
253
254
255
# File 'lib/bioroebe/sequence/protein.rb', line 249

# Heuristic check: returns true as soon as sequence? contains at least
# one of the residues K, H, R, W, F or Y, and false otherwise (including
# for an empty sequence).
def can_be_stained_via_coomassie?
  # Presence of any of these residues is what makes the check succeed.
  staining_residues = %w( K H R W F Y)
  sequence?.each_char.any? { |residue| staining_residues.include?(residue) }
end

#can_form_disulfide_bonds?Boolean

#

can_form_disulfide_bonds?

This method will return true if there are at least two cysteines in the aminoacid sequence of this protein.

If anyone knows of a better algorithm to determine whether a protein can REALLY create a disulfide bond let me know.

Usage example:

Bioroebe::Protein.new('MLKLKNASCCEEE').can_form_disulfide_bonds? # => true
Bioroebe::Protein.new('MLKLKNASCEEE').can_form_disulfide_bonds?  # => false
#

Returns:

  • (Boolean)


111
112
113
# File 'lib/bioroebe/sequence/protein.rb', line 111

# Return true if sequence? contains at least two cysteines ('C'),
# otherwise false.
#
# String#count already returns the number of occurrences as an Integer.
# Calling .size on that Integer would yield its byte width rather than
# the count, which made the check succeed for every sequence, so the
# count is compared directly.
def can_form_disulfide_bonds?
  sequence?.count('C') > 1
end

#hydrophobic_amino_acids?(hydrophobic_aminoacids = %w( A I L M V F W Y )) ⇒ Boolean

#

hydrophobic_amino_acids?

This method will return an Array. This Array will contain all positions, as numbers, of aminoacids that are hydrophobic. These entries will start at position 1, so the first aminoacid, if it is hydrophobic, will be part of the Array, such as [1].

#

Returns:

  • (Array) — the 1-based positions of all hydrophobic aminoacids


225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# File 'lib/bioroebe/sequence/protein.rb', line 225

# Return an Array with the 1-based positions of every residue in
# sequence? that is listed in +hydrophobic_aminoacids+.
#
# hydrophobic_aminoacids - Array of one-letter codes treated as
#                          hydrophobic (default: A I L M V F W Y; the
#                          list used to be G A V L I P F M W).
#
# An empty Array is returned when nothing matches.
def hydrophobic_amino_acids?(
    hydrophobic_aminoacids = %w(
      A I L M V F W Y
    )
  )
  # with_index(1) makes the reported positions start at 1 instead of 0.
  sequence?.each_char.with_index(1).select { |residue, _position|
    hydrophobic_aminoacids.include?(residue)
  }.map { |_residue, position| position }
end

#is_glycosylated?Boolean Also known as: glycosylated?

#

is_glycosylated?

#

Returns:

  • (Boolean)


213
214
215
# File 'lib/bioroebe/sequence/protein.rb', line 213

# Reader for the @is_glycosylated flag; returns whatever value the
# instance variable currently holds.
def is_glycosylated?
  @is_glycosylated
end

#resetObject

#

reset

#


51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/bioroebe/sequence/protein.rb', line 51

# Restore the default state: run the parent class's reset first, then
# initialise the protein-specific @is_glycosylated flag to false.
def reset
  super()
  # ======================================================================= #
  # === @is_glycosylated
  #
  # Some proteins are glycosylated, that is, they carry a glycosylated
  # structure. Glycosylated proteins are sometimes called "glycoproteins".
  #
  # Two major types of protein glycosylation are known:
  #
  #   1) N-linked glycans containing asparagine-X-serine/threonine
  #      N-X-S/T.
  #
  #   2) O-linked glycans attached to the hydroxyl-oxygen of either
  #      serine, threonine, tyrosine, hydroxylysine or hydroxyproline.
  #
  # ======================================================================= #
  @is_glycosylated = false
end

#reverse_translateObject Also known as: revtrans

#

reverse_translate

Usage example:

x = Bioroebe::Protein.new('MSKADYEK'); puts x.reverse_translate; '' # => AUGAGCAAGGCCGACUACGAGAAG
#


205
206
207
208
# File 'lib/bioroebe/sequence/protein.rb', line 205

# Pass sequence? to
# ::Bioroebe.deduce_most_likely_aminoacid_sequence_as_string and return
# that call's result unchanged.
def reverse_translate
  # Required at call time rather than at file load; presumably this file
  # provides the method used below - TODO confirm.
  require 'bioroebe/codons/codons.rb'
  ::Bioroebe.deduce_most_likely_aminoacid_sequence_as_string(sequence?)
end

#set_delegate_string(i) ⇒ Object

#

set_delegate_string

This method ultimately sets the main sequence of this class, but only after checking the input for invalid aminoacids.

#


121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/bioroebe/sequence/protein.rb', line 121

# Validate +i+ and then store it via set_string.
#
# i - the sequence; when an Array is given only its first element is
#     used. A nil/false value skips validation and is handed to
#     set_string as-is.
#
# Raises InvalidAminoacid when the sequence contains "B" or "J". Before
# raising, the last offending letter found is reported through e().
def set_delegate_string(i)
  i = i.first if i.is_a? Array
  # ======================================================================= #
  # === Ensure only valid aminoacids past this point
  # ======================================================================= #
  if i
    invalid_hits = i.scan(/(B|J)/)
    unless invalid_hits.empty?
      # scan yields one single-element capture Array per hit; the last
      # hit is the one that gets reported.
      offender = invalid_hits.last.first.to_s.dup
      e "Invalid aminoacid passed in: `#{offender}`"
      raise InvalidAminoacid.new(offender),
        'only valid aminoacids may be passed to this '\
        'method. Invalid aminoacids include "B" and "J".'
    end
  end
  set_string(i)
end

#to_dnaObject

#

to_dna

#


193
194
195
# File 'lib/bioroebe/sequence/protein.rb', line 193

# Return the DNA form of the sequence: the result of to_rna with every
# 'U' replaced by 'T'.
def to_dna
  rna = to_rna
  rna.tr('U','T')
end

#to_rnaObject Also known as: aminoacid_to_codon

#

to_rna

Convert the main sequence to RNA and return that result.

Note that currently this method determines the “most likely codon sequence” for a given aminoacid sequence. Evidently this will NOT always be accurate for real-life organisms, so keep this in mind when using this method.

#


177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/bioroebe/sequence/protein.rb', line 177

# Return an RNA String for the stored sequence.
#
# @sequence is handed to
# Bioroebe.return_the_most_likely_codon_sequence_for_this_aminoacid_sequence;
# an Array result is joined into one String, and every 'T' is then
# replaced by 'U'.
#
# NOTE(review): this reads @sequence directly, while sibling methods go
# through sequence? or string? - confirm these all refer to the same data.
def to_rna
  require 'bioroebe/codons/codons.rb'
  require 'bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb'
  codons = Bioroebe.return_the_most_likely_codon_sequence_for_this_aminoacid_sequence(@sequence)
  codons = codons.join if codons.is_a? Array
  codons.tr('T','U')
end

#weight?Boolean Also known as: molecular_weight?

#

weight?

This method will return the total weight of this aminoacid sequence.

The standard weight assigned to each member of the 20-symbol amino acid alphabet is the “monoisotopic mass” of the corresponding amino acid.

#

Returns:

  • (Numeric) — the summed weight of all aminoacids in the sequence


151
152
153
154
155
156
157
158
# File 'lib/bioroebe/sequence/protein.rb', line 151

# Return the total weight of the sequence: the sum of weight_of(residue)
# over every character of string?. An empty sequence yields 0.
def weight?
  # Plain left-to-right accumulation starting at 0.
  string?.each_char.inject(0) { |total, residue|
    total + weight_of(residue)
  }
end

#weight_of(i) ⇒ Object

#

weight_of

#


163
164
165
# File 'lib/bioroebe/sequence/protein.rb', line 163

# Return the value of ::Bioroebe.amino_acid_monoisotopic_mass(i),
# converted to a Float via to_f.
def weight_of(i)
  mass = ::Bioroebe.amino_acid_monoisotopic_mass(i)
  mass.to_f
end