Class: Bioroebe::Protein

Inherits:

Object
RawSequence
Sequence
Bioroebe::Protein

Defined in:: lib/bioroebe/sequence/protein.rb

Overview

Bioroebe::Protein

Constant Summary collapse

ARRAY_THESE_METHODS_ARE_NOT_IN_USE_FOR_THIS_CLASS = # Symbols naming the methods that should not be a part of the class Protein. Per the original note they are removed automatically as part of the clean-up happening in the method reset(). NOTE(review): the reset() of this class only calls super() and sets @is_glycosylated, so the removal presumably happens in a parent class - confirm.

%i(
  n_uracil?
)

Constants inherited from Sequence

Sequence::REMOVE_INVALID_CHARACTERS, Sequence::SHALL_WE_UPCASE

Class Method Summary collapse

.name(this_aminoacid = 'A') ⇒ Object

# === Bioroebe::Protein.name.
.one_to_three(this_aminoacid = 'A') ⇒ Object

# === Bioroebe::Protein.one_to_three.
.to_1(this_aminoacid = 'alanine') ⇒ Object

# === Bioroebe::Protein.to_1.

Instance Method Summary collapse

#aliphatic_index(a = 2.9, b = 3.9) ⇒ Object

# === aliphatic_index.
#can_be_N_glycosylated? ⇒ Boolean

# === can_be_N_glycosylated?.
#can_be_stained_via_coomassie? ⇒ Boolean

# === can_be_stained_via_coomassie?.
#can_form_disulfide_bonds? ⇒ Boolean

# === can_form_disulfide_bonds?.
#hydrophobic_amino_acids?(hydrophobic_aminoacids = %w( A I L M V F W Y )) ⇒ Boolean

# === hydrophobic_amino_acids?.
#initialize(this_sequence = 'ATCG') ⇒ Protein constructor

# === initialize ========================================================================= #.
#is_glycosylated? ⇒ Boolean (also: #glycosylated?)

# === is_glycosylated? ========================================================================= #.
#reset ⇒ Object

# === reset ========================================================================= #.
#reverse_translate ⇒ Object (also: #revtrans)

# === reverse_translate.
#set_delegate_string(i) ⇒ Object

# === set_delegate_string.
#to_dna ⇒ Object

# === to_dna ========================================================================= #.
#to_rna ⇒ Object (also: #aminoacid_to_codon)

# === to_rna.
#weight? ⇒ Boolean (also: #molecular_weight?)

# === weight?.
#weight_of(i) ⇒ Object

# === weight_of ========================================================================= #.

Methods inherited from Sequence

[], #automatic_support_for_nucleotides, #description?, #index, #infer_type, #is_DNA?, #is_RNA?, #is_a_protein?, #is_a_protein_now, #map, #n_uracil?, #randomize, #remove_invalid_entries_from_the_dna_sequence, #remove_invalid_entries_from_the_dna_sequence!, #return_string_nucleotides_or_aminoacids, #sanitize_dataset, #sanitize_rna, #save_sequence_to_this_file, sequence_from_file, #set_description, #set_dna, #set_protein, #set_rna, #set_save_file, #set_sequence, #set_type, #shall_we_upcase?, #size?, #to_genbank, #to_regexp, #type?

Methods inherited from RawSequence

#+, #<<, #[]=, #calculate_levensthein_distance, #chars?, #complement, #composition?, #count, #delete, #delete!, #downcase, #each_char, #empty?, #find_substring_indices, #first_position=, #freeze, #gsub, #gsub!, #include?, #insert_at_this_position, #prepend, #remove_n_characters_from_the_left_side, #reverse, #reverse!, #reverse_complement, #scan, #set_raw_sequence, #shuffle, #size?, #split, #start_with?, #strip, #subseq, #to_s, #to_str, #tr!, #upcase!

Constructor Details

#initialize(this_sequence = 'ATCG') ⇒ `Protein`

#

initialize

#

# File 'lib/bioroebe/sequence/protein.rb', line 38

# ========================================================================= #
# === initialize
#
# Set up a new Protein instance. The steps run in this order:
#
#   1) reset() brings the instance into its default state
#   2) set_protein_type() marks the sequence as a protein sequence
#   3) set_delegate_string() validates and stores the given sequence
#
# @param this_sequence [String, Array] the aminoacid sequence; if an
#   Array is given, set_delegate_string() will only use its first element
# ========================================================================= #
def initialize(this_sequence = 'ATCG')
  reset
  set_protein_type # Make sure we have a protein sequence.
  set_delegate_string(this_sequence)
end

Class Method Details

.name(this_aminoacid = 'A') ⇒ `Object`

#

Bioroebe::Protein.name

Usage example:

Bioroebe::Protein.name('A') # => "alanine"

#



306
307
308

# File 'lib/bioroebe/sequence/protein.rb', line 306

# ========================================================================= #
# === Bioroebe::Protein.name
#
# Ask Bioroebe.return_long_name_of_this_aminoacid() for the long name
# of the given aminoacid and return that name in lowercase.
#
# Usage example:
#
#   Bioroebe::Protein.name('A') # => "alanine"
#
# @param this_aminoacid [String] the aminoacid to look up
# @return [String] the downcased long name
# ========================================================================= #
def self.name(this_aminoacid = 'A')
  long_name = Bioroebe.return_long_name_of_this_aminoacid(this_aminoacid)
  long_name.downcase
end

.one_to_three(this_aminoacid = 'A') ⇒ `Object`

#

Bioroebe::Protein.one_to_three

Usage example:

Bioroebe::Protein.one_to_three('A') # => "Ala"

#

# File 'lib/bioroebe/sequence/protein.rb', line 291

# ========================================================================= #
# === Bioroebe::Protein.one_to_three
#
# Thin wrapper around ::Bioroebe.one_to_three(). An Array argument is
# first joined with single spaces and stripped, so that the delegate
# always receives one object (a String in the Array case).
#
# Usage example:
#
#   Bioroebe::Protein.one_to_three('A') # => "Ala"
#
# @param this_aminoacid [String, Array<String>] one-letter code(s)
# @return [Object] whatever ::Bioroebe.one_to_three() returns
# ========================================================================= #
def self.one_to_three(this_aminoacid = 'A')
  query =
    if this_aminoacid.is_a?(Array)
      this_aminoacid.join(' ').strip
    else
      this_aminoacid
    end
  ::Bioroebe.one_to_three(query)
end

.to_1(this_aminoacid = 'alanine') ⇒ `Object`

#

Bioroebe::Protein.to_1

This will convert from the long name of an aminoacid, such as ‘alanine’, to the short one-letter abbreviation.

Usage example:

Bioroebe::Protein.to_1('alanine') # => "A"

#

# File 'lib/bioroebe/sequence/protein.rb', line 321

# ========================================================================= #
# === Bioroebe::Protein.to_1
#
# Convert the long name of an aminoacid, such as 'alanine', into the
# short one-letter abbreviation.
#
# The lookup table is read via YAML.load_file() from the path returned
# by Bioroebe.file_amino_acids_long_name_to_one_letter, on every call.
#
# The input is matched case-insensitively. A non-destructive downcase
# is used on purpose: the caller's String is never modified, and frozen
# Strings (including frozen literals) are accepted.
#
# Usage example:
#
#   Bioroebe::Protein.to_1('alanine') # => "A"
#
# @param this_aminoacid [String, Array<String>] the long name; an Array
#   is joined with single spaces and stripped before the lookup
# @return [Object] the entry stored in the dataset under the downcased
#   name
# ========================================================================= #
def self.to_1(this_aminoacid = 'alanine')
  if this_aminoacid.is_a? Array
    this_aminoacid = this_aminoacid.join(' ').strip
  end
  # downcase (not downcase!) so that the argument is left untouched.
  lookup_key = this_aminoacid.downcase
  dataset = YAML.load_file(Bioroebe.file_amino_acids_long_name_to_one_letter)
  dataset[lookup_key]
end

Instance Method Details

#aliphatic_index(a = 2.9, b = 3.9) ⇒ `Object`

#

aliphatic_index

This method will calculate the aliphatic index of an aminoacid sequence, aka a Protein.

Usage example:

require 'bioroebe'; Bioroebe::Protein.new('MVKSYDRYEYEDCLGIVNSKSSNCVFLNNA').aliphatic_index # => 71.33333

#

# File 'lib/bioroebe/sequence/protein.rb', line 268

# ========================================================================= #
# === aliphatic_index
#
# Calculate the aliphatic index of this aminoacid sequence:
#
#   (count(A) + a * count(V) + b * (count(I) + count(L))) / length * 100
#
# The residue counts are taken from sequence?, the divisor from length.
#
# Usage example:
#
#   require 'bioroebe'; Bioroebe::Protein.new('MVKSYDRYEYEDCLGIVNSKSSNCVFLNNA').aliphatic_index # => 71.33333
#
# @param a [Numeric] weight applied to the valine count
# @param b [Numeric] weight applied to the isoleucine + leucine count
# @return [Float] the aliphatic index
# ========================================================================= #
def aliphatic_index(a = 2.9, b = 3.9)
  residues = sequence?.chars
  n_alanine  = residues.count('A')
  n_valine   = residues.count('V')
  n_branched = residues.count('I') + residues.count('L')
  weighted_sum = n_alanine + a * n_valine.to_f + b * n_branched
  weighted_sum.to_f / length.to_f * 100
end

#can_be_N_glycosylated? ⇒ `Boolean`

#

can_be_N_glycosylated?

This method will return true if we can find a subsequence of N-X-S/T.

Usage example:

Bioroebe::Protein.new('MLKLKCNAS').can_be_N_glycosylated? # => true
Bioroebe::Protein.new('MLKLKCNAT').can_be_N_glycosylated? # => true
Bioroebe::Protein.new('MLKLKCNAC').can_be_N_glycosylated? # => false

#

Returns:

(Boolean)

# File 'lib/bioroebe/sequence/protein.rb', line 83

# ========================================================================= #
# === can_be_N_glycosylated?
#
# Return true if the sequence contains a subsequence of the form
# N-X-S/T, that is: an asparagine, followed by one residue from the
# accepted set below, followed by either serine or threonine.
#
# Usage example:
#
#   Bioroebe::Protein.new('MLKLKCNAS').can_be_N_glycosylated? # => true
#   Bioroebe::Protein.new('MLKLKCNAT').can_be_N_glycosylated? # => true
#   Bioroebe::Protein.new('MLKLKCNAC').can_be_N_glycosylated? # => false
#
# @return [Boolean] true if such a motif was found, false otherwise
# ========================================================================= #
def can_be_N_glycosylated?
  # ======================================================================= #
  # See: https://rubular.com/r/cHCqqD1FxD0PJL
  #
  # The last position is the character class [ST]. Inside a character
  # class a "|" is a literal pipe rather than an alternation, so it must
  # not be part of the class.
  #
  # NOTE(review): the middle class leaves out P, but it also leaves out
  # D and E (as well as B, J, X and Z). If "X" is meant to be "any
  # aminoacid except proline" then D and E are probably missing by
  # accident - confirm before extending the class.
  # ======================================================================= #
  use_this_regex = /N[ACFGHIKLMNOQRSTUVWY][ST]/
  (sequence? =~ use_this_regex) ? true : false
end

#can_be_stained_via_coomassie? ⇒ `Boolean`

#

can_be_stained_via_coomassie?

Determine whether a protein can be stained in a coomassie staining. This is just a simple, fairly dumb check and NOT guaranteed to be correct. If anyone has a better way to check for this let me know.

#

Returns:

(Boolean)

# File 'lib/bioroebe/sequence/protein.rb', line 249

# ========================================================================= #
# === can_be_stained_via_coomassie?
#
# Simple heuristic: return true if at least one of the aminoacids
# K, H, R, W, F or Y occurs in the sequence, and false otherwise.
#
# This is a fairly dumb check and NOT guaranteed to be correct.
#
# @return [Boolean]
# ========================================================================= #
def can_be_stained_via_coomassie?
  # The presence of any of these residues makes the method return true.
  stainable_aminoacids = %w( K H R W F Y)
  residues_found = sequence?.chars & stainable_aminoacids
  !residues_found.empty?
end

#can_form_disulfide_bonds? ⇒ `Boolean`

#

can_form_disulfide_bonds?

This method will return true if there are at least two cysteines in the aminoacid sequence of this protein.

If anyone knows of a better algorithm to determine whether a protein can REALLY create a disulfide bond let me know.

Usage example:

Bioroebe::Protein.new('MLKLKNASCCEEE').can_form_disulfide_bonds? # => true
Bioroebe::Protein.new('MLKLKNASCEEE').can_form_disulfide_bonds?  # => false

#

Returns:

(Boolean)



111
112
113

# File 'lib/bioroebe/sequence/protein.rb', line 111

# ========================================================================= #
# === can_form_disulfide_bonds?
#
# Return true if the sequence contains at least two cysteines ('C').
#
# Usage example:
#
#   Bioroebe::Protein.new('MLKLKNASCCEEE').can_form_disulfide_bonds? # => true
#   Bioroebe::Protein.new('MLKLKNASCEEE').can_form_disulfide_bonds?  # => false
#
# @return [Boolean]
# ========================================================================= #
def can_form_disulfide_bonds?
  # Compare the count itself. Calling .size on the Integer returned by
  # count() would yield the byte size of that Integer, not the number
  # of cysteines, and the comparison would then always be true.
  sequence?.count('C') > 1
end

#hydrophobic_amino_acids?(hydrophobic_aminoacids = %w( A I L M V F W Y )) ⇒ `Boolean`

#

hydrophobic_amino_acids?

This method will return an Array. This Array will contain all positions, as numbers, of aminoacids that are hydrophobic. These entries will start at position 1, so the first aminoacid, if it is hydrophobic, will be part of the Array, such as [1].

#

Returns:

(Boolean)

# File 'lib/bioroebe/sequence/protein.rb', line 225

# ========================================================================= #
# === hydrophobic_amino_acids?
#
# Return an Array holding the positions of all hydrophobic aminoacids
# in the sequence. Positions are 1-based, so a hydrophobic aminoacid at
# the very start of the sequence is reported as 1.
#
# @param hydrophobic_aminoacids [Array<String>] the one-letter codes
#   that count as hydrophobic
# @return [Array<Integer>] 1-based positions, in ascending order
# ========================================================================= #
def hydrophobic_amino_acids?(hydrophobic_aminoacids = %w( A I L M V F W Y ))
  # The default list used to be: G A V L I P F M W
  residues = sequence?.chars
  matching_indices = residues.each_index.select { |position|
    hydrophobic_aminoacids.include?(residues[position])
  }
  matching_indices.map { |position| position + 1 }
end

#is_glycosylated? ⇒ `Boolean` Also known as: glycosylated?

#

is_glycosylated?

#

Returns:

(Boolean)



213
214
215

# File 'lib/bioroebe/sequence/protein.rb', line 213

# ========================================================================= #
# === is_glycosylated?
#
# Query method for @is_glycosylated. The method reset() of this class
# sets that variable to false.
#
# @return [Object] the current value of @is_glycosylated
# ========================================================================= #
def is_glycosylated?
  @is_glycosylated
end

#reset ⇒ `Object`

#

reset

#

# File 'lib/bioroebe/sequence/protein.rb', line 51

# ========================================================================= #
# === reset
#
# Bring the instance into its default state: first run the reset() of
# the parent class, then initialize the state specific to this class.
# ========================================================================= #
def reset
  super() # Explicit parentheses: call the parent's reset() without arguments.
  # ======================================================================= #
  # === @is_glycosylated
  #
  # Some proteins are glycosylated, that is, they carry a glycosylated
  # structure. Glycosylated proteins are sometimes called "glycoproteins".
  #
  # Two major types of protein glycosylation are known:
  #
  #   1) N-linked glycans containing asparagine-X-serine/threonine
  #      N-X-S/T.
  #
  #   2) O-linked glycans attached to the hydroxyl-oxygen of either
  #      serine, threonine, tyrosine, hydroxylysine or hydroxyproline.
  #
  # A new or freshly reset protein is not glycosylated.
  # ======================================================================= #
  @is_glycosylated = false
end

#reverse_translate ⇒ `Object` Also known as: revtrans

#

reverse_translate

Usage example:

x = Bioroebe::Protein.new('MSKADYEK'); puts x.reverse_translate; '' # => AUGAGCAAGGCCGACUACGAGAAG

#

# File 'lib/bioroebe/sequence/protein.rb', line 205

# ========================================================================= #
# === reverse_translate
#
# Hand the current sequence to
# ::Bioroebe.deduce_most_likely_aminoacid_sequence_as_string() and
# return its result. The codons file is required on demand.
#
# Usage example:
#
#   x = Bioroebe::Protein.new('MSKADYEK'); puts x.reverse_translate; '' # => AUGAGCAAGGCCGACUACGAGAAG
#
# @return [Object] whatever the Bioroebe toplevel method returns
# ========================================================================= #
def reverse_translate
  require 'bioroebe/codons/codons.rb'
  aminoacid_sequence = sequence?
  ::Bioroebe.deduce_most_likely_aminoacid_sequence_as_string(aminoacid_sequence)
end

#set_delegate_string(i) ⇒ `Object`

#

set_delegate_string

This method ultimately sets the main sequence of this class, but it will only do so after checking the input for invalid aminoacids.

#

# File 'lib/bioroebe/sequence/protein.rb', line 121

# ========================================================================= #
# === set_delegate_string
#
# Validate the input and then store it via set_string().
#
# If an Array is given, only its first element is used. If the input
# contains a "B" or a "J", the problem is reported via e() and an
# InvalidAminoacid error is raised; set_string() is not called then.
# A nil/false input skips the validation and is passed on as-is.
#
# @param i [String, Array, nil] the sequence to store
# @raise [InvalidAminoacid] if the input contains "B" or "J"
# ========================================================================= #
def set_delegate_string(i)
  i = i.first if i.is_a?(Array)
  # ======================================================================= #
  # === Ensure only valid aminoacids past this point
  # ======================================================================= #
  if i && !i.scan(/(B|J)/).empty?
    # The first capture group of the match data left behind by scan().
    invalid_aminoacid = Regexp.last_match(1).to_s.dup
    e "Invalid aminoacid passed in: `#{invalid_aminoacid}`"
    raise InvalidAminoacid.new(invalid_aminoacid),
      'only valid aminoacids may be passed to this '\
      'method. Invalid aminoacids include "B" and "J".'
  end
  set_string(i)
end

#to_dna ⇒ `Object`

#

to_dna

#



193
194
195

# File 'lib/bioroebe/sequence/protein.rb', line 193

# ========================================================================= #
# === to_dna
#
# Take the result of to_rna() and replace every 'U' with a 'T'.
#
# @return [String] the result of to_rna with U replaced by T
# ========================================================================= #
def to_dna
  rna_sequence = to_rna
  rna_sequence.tr('U', 'T')
end

#to_rna ⇒ `Object` Also known as: aminoacid_to_codon

#

to_rna

Convert the main sequence to RNA and return that result.

Note that currently the behaviour will determine the “most likely codon sequence” for a given aminoacid sequence. Evidently this will NOT always be absolutely accurate for real-life organisms, so keep this in mind when using this method here.

#

# File 'lib/bioroebe/sequence/protein.rb', line 177

# ========================================================================= #
# === to_rna
#
# Pass @sequence to
# Bioroebe.return_the_most_likely_codon_sequence_for_this_aminoacid_sequence()
# and return the result with every 'T' replaced by 'U'. If that method
# returns an Array, its elements are joined into one String first.
#
# Keep in mind that this is only the "most likely codon sequence",
# which will not always be accurate for real organisms.
#
# @return [String] the codon sequence, using U instead of T
# ========================================================================= #
def to_rna
  require 'bioroebe/codons/codons.rb'
  require 'bioroebe/nucleotides/most_likely_nucleotide_sequence_for_this_aminoacid_sequence.rb'
  codon_sequence = Bioroebe.return_the_most_likely_codon_sequence_for_this_aminoacid_sequence(
    @sequence
  )
  codon_sequence = codon_sequence.join if codon_sequence.is_a?(Array)
  codon_sequence.tr('T', 'U')
end

#weight? ⇒ `Boolean` Also known as: molecular_weight?

#

weight?

This method will return the total weight of this aminoacid sequence.

The standard weight assigned to each member of the 20-symbol amino acid alphabet is the “monoisotopic mass” of the corresponding amino acid.

#

Returns:

(Boolean)

# File 'lib/bioroebe/sequence/protein.rb', line 151

# ========================================================================= #
# === weight?
#
# Return the total weight of this aminoacid sequence, which is the sum
# of weight_of() over every character of string?.
#
# The values are added up one by one, from left to right, starting
# at 0. For an empty sequence the result is thus 0.
#
# @return [Numeric] the summed-up weight (not a Boolean, despite the
#   trailing question mark in the method name)
# ========================================================================= #
def weight?
  string?.chars.inject(0) { |total, this_aminoacid|
    total + weight_of(this_aminoacid)
  }
end

#weight_of(i) ⇒ `Object`

#

weight_of

#



163
164
165

# File 'lib/bioroebe/sequence/protein.rb', line 163

# ========================================================================= #
# === weight_of
#
# Look up the given aminoacid via
# ::Bioroebe.amino_acid_monoisotopic_mass() and return the result
# converted via .to_f.
#
# @param i [String] the aminoacid to look up
# @return [Float]
# ========================================================================= #
def weight_of(i)
  monoisotopic_mass = ::Bioroebe.amino_acid_monoisotopic_mass(i)
  monoisotopic_mass.to_f
end

Class: Bioroebe::Protein

Overview

Bioroebe::Protein

Constant Summary collapse

#

ARRAY_THESE_METHODS_ARE_NOT_IN_USE_FOR_THIS_CLASS

#

Constants inherited from Sequence

Class Method Summary collapse

# === Bioroebe::Protein.name.

# === Bioroebe::Protein.one_to_three.

# === Bioroebe::Protein.to_1.

Instance Method Summary collapse

# === aliphatic_index.

# === can_be_N_glycosylated?.

# === can_be_stained_via_coomassie?.

# === can_form_disulfide_bonds?.

# === hydrophobic_amino_acids?.

# === initialize ========================================================================= #.

# === is_glycosylated? ========================================================================= #.

# === reset ========================================================================= #.

# === reverse_translate.

# === set_delegate_string.

# === to_dna ========================================================================= #.

# === to_rna.

# === weight?.

# === weight_of ========================================================================= #.

Methods inherited from Sequence

Methods inherited from RawSequence

Constructor Details

#initialize(this_sequence = 'ATCG') ⇒ Protein

#

initialize

#

Class Method Details

.name(this_aminoacid = 'A') ⇒ Object

#

Bioroebe::Protein.name

#

.one_to_three(this_aminoacid = 'A') ⇒ Object

#

Bioroebe::Protein.one_to_three

#

.to_1(this_aminoacid = 'alanine') ⇒ Object

#

Bioroebe::Protein.to_1

#

Instance Method Details

#aliphatic_index(a = 2.9, b = 3.9) ⇒ Object

#

aliphatic_index

#

#can_be_N_glycosylated? ⇒ Boolean

#

can_be_N_glycosylated?

#

#can_be_stained_via_coomassie? ⇒ Boolean

#

can_be_stained_via_coomassie?

#

#can_form_disulfide_bonds? ⇒ Boolean

#

can_form_disulfide_bonds?

#

#hydrophobic_amino_acids?(hydrophobic_aminoacids = %w( A I L M V F W Y )) ⇒ Boolean

#

hydrophobic_amino_acids?

#

#is_glycosylated? ⇒ Boolean Also known as: glycosylated?

#

is_glycosylated?

#

#reset ⇒ Object

#

reset

#

#reverse_translate ⇒ Object Also known as: revtrans

#

reverse_translate

#

#initialize(this_sequence = 'ATCG') ⇒ `Protein`

.name(this_aminoacid = 'A') ⇒ `Object`

.one_to_three(this_aminoacid = 'A') ⇒ `Object`

.to_1(this_aminoacid = 'alanine') ⇒ `Object`

#aliphatic_index(a = 2.9, b = 3.9) ⇒ `Object`

#can_be_N_glycosylated? ⇒ `Boolean`

#can_be_stained_via_coomassie? ⇒ `Boolean`

#can_form_disulfide_bonds? ⇒ `Boolean`

#hydrophobic_amino_acids?(hydrophobic_aminoacids = %w( A I L M V F W Y )) ⇒ `Boolean`

#is_glycosylated? ⇒ `Boolean` Also known as: glycosylated?

#reset ⇒ `Object`

#reverse_translate ⇒ `Object` Also known as: revtrans

#set_delegate_string(i) ⇒ `Object`

#to_dna ⇒ `Object`

#to_rna ⇒ `Object` Also known as: aminoacid_to_codon

#weight? ⇒ `Boolean` Also known as: molecular_weight?

#weight_of(i) ⇒ `Object`