Module: BioDSL::Translate

Included in:
Seq
Defined in:
lib/BioDSL/seq/translate.rb

Overview

Namespace for Translate methods.

Constant Summary collapse

# Start codons of translation table 11
# (www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/index.cgi?chapter=cgencodes#SG11)
#
#   AAs    = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
#   Starts = ---M---------------M------------MMMM---------------M------------
#   Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
#   Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
#   Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
#
# Each column of the table above is one codon (Base1 + Base2 + Base3); the
# codons flagged with 'M' in the Starts row are the keys of this hash.
# Frozen so the shared table cannot be modified at runtime.
TRANS_TAB11_START =
{
  'TTG' => 'M', 'CTG' => 'M', 'ATT' => 'M', 'ATC' => 'M',
  'ATA' => 'M', 'ATG' => 'M', 'GTG' => 'M'
}.freeze
# Codon => amino acid lookup for translation table 11, using one-letter
# amino acid codes; '*' marks a stop codon. Each row of the literal holds
# the four codons that differ only in their second base (T, C, A, G).
# Frozen so the shared table cannot be modified at runtime.
TRANS_TAB11 =
{
  'TTT' => 'F', 'TCT' => 'S', 'TAT' => 'Y', 'TGT' => 'C',
  'TTC' => 'F', 'TCC' => 'S', 'TAC' => 'Y', 'TGC' => 'C',
  'TTA' => 'L', 'TCA' => 'S', 'TAA' => '*', 'TGA' => '*',
  'TTG' => 'L', 'TCG' => 'S', 'TAG' => '*', 'TGG' => 'W',
  'CTT' => 'L', 'CCT' => 'P', 'CAT' => 'H', 'CGT' => 'R',
  'CTC' => 'L', 'CCC' => 'P', 'CAC' => 'H', 'CGC' => 'R',
  'CTA' => 'L', 'CCA' => 'P', 'CAA' => 'Q', 'CGA' => 'R',
  'CTG' => 'L', 'CCG' => 'P', 'CAG' => 'Q', 'CGG' => 'R',
  'ATT' => 'I', 'ACT' => 'T', 'AAT' => 'N', 'AGT' => 'S',
  'ATC' => 'I', 'ACC' => 'T', 'AAC' => 'N', 'AGC' => 'S',
  'ATA' => 'I', 'ACA' => 'T', 'AAA' => 'K', 'AGA' => 'R',
  'ATG' => 'M', 'ACG' => 'T', 'AAG' => 'K', 'AGG' => 'R',
  'GTT' => 'V', 'GCT' => 'A', 'GAT' => 'D', 'GGT' => 'G',
  'GTC' => 'V', 'GCC' => 'A', 'GAC' => 'D', 'GGC' => 'G',
  'GTA' => 'V', 'GCA' => 'A', 'GAA' => 'E', 'GGA' => 'G',
  'GTG' => 'V', 'GCG' => 'A', 'GAG' => 'E', 'GGG' => 'G'
}.freeze

Instance Method Summary collapse

Instance Method Details

#translate(trans_tab = 11) ⇒ Object Also known as: to_protein



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/BioDSL/seq/translate.rb', line 77

# Translate this DNA sequence into a new protein sequence.
#
# The first codon is resolved through the start codon table of the selected
# translation table, every following codon through the regular codon table.
# Codons are upcased before lookup, so the sequence may be in any case.
#
# @param trans_tab [Integer] translation table number; only 11 is known.
# @return [Seq] new entry of type :protein carrying this entry's seq_name.
# @raise [SeqError] if the type is not :dna, the length is not a multiple
#   of 3, the translation table is unknown, the first codon is not a start
#   codon, or any later codon is missing from the codon table.
def translate(trans_tab = 11)
  unless @type == :dna
    fail SeqError, "Sequence type must be 'dna' - not #{@type}"
  end

  unless (length % 3).zero?
    fail SeqError,
         "Sequence length must be a multiple of 3 - was: #{length}"
  end

  case trans_tab
  when 11
    codon_start_hash = TRANS_TAB11_START
    codon_hash       = TRANS_TAB11
  else
    fail SeqError, "Unknown translation table: #{trans_tab}"
  end

  codon = @seq[0...3].upcase
  aa    = codon_start_hash[codon]

  fail SeqError, "Unknown start codon: #{codon}" if aa.nil?

  # Copy the table entry so appending below never mutates the shared table.
  protein = aa.dup

  (3...length).step(3) do |i|
    codon = @seq[i, 3].upcase
    aa    = codon_hash[codon]

    fail SeqError, "Unknown codon: #{codon}" if aa.nil?

    # String#<< copies the characters, so no dup of the table entry needed.
    protein << aa
  end

  # The last translated residue is always removed from the result.
  # NOTE(review): presumably this strips the terminating stop codon, but it
  # also drops a real amino acid when the sequence does not end with a stop
  # codon - confirm that is intended.
  Seq.new(seq_name: @seq_name, seq: protein[0..-2], type: :protein)
end

#translate!(trans_tab = 11) ⇒ Object Also known as: to_protein!

Method to translate a DNA sequence to protein.



64
65
66
67
68
69
70
71
72
73
# File 'lib/BioDSL/seq/translate.rb', line 64

# Translate this DNA sequence to protein in place.
#
# Delegates the translation to #translate and then copies the name,
# sequence, type and quality of the resulting entry onto the receiver.
#
# @param trans_tab [Integer] translation table number handed to #translate.
# @return [self] the receiver, after its attributes were overwritten.
def translate!(trans_tab = 11)
  protein = translate(trans_tab)
  name    = protein.seq_name

  # Duplicate the strings so the receiver does not share them with the
  # temporary entry; a missing name stays nil.
  self.seq_name = (name.dup if name)
  self.seq      = protein.seq.dup
  self.type     = protein.type
  self.qual     = protein.qual

  self
end