Class: ViralSeq::Sequence

Inherits:
Object
  • Object
show all
Defined in:
lib/viral_seq/sequence.rb

Overview

ViralSeq::Sequence class for sequence operation

Examples:

create a sequence object

seq = ViralSeq::Sequence.new('my_sequence', 'ACCTAGGTTCGGAGC')
=> #<ViralSeq::Sequence:0x00007fd03c8c10b8 @name="my_sequence", @dna="ACCTAGGTTCGGAGC", @aa_string="", @aa_array=[]>

return dna sequence as String

seq.dna
=> "ACCTAGGTTCGGAGC"

reverse complement sequence of DNA sequence

seq.rc
=> "GCTCCGAACCTAGGT"

change @dna to reverse complement DNA sequence

seq.rc!

translate the DNA sequence, return values for @aa_string and @aa_array

seq = ViralSeq::Sequence.new('my_sequence', 'AWTCGRAGAG')
seq.translate(1)
seq.aa_string
=> "##E"
seq.aa_array
=> ["IF", "EG", "E"]

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(name = ">sequence", dna_sequence = "") ⇒ Sequence

initialize a ViralSeq::Sequence class with sequence name (default as ‘>sequence’) and DNA sequence as String object



32
33
34
35
36
37
# File 'lib/viral_seq/sequence.rb', line 32

def initialize (name = ">sequence",dna_sequence ="")
  @name = name
  @dna = dna_sequence.upcase
  @aa_string = ""
  @aa_array = []
end

Instance Attribute Details

#aa_arrayArray

ambiguity dna sequence will be translated in all possible amino acid sequence at the position



50
51
52
# File 'lib/viral_seq/sequence.rb', line 50

def aa_array
  @aa_array
end

#aa_stringString



46
47
48
# File 'lib/viral_seq/sequence.rb', line 46

def aa_string
  @aa_string
end

#dnaString



43
44
45
# File 'lib/viral_seq/sequence.rb', line 43

def dna
  @dna
end

#nameString



40
41
42
# File 'lib/viral_seq/sequence.rb', line 40

def name
  @name
end

Instance Method Details

#aa_lengthInteger



97
98
99
# File 'lib/viral_seq/sequence.rb', line 97

def aa_length
  @aa_string.length
end

#check_drm(drm_list_single_type) ⇒ Object

Similar to #sdrm but use a DRM list as a param



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/viral_seq/sequence.rb', line 143

def check_drm(drm_list_single_type)
  aa_array = self.aa_array
  out_hash = {}

  drm_list_single_type.each do |position, mut|
    wt_aa = mut[0]
    mut_aas = mut[1]
    test_aa = aa_array[position - 1]
    if test_aa.size == 1 and mut_aas.include?(test_aa)
      out_hash[position] = [wt_aa, test_aa]
    elsif test_aa.size > 1
      test_aa_array = test_aa.split("")
      mut_detected = test_aa_array & mut_aas

      if !mut_detected.empty?
        out_hash[position] = [wt_aa, mut_detected.join]
      end

    end
  end
  return out_hash
end

#dna_lengthInteger



92
93
94
# File 'lib/viral_seq/sequence.rb', line 92

def dna_length
  @dna.length
end

#locator(ref_option = :HXB2, algorithm = 1) ⇒ Array

HIV sequence locator function, resembling HIV Sequence Locator from LANL

# current version only supports nucleotide sequence, not for amino acid sequence.

Examples:

identify the location of the input sequence on the HXB2 genome

sequence = 'AGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATC'
s = ViralSeq::Sequence.new('my_sequence', sequence)
loc = s.locator(:HXB2)
h = ViralSeq::SeqHash.new; h.dna_hash['HXB2'] = loc[5]; h.dna_hash[s.name] = loc[4]
rs_string = h.to_rsphylip.split("\n")[1..-1].join("\n") # get a relaxed phylip format string for display of alignment.
puts "The input sequence \"#{s.name}\" is located on the HXB2 nt sequence from #{loc[0].to_s} to #{loc[1].to_s}.\nIt is #{loc[2].round(1).to_s}% similar to the reference.\nIt #{loc[3]? "does" : "does not"} have indels.\nThe alignment is\n#{rs_string}"
=> The input sequence "my_sequence" is located on the HXB2 nt sequence from 2333 to 2433.
=> It is 97.0% similar to the reference.
=> It does not have indels.
=> The alignment is
=> HXB2         AGCAGATGAT ACAGTATTAG AAGAAATGAA TTTGCCAGGA AGATGGAAAC CAAAAATGAT AGGGGGAATT GGAGGTTTTA TCAAAGTAAG ACAGTATGAT C
=> my_sequence  AGCAGATGAT ACAGTATTAG AAGAAATAAA TTTGCCAGGA AGATGGAAAC CAAAAATGAT AGGGGGAATT GGAGGTTTTA TCAAAGTAAG ACAATATGAT C

See Also:



198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/viral_seq/sequence.rb', line 198

def locator(ref_option = :HXB2, algorithm = 1)
  seq = self.dna
  ref = ref_option.to_s
  begin
    loc = VirustLocator::Locator.exec(seq, "nt", algorithm, ref).split("\t")
    loc[0] = loc[0].to_i
    loc[1] = loc[1].to_i
    loc[2] = loc[2].to_f.round(1)
    if loc[3].to_s.downcase == "true"
      loc[3] = true
    else
      loc[3] = false
    end
  rescue => e
    puts "Unexpected error occured."
    puts "Exception Class: #{ e.class.name }"
    puts "Exception Message: #{ e.message }"
    puts "Exception Backtrace: #{ e.backtrace[0] }"
    puts "ViralSeq.sequence_locator returns nil"
    return nil
  end
  return loc
end

#rev_complementString Also known as: rc



53
54
55
# File 'lib/viral_seq/sequence.rb', line 53

def rev_complement
  @dna.rc
end

#rev_complement!Object Also known as: rc!

replace the @dna with reverse complement DNA sequence.



58
59
60
# File 'lib/viral_seq/sequence.rb', line 58

def rev_complement!
  @dna = @dna.rc
end

#sdrm(option, start_aa = 1) ⇒ Hash

resistant mutation interpretation for a chosen region from a translated ViralSeq::Sequence object

Examples:

examine an HIV PR region sequence for drug resistance mutations

my_seq_name = 'a_pr_seq'
my_seq = 'CCTCAGATCACTCTTTGGCAACGACCCCTCGTCACAGTAAAAATAGGAGGGCAATTAAAGGAAGCTCTATTAGATACAGGAGCAGATAATACAGTATTAGAAGACATGGAGTTACCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACCCATAGAAATCTGTGGGCATAAAACTACAGGTACAGTGTTAATAGGACCTACACCCGTCAACATAATTGGAAGAGATCTGTTGACTCAGCTTGGTTGCACTTTAAATTTT'
s = ViralSeq::Sequence.new(my_seq_name, my_seq)
s.translate
s.sdrm(:hiv_pr)
=> {30=>["D", "N"], 88=>["N", "D"]}


113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/viral_seq/sequence.rb', line 113

def sdrm(option, start_aa = 1)
  aa_array = self.aa_array
  out_hash = {}
  sdrm = ViralSeq::DRMs.sdrm_hash(option)
  aa_length = aa_array.size
  end_aa = start_aa + aa_length - 1
  (start_aa..end_aa).each do |position|
    array_position = position - start_aa
    if sdrm.keys.include?(position)
      wt_aa = sdrm[position][0]
      test_aa = aa_array[array_position]
      if test_aa.size == 1
        unless wt_aa == test_aa
          if sdrm[position][1].include?(test_aa)
            out_hash[position] = [wt_aa,test_aa]
          end
        end
      else
        test_aa_array = test_aa.split("")
        if (test_aa_array & sdrm[position][1])
          out_hash[position] = [wt_aa,test_aa]
        end
      end
    end
  end
  return out_hash
end

#sequence_clip(p1 = 0, p2 = 0, ref_option = :HXB2) ⇒ ViralSeq::Sequence?

Given start and end positions on the reference genome, return a sub-sequence of the target sequence in that range

Examples:

trim a sequence to fit in the range of [2333, 2433] on the HXB2 nt reference

seq = "CCTCAGATCACTCTTTGGCAACGACCCCTAGTTACAATAAGGGTAGGGGGGCAACTAAAGGAAGCCCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATCAGATACCCATAGAAATTTGTGGACATGAAGCTATAGGTACAGTATTAGTGGGACCTACACCTGTCAACATAATTGGGAGAAATCTGTTGACTCAGATTGGTTGCACTCTAAATTTT"
s = ViralSeq::Sequence.new('my_seq', seq)
s.sequence_clip(2333, 2433, :HXB2).dna
=> "AGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAGATGGAAACCAAAAATGATAGGGGGAATTGGAGGTTTTATCAAAGTAAGACAATATGATC"


235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'lib/viral_seq/sequence.rb', line 235

def sequence_clip(p1 = 0, p2 = 0, ref_option = :HXB2)
  loc = self.locator(ref_option)
  l1 = loc[0]
  l2 = loc[1]
  if (p1 >= l1) & (p2 <= l2)
      seq = loc[4]
      ref = loc[5]
      g1 = 0
      ref.each_char do |char|
          break if l1 == p1
          g1 += 1
          l1 += 1 unless char == "-"
      end
      g2 = 1
      ref.reverse.each_char do |char|
          break if l2 == p2
          g2 += 1
          l2 -= 1 unless char == "-"
      end
      return ViralSeq::Sequence.new(self.name,seq[g1..(-g2)].tr("-",""))
  else
      return nil
  end
end

#translate(initial_position = 0) ⇒ Object

translate @dna to amino acid sequence. generate values for @aa_string and @aa_array



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/viral_seq/sequence.rb', line 69

def translate(initial_position = 0)
  @aa_string = ""
  require_sequence = @dna[initial_position..-1]
  base_array = []
  require_sequence.each_char {|base| base_array << base}
  while (base_array.length>=3) do
    base_3= ""
    3.times {base_3 += base_array.shift}
    @aa_string << amino_acid(base_3)
  end

  @aa_array = []
  require_sequence = @dna[initial_position..-1].tr('-','N')
  base_array = []
  require_sequence.each_char {|base| base_array << base}
  while (base_array.length>=3) do
    base_3= ""
    3.times{base_3 += base_array.shift}
    @aa_array<< amino_acid_2(base_3)
  end
end