Class: Bio::PolyploidTools::NoSNPSequence

Inherits:
SNP
  • Object
show all
Defined in:
lib/bio/PolyploidTools/NoSNPSequence.rb

Instance Attribute Summary collapse

Attributes inherited from SNP

#chromosome, #container, #errors, #exon_list, #flanking_size, #gene, #genomes_count, #hit_count, #ideal_max, #ideal_min, #max_hits, #original, #original_name, #position, #primer_3_min_seq_length, #repetitive, #snp, #snp_in, #snp_type, #template_sequence, #use_reference, #variation_free_region

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from SNP

#add_exon, #aligned_sequences_fasta, #aligned_snp_position, #chromosome_genome, #chromosome_group, #covered_region, #cut_and_pad_sequence_to_primer_region, #cut_sequence_to_primer_region, #exon_for_chromosome, #exon_sequences, #get_snp_position_after_trim, #get_target_sequence, #initialize, #left_padding, #local_position, #padded_position, #parental_sequences, #primer_3_string, #primer_fasta_string, #return_primer_3_string, #reverse_complement_string, #right_padding, #setTemplateFromFastaFile, #short_s, #snp_id_in_seq, #surrounding_exon_sequences, #surrounding_masked_chromosomal_snps, #surrounding_parental_sequences, #to_fasta, #to_polymarker_coordinates, #to_polymarker_sequence

Constructor Details

This class inherits a constructor from Bio::PolyploidTools::SNP

Instance Attribute Details

#sequence_originalObject

Returns the value of attribute sequence_original.



10
11
12
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 10

def sequence_original
  @sequence_original
end

Class Method Details

.parse(reg_str) ⇒ Object

Format: snp name,chromsome from contig,microarray sequence BS00068396_51,2AS,CGAAGCGATCCTACTACATTGCGTTCCTTTCCCACTCCCAGGTCCCCCTAATGCAGGATCTTGATTAGTCGTGTGAACAACTGAAATTTGAGCGCCACAA



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 14

def self.parse(reg_str)
  reg_str.chomp!
  snp = NoSNPSequence.new

  arr = reg_str.split(",")
  
  if arr.size == 3
    snp.gene, snp.chromosome, snp.sequence_original = reg_str.split(",")
  elsif arr.size == 2
   snp.gene, snp.sequence_original = arr
 else
   throw SNPSequenceException.new "Need two or three fields to parse, and got #{arr.size} in #{reg_str}"
  end
  #snp.position = snp.position.to_i
  #snp.original.upcase!
  #snp.snp.upcase!  
  snp.chromosome. strip!
  snp.snp_in = snp.chromosome
  snp.parse_sequence_snp
  snp.exon_list = Hash.new()
  snp
end

Instance Method Details

#aligned_sequencesObject



265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 265

def aligned_sequences
 
  return @aligned_sequences if @aligned_sequences
  if sequences_to_align.size == 1
    @aligned_sequences = sequences_to_align
    return @aligned_sequences
  end
  options = ['--maxiterate', '1000', '--localpair', '--quiet']
  mafft = Bio::MAFFT.new( "mafft" , options)
#  puts "Before MAFT:#{sequences_to_align.inspect}"
  report = mafft.query_align(sequences_to_align)
  @aligned_sequences = report.alignment
   #   puts "MAFFT: #{report.alignment.inspect}" 
  @aligned_sequences
end

#count_deletions_around(position, target_chromosome) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 95

def count_deletions_around(position,target_chromosome)
  first_aligned = aligned_sequences[target_chromosome]

  pos_start = position - flanking_size
  pos_end = position + flanking_size
  pos_start = 0 if pos_start < 0
  pos_end = first_aligned.size - 1 if pos_end >= first_aligned.size
  count = 0
  for i in pos_start..pos_end
    has_del = false

    aligned_sequences.each_pair do |name, val|  
      has_del = true if val[i] == '-'
      print "#{val[i]}\t"
    end
    count += 1 if has_del
    print "#{count}\n"
  end
  return count
end

#get_base_in_different_chromosome(position, target_chromosome) ⇒ Object



217
218
219
220
221
222
223
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 217

def get_base_in_different_chromosome(position, target_chromosome)

    aligned_sequences.each_pair do |name, val|  
      next if target_chromosome == name
      return val[position]
    end
end

#mask_aligned_chromosomal_snp(chromosome) ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 56

def mask_aligned_chromosomal_snp(chromosome)
  return nil if  aligned_sequences.values.size == 0
  names = exon_sequences.keys

  masked_snps = aligned_sequences[chromosome].downcase if aligned_sequences[chromosome]
   
  masked_snps = "-" * aligned_sequences.values[0].size  unless aligned_sequences[chromosome]
  #TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
  i = 0
  while i < masked_snps.size
    different = 0
    cov = 0
    from_group = 0
    names.each do | chr |
      if aligned_sequences[chr] and aligned_sequences[chr][i]  != "-"
        cov += 1 

        from_group += 1 if chr[0] == chromosome_group
        #puts "Comparing #{chromosome_group} and #{chr[0]} as chromosomes"
        if chr != chromosome 
          $stderr.puts "WARN: No base for #{masked_snps} : ##{i}" unless masked_snps[i].upcase
          $stderr.puts "WARN: No base for #{aligned_sequences[chr]} : ##{i}" unless masked_snps[i].upcase
          different += 1  if masked_snps[i].upcase != aligned_sequences[chr][i].upcase 
        end
      end
    end
    masked_snps[i] = "-" if different == 0
    masked_snps[i] = "-" if cov == 1
    masked_snps[i] = "*" if cov == 0
    expected_snps = names.size - 1
   #puts "Diferences: #{different} to expected: #{ expected_snps } [#{i}] Genome count (#{from_group} == #{genomes_count})"
    
    masked_snps[i] = masked_snps[i].upcase if different == expected_snps and from_group == genomes_count

    i += 1
  end
  masked_snps
end

#parse_sequence_snpObject



41
42
43
44
45
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 41

def parse_sequence_snp
   @position = (sequence_original.length / 2).to_i 
   @original = sequence_original[@position]
   @snp = @original
end

#parse_snpObject



37
38
39
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 37

def parse_snp
   
end

#primer_3_all_strings(target_chromosome, parental) ⇒ Object



225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 225

def primer_3_all_strings(target_chromosome, parental) 
  pr = primer_region(target_chromosome, parental )
  primer_3_propertes = Array.new

  seq_original = String.new(pr.sequence)
  #puts seq_original.size.to_s << "-" << primer_3_min_seq_length.to_s
  return primer_3_propertes if seq_original.size < primer_3_min_seq_length

  if pr.homoeologous
    snp_type = "homoeologous"
  else
    snp_type = "non-homoeologous"
  end

  pr.chromosome_specific.each do |pos|
    
    seq_snp =  String.new(pr.sequence)
    orgiginal_base = seq_snp[pos]
    other_chromosome_base = get_base_in_different_chromosome(pos, target_chromosome)

    args = {
      :name =>"#{gene} A chromosome_specific exon #{snp_type} #{chromosome}", 
      :left_pos => pos,  
      :sequence=>seq_original
    }
    
    
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene} B chromosome_specific exon #{snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    #TODO: Find base from another chromosome
    seq_snp[pos] =  other_chromosome_base.upcase
    
    primer_3_propertes << return_primer_3_string(args)
  end

  
  primer_3_propertes
end

#primer_region(target_chromosome, parental_chr) ⇒ Object



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 116

def primer_region(target_chromosome, parental_chr )
  chromosome_seq = aligned_sequences[target_chromosome]
  #chromosome_seq = "-" * parental.size unless chromosome_seq
  if aligned_sequences.size == 0
    #puts aligned_sequences.inspect
    #puts surrounding_exon_sequences.inspect
    #puts self.inspect
    chromosome_seq = surrounding_exon_sequences[target_chromosome]

  end
  chromosome_seq = chromosome_seq.downcase

  mask = mask_aligned_chromosomal_snp(target_chromosome)

  pr = PrimerRegion.new
  pr.homoeologous = false
  position_in_region = 0
  parental = chromosome_seq.clone
  (0..chromosome_seq.size-1).each do |i|

    if chromosome_seq[i] != '-'
      case   
      when mask[i] == '-'
        #When the mask doesnt detect a SNP, so we take the parental
        parental[i] = chromosome_seq[i] unless Bio::NucleicAcid::is_unambiguous(parental[i])
      when /[[:upper:]]/.match(mask[i])
        #This is a good candidate for marking a SNP
        #We validate that the consensus from the sam file accepts the variation from the chromosomal sequence
        if parental[i] == '-'
          parental[i] = mask[i]
          pr.crhomosome_specific_intron << position_in_region
        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
          parental[i] = mask[i]
          pr.chromosome_specific << position_in_region if count_deletions_around(1,target_chromosome) < 3
          pr.chromosome_specific_in_mask << i
        end

      when /[[:lower:]]/.match(mask[i])
        #this is not that good candidate, but sitll gives specificity
        if parental[i] == '-'
          parental[i] = mask[i]
          pr.almost_crhomosome_specific_intron << position_in_region
        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
          parental[i] = mask[i].upcase
          pr.almost_chromosome_specific << position_in_region
          pr.almost_chromosome_specific_in_mask << i
        end
      end #Case closes
      pr.position_in_mask_from_template[position_in_region] = i
      position_in_region += 1
    end #Closes region with bases 
  end         

  pr.sequence=parental.gsub('-','')
  pr
end

#return_primer_3_string_test(opts = {}) ⇒ Object



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 173

def return_primer_3_string_test(opts={})

  left = opts[:right_pos]
  right = opts[:right_pos]
  sequence =  opts[:sequence]
  orientation = "forward"
  if opts[:right_pos]
    orientation = "forward"
    if left > right
      left = sequence.size - left - 1
      right = sequence.size - right - 1
      sequence = reverse_complement_string(sequence)
      orientation = "reverse"
    end
    if @variation_free_region > 0
      check_str = sequence[right+1, @variation_free_region]
      return nil if check_str != check_str.downcase
    end

  end


  str = "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
  str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
  str << "SEQUENCE_FORCE_RIGHT_END=#{right}\n" if opts[:right_pos]
  str << "SEQUENCE_TEMPLATE=#{sequence}\n"
  str << "=\n"


  #In case that we don't have a right primer, we do both orientations
  unless opts[:right_pos]
    sequence =  opts[:sequence]    
    left = sequence.size - left - 1
    orientation = "reverse"
    sequence = reverse_complement_string(sequence)
    str << "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
    str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
    str << "SEQUENCE_TEMPLATE=#{sequence}\n"
    str << "=\n"
  end

  str
end

#sequences_to_alignObject



51
52
53
54
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 51

def sequences_to_align
  @sequences_to_align = surrounding_exon_sequences unless @sequences_to_align
  @sequences_to_align
end

#to_sObject



47
48
49
# File 'lib/bio/PolyploidTools/NoSNPSequence.rb', line 47

def to_s
  "#{gene}:#{chromosome}"
end