Class: Bio::PolyploidTools::SNP

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/PolyploidTools/SNP.rb

Direct Known Subclasses

SNPSequence

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeSNP

Returns a new instance of SNP.



38
39
40
41
42
# File 'lib/bio/PolyploidTools/SNP.rb', line 38

def initialize
  @genomes_count = 3 #TODO: if we want to use this with other polyploids, me need to set this as a variable in the main script. 
  @primer_3_min_seq_length = 50
  @variation_free_region = 0
end

Instance Attribute Details

#chromosomeObject

Returns the value of attribute chromosome.



16
17
18
# File 'lib/bio/PolyploidTools/SNP.rb', line 16

def chromosome
  @chromosome
end

#containerObject

Returns the value of attribute container.



10
11
12
# File 'lib/bio/PolyploidTools/SNP.rb', line 10

def container
  @container
end

#exon_listObject

Returns the value of attribute exon_list.



9
10
11
# File 'lib/bio/PolyploidTools/SNP.rb', line 9

def exon_list
  @exon_list
end

#flanking_sizeObject

Returns the value of attribute flanking_size.



11
12
13
# File 'lib/bio/PolyploidTools/SNP.rb', line 11

def flanking_size
  @flanking_size
end

#geneObject

GENE,ORIGINAL,POS,SNP



8
9
10
# File 'lib/bio/PolyploidTools/SNP.rb', line 8

def gene
  @gene
end

#genomes_countObject

Returns the value of attribute genomes_count.



14
15
16
# File 'lib/bio/PolyploidTools/SNP.rb', line 14

def genomes_count
  @genomes_count
end

#ideal_maxObject

Returns the value of attribute ideal_max.



11
12
13
# File 'lib/bio/PolyploidTools/SNP.rb', line 11

def ideal_max
  @ideal_max
end

#ideal_minObject

Returns the value of attribute ideal_min.



11
12
13
# File 'lib/bio/PolyploidTools/SNP.rb', line 11

def ideal_min
  @ideal_min
end

#originalObject

GENE,ORIGINAL,POS,SNP



8
9
10
# File 'lib/bio/PolyploidTools/SNP.rb', line 8

def original
  @original
end

#original_nameObject

GENE,ORIGINAL,POS,SNP



8
9
10
# File 'lib/bio/PolyploidTools/SNP.rb', line 8

def original_name
  @original_name
end

#positionObject

GENE,ORIGINAL,POS,SNP



8
9
10
# File 'lib/bio/PolyploidTools/SNP.rb', line 8

def position
  @position
end

#primer_3_min_seq_lengthObject

Returns the value of attribute primer_3_min_seq_length.



15
16
17
# File 'lib/bio/PolyploidTools/SNP.rb', line 15

def primer_3_min_seq_length
  @primer_3_min_seq_length
end

#snpObject

GENE,ORIGINAL,POS,SNP



8
9
10
# File 'lib/bio/PolyploidTools/SNP.rb', line 8

def snp
  @snp
end

#snp_inObject

GENE,ORIGINAL,POS,SNP



8
9
10
# File 'lib/bio/PolyploidTools/SNP.rb', line 8

def snp_in
  @snp_in
end

#template_sequenceObject

Returns the value of attribute template_sequence.



12
13
14
# File 'lib/bio/PolyploidTools/SNP.rb', line 12

def template_sequence
  @template_sequence
end

#use_referenceObject

Returns the value of attribute use_reference.



13
14
15
# File 'lib/bio/PolyploidTools/SNP.rb', line 13

def use_reference
  @use_reference
end

#variation_free_regionObject

Returns the value of attribute variation_free_region.



17
18
19
# File 'lib/bio/PolyploidTools/SNP.rb', line 17

def variation_free_region
  @variation_free_region
end

Class Method Details

.parse(reg_str) ⇒ Object

Format: Gene_name,Original,SNP_Pos,pos,chromosome A_comp0_c0_seq1,C,519,A



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/bio/PolyploidTools/SNP.rb', line 22

def self.parse(reg_str)
  reg_str.chomp!
  snp = SNP.new
  snp.gene, snp.original, snp.position, snp.snp, snp.chromosome = reg_str.split(",")
  snp.position.strip!
  snp.position =  snp.position.to_i
  snp.original.upcase!
  snp.original.strip!
  snp.snp.upcase!
  snp.snp.strip!  
  snp.chromosome.strip!
  snp.exon_list = Hash.new()
  snp.use_reference = false
  snp
end

Instance Method Details

#add_exon(exon, arm) ⇒ Object



81
82
83
84
# File 'lib/bio/PolyploidTools/SNP.rb', line 81

def add_exon(exon, arm)
  @exon_list[arm] = exon unless @exon_list[arm]
  @exon_list[arm] = exon if exon.record.score > @exon_list[arm].record.score
end

#aligned_sequencesObject



466
467
468
469
470
471
472
473
474
475
476
# File 'lib/bio/PolyploidTools/SNP.rb', line 466

def aligned_sequences
 
  return @aligned_sequences if @aligned_sequences
  options = ['--maxiterate', '1000', '--localpair', '--quiet']
  mafft = Bio::MAFFT.new( "mafft" , options)
#  puts "Before MAFT:#{sequences_to_align.inspect}"
  report = mafft.query_align(sequences_to_align)
  @aligned_sequences = report.alignment
   #   puts "MAFFT: #{report.alignment.inspect}" 
  @aligned_sequences
end

#aligned_sequences_fastaObject



478
479
480
481
482
483
484
485
486
487
488
489
# File 'lib/bio/PolyploidTools/SNP.rb', line 478

def aligned_sequences_fasta
  ret_str = ""
  aligned_sequences.each_pair do |name, seq|
    ret_str << ">#{self.to_s}-#{name}\n#{seq}\n" 
  end
  ret_str << ">MASK #{chromosome}\n#{mask_aligned_chromosomal_snp(chromosome)}\n"

  pr = primer_region(chromosome, snp_in )
  ret_str << pr.to_fasta
  ret_str
  ret_str 
end

#aligned_snp_positionObject



491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
# File 'lib/bio/PolyploidTools/SNP.rb', line 491

def aligned_snp_position
  return @aligned_snp_position if @aligned_snp_position
  pos = -1
  parental_strings = Array.new
  parental_sequences.keys.each do | par |
    
    parental_strings << aligned_sequences[par]
  end
  template_sequence = nil
  aligned_sequences.keys.each do |temp |
    template_sequence = aligned_sequences[ temp ] if  aligned_sequences[ temp ][0] != "-"
  end
  $stderr.puts "WARN: #{self.to_s} #{parental_sequences.keys} is not of size 2 (#{parental_strings.size})" if parental_strings.size != 2

  i = 0
  differences = 0
  local_pos_in_gene = flanking_size
  local_pos = 0
  started = false
#TODO: Validate the cases when the alignment has padding on the left on all the chromosomes

  while i < parental_strings[0].size  do
    if local_pos_in_gene == local_pos
      pos = i
      if parental_strings[0][i] == parental_strings[1][i]
        $stderr.puts "WARN: #{self.to_s} doesn't have a SNP in the marked place (#{i})! \n#{parental_strings[0]}\n#{parental_strings[1]}"
      end 

    end

    started = true if template_sequence[i] != "-" 
    if started == false or template_sequence[i] != "-" 
      local_pos += 1
    end
    i += 1
  end
  @aligned_snp_position = pos
  return pos
end

#chromosome_genomeObject



68
69
70
# File 'lib/bio/PolyploidTools/SNP.rb', line 68

def chromosome_genome
  chromosome[1]
end

#chromosome_groupObject

We Only want the chromosome, we drop the arm. We don’t use this any more. def chromosome= (chr)

@chromosome = chr

end



64
65
66
# File 'lib/bio/PolyploidTools/SNP.rb', line 64

def chromosome_group
  chromosome[0]
end

#covered_regionObject



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/bio/PolyploidTools/SNP.rb', line 86

def covered_region
  return @covered_region if @covered_region
  if self.use_reference
     reg = Bio::DB::Fasta::Region.new()
     reg.entry = gene
     reg.orientation = :forward
     reg.start = self.position - self.flanking_size
     reg.end = self.position + self.flanking_size
     
     reg.start = 1 if reg.start < 1
     
     return reg
  end
  
  min = @position
  max = @position
 # puts "Calculating covered region for #{self.inspect}"
#  puts "#{@exon_list.inspect}"
  #raise SNPException.new "Exons haven't been loaded for #{self.to_s}" if @exon_list.size == 0
  if @exon_list.size == 0
    min = self.position - self.flanking_size
    min = 1 if min < 1
    max =  self.position + self.flanking_size
  end
  @exon_list.each do | chromosome, exon |
   # puts exon.inspect
    reg = exon.query_region
    min = reg.start if reg.start < min
    max = reg.end if reg.end > max
  end
  reg = Bio::DB::Fasta::Region.new()
  reg.entry = gene
  reg.orientation = :forward
  reg.start = min
  reg.end = max

  @covered_region = reg
  @covered_region
end

#cut_and_pad_sequence_to_primer_region(sequence) ⇒ Object



444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
# File 'lib/bio/PolyploidTools/SNP.rb', line 444

def cut_and_pad_sequence_to_primer_region(sequence)
  ideal_min = self.local_position - flanking_size 
  ideal_max = self.local_position + flanking_size
  left_pad = 0
  right_pad=0
  if ideal_min < 0
    left_pad = ideal_min * -1
    ideal_min = 0 
  end
  if ideal_max > sequence.size
    right_pad = ideal_max - sequence.size
    ideal_max = sequence.size - 1 
  end  
  ret = "-" * left_pad  << sequence[ideal_min..ideal_max] <<  "-" * right_pad 
  ret
end

#cut_sequence_to_primer_region(sequence) ⇒ Object



435
436
437
438
439
440
441
442
# File 'lib/bio/PolyploidTools/SNP.rb', line 435

def cut_sequence_to_primer_region(sequence)
  ideal_min = self.local_position - flanking_size 
  ideal_max = self.local_position + flanking_size
  ideal_min = 0 if ideal_min < 0
  ideal_max = sequence.size - 1 if ideal_max > sequence.size
  # len = ideal_max - ideal_min
  sequence[ideal_min..ideal_max]
end

#exon_fasta_stringObject



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/bio/PolyploidTools/SNP.rb', line 147

def exon_fasta_string
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position
  ret_str = ""
  container.parents.each  do |name, bam|
    ret_str << ">#{gene_region.entry}-#{self.position}_#{name} Overlapping_exons:#{gene_region.to_s} localSNPpo:#{local_pos_in_gene+1}\n" 
    to_print = parental_sequences[name]
    ret_str << to_print << "\n"
  end
  self.exon_sequences.each do | chromosome, exon_seq | 
    ret_str << ">#{chromosome}\n#{exon_seq}\n"
  end
  mask = masked_chromosomal_snps("1BS", flanking_size)
  ret_str << ">Mask\n#{mask}\n"
  ret_str
end

#exon_for_chromosome(chromosome) ⇒ Object



379
380
381
382
383
# File 'lib/bio/PolyploidTools/SNP.rb', line 379

def exon_for_chromosome (chromosome)
  selected_exon=exon_list[chromosome]
  puts "No exon with chromosome #{chromosome} for #{gene}"  unless selected_exon
  selected_exon
end

#exon_sequencesObject



680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
# File 'lib/bio/PolyploidTools/SNP.rb', line 680

def exon_sequences
  return @exon_sequences if @exon_sequences
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position
  @exon_sequences = Bio::Alignment::SequenceHash.new
  self.exon_list.each do |chromosome, exon| 
    exon_start_offset = exon.query_region.start - gene_region.start
    exon_seq  = "-" * exon_start_offset 
    exon_seq << container.chromosome_sequence(exon.target_region).to_s
    #puts exon_seq
   # l_pos = exon_start_offset + local_pos_in_gene
    unless exon.snp_in_gap
      #puts "local position: #{local_pos_in_gene}"
      #puts "Exon_seq: #{exon_seq}"
      exon_seq[local_pos_in_gene] = exon_seq[local_pos_in_gene].upcase
      exon_seq << "-" * (gene_region.size - exon_seq.size + 1)
      @exon_sequences[chromosome] = exon_seq
    end
  end
  @exon_sequences[@chromosome] = "-" * gene_region.size unless @exon_sequences[@chromosome]
  @exon_sequences
end

#left_paddingObject



126
127
128
129
130
# File 'lib/bio/PolyploidTools/SNP.rb', line 126

def left_padding
  flanking_size - self.local_position + 1
  #  primer_region.start - covered_region.start 
  # 0
end

#local_positionObject



138
139
140
141
# File 'lib/bio/PolyploidTools/SNP.rb', line 138

def local_position
#      puts "local_position #{self.position} #{self.covered_region.start}"
  self.position - self.covered_region.start
end

#mask_aligned_chromosomal_snp(chromosome) ⇒ Object



531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
# File 'lib/bio/PolyploidTools/SNP.rb', line 531

def mask_aligned_chromosomal_snp(chromosome)
  names = exon_sequences.keys
  parentals =  parental_sequences.keys

  local_pos_in_gene = aligned_snp_position
  masked_snps = aligned_sequences[chromosome].downcase if aligned_sequences[chromosome]
  masked_snps = "-" * aligned_sequences.values[0].size  unless aligned_sequences[chromosome]
  #TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
  i = 0
  while i < masked_snps.size
    different = 0
    cov = 0
    from_group = 0
    names.each do | chr |
      if aligned_sequences[chr] and aligned_sequences[chr][i]  != "-"
        cov += 1 

        from_group += 1 if chr[0] == chromosome_group
        #puts "Comparing #{chromosome_group} and #{chr[0]} as chromosomes"
        if chr != chromosome 
          $stderr.puts "WARN: No base for #{masked_snps} : ##{i}" unless masked_snps[i].upcase
          $stderr.puts "WARN: No base for #{aligned_sequences[chr]} : ##{i}" unless masked_snps[i].upcase
          different += 1  if masked_snps[i].upcase != aligned_sequences[chr][i].upcase 
        end
      end
    end
    masked_snps[i] = "-" if different == 0
    masked_snps[i] = "-" if cov == 1
    masked_snps[i] = "*" if cov == 0
    expected_snps = names.size - 1
   # puts "Diferences: #{different} to expected: #{ expected_snps } [#{i}] Genome count (#{from_group} == #{genomes_count})"
    
    masked_snps[i] = masked_snps[i].upcase if different == expected_snps and from_group == genomes_count

    if i == local_pos_in_gene
      masked_snps[i] = "&"
      bases = ""
      names.each do | chr |
        bases << aligned_sequences[chr][i]  if aligned_sequences[chr] and aligned_sequences[chr][i]  != "-"
      end
      
      code_reference = "n"
      code_reference = Bio::NucleicAcid.to_IUAPC(bases) unless bases == ""

      if Bio::NucleicAcid.is_valid(code_reference,   original) and Bio::NucleicAcid.is_valid(code_reference,   snp)
        masked_snps[i] = ":"
      end 

    end
    i += 1
  end
  masked_snps
end

#masked_chromosomal_snps(chromosome) ⇒ Object



585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
# File 'lib/bio/PolyploidTools/SNP.rb', line 585

def masked_chromosomal_snps(chromosome)
  chromosomes = exon_sequences
  names = chromosomes.keys
  masked_snps = chromosomes[chromosome].tr("-","+") if chromosomes[chromosome]
  masked_snps = "-" * covered_region.size unless chromosomes[chromosome]
  local_pos_in_gene = self.local_position
  ideal_min = local_pos_in_gene - flanking_size
  ideal_max = local_pos_in_gene + flanking_size
  i = 0
  while i < masked_snps.size  do
    if i > ideal_min and i <= ideal_max

      different = 0
      cov = 0
      names.each do | chr |
        if chromosomes[chr][i]  != "-"
          cov += 1 
          if chr != chromosome and masked_snps[i] != "+"
            different += 1  if masked_snps[i] != chromosomes[chr][i] 
          end
        end

      end
      masked_snps[i] = "-" if different == 0 and  masked_snps[i] != "+"
      masked_snps[i] = "-" if cov < 2
      masked_snps[i] = masked_snps[i].upcase if different > 1   

    else
      masked_snps[i] = "*"
    end   
    if i == local_pos_in_gene
      masked_snps[i] = "&"
    end
    i += 1
  end
  masked_snps
end

#padded_position(pos) ⇒ Object



143
144
145
# File 'lib/bio/PolyploidTools/SNP.rb', line 143

def padded_position (pos)
  pos + left_padding
end

#parental_sequencesObject



385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
# File 'lib/bio/PolyploidTools/SNP.rb', line 385

def parental_sequences
  return @parental_sequences if @parental_sequences
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position

  @parental_sequences = Bio::Alignment::SequenceHash.new
  container.parents.each  do |name, bam|
    seq = nil
    if bam
      seq = bam.consensus_with_ambiguities({:region=>gene_region}).to_s
    else
      seq = container.gene_model_sequence(gene_region)
      unless name == self.snp_in
        seq[local_pos_in_gene] = self.original 
      end
    end
    seq[local_pos_in_gene] = seq[local_pos_in_gene].upcase
    
    seq[local_pos_in_gene] = self.snp if name == self.snp_in    
    @parental_sequences [name] = seq
    #puts name
    #puts seq
  end
  @parental_sequences
end

#primer_3_all_strings(target_chromosome, parental) ⇒ Object



296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
# File 'lib/bio/PolyploidTools/SNP.rb', line 296

def primer_3_all_strings(target_chromosome, parental) 
  pr = primer_region(target_chromosome, parental )
  primer_3_propertes = Array.new

  seq_original = String.new(pr.sequence)
  #puts seq_original.size.to_s << "-" << primer_3_min_seq_length.to_s
  return primer_3_propertes if seq_original.size < primer_3_min_seq_length
  
  seq_original[pr.snp_pos] = self.original
  seq_original_reverse = reverse_complement_string(seq_original)

  seq_snp =  String.new(pr.sequence)
  seq_snp[pr.snp_pos] =  self.snp
  seq_snp_reverse = reverse_complement_string(seq_snp)

  rev_pos = seq_snp.size - position

  if pr.homoeologous
    snp_type = "homoeologous"
  else
    snp_type = "non-homoeologous"
  end

  pr.chromosome_specific.each do |pos|

    args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_specific exon #{snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original}
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_specific exon #{snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    primer_3_propertes << return_primer_3_string(args)
  end

  pr.almost_chromosome_specific.each do |pos|
    args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_semispecific exon #{snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original}
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_semispecific exon #{snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    primer_3_propertes << return_primer_3_string(args)

  end

  pr.crhomosome_specific_intron.each do |pos|

    args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_specific intron #{snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original}
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_specific exon #{snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    primer_3_propertes << return_primer_3_string(args)
  end

  pr.almost_crhomosome_specific_intron.each do |pos|
    args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_semispecific intron #{snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original}
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_semispecific exon #{snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    primer_3_propertes << return_primer_3_string(args)

  end


  args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_nonspecific all #{snp_type} #{chromosome}", :left_pos => pr.snp_pos, :sequence=>seq_original}
  primer_3_propertes << return_primer_3_string(args)
  args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_nonspecific all #{snp_type} #{chromosome}"
  args[:sequence] = seq_snp
  primer_3_propertes << return_primer_3_string(args)


  primer_3_propertes
end

#primer_3_string(target_chromosome, parental) ⇒ Object



374
375
376
377
# File 'lib/bio/PolyploidTools/SNP.rb', line 374

def primer_3_string(target_chromosome, parental) 
  strings = primer_3_all_strings(target_chromosome, parental) 
  strings.join
end

#primer_fasta_stringObject



165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/bio/PolyploidTools/SNP.rb', line 165

def primer_fasta_string
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position
  ret_str = ""

  surrounding_parental_sequences.each do |name, seq|
    ret_str << ">#{gene_region.entry}-#{self.position}_#{name}\n" 
    ret_str << "#{seq}\n"
  end

  #self.exon_sequences.each do | chromosome, exon_seq | 
  #  ex_seq = cut_sequence_to_primer_region(exon_seq)
  #  ret_str << ">#{chromosome}\n#{ex_seq}\n"
  #end
  self.surrounding_exon_sequences.each do |chromosome, exon_seq|
    ret_str << ">#{chromosome}\n#{exon_seq}\n"
  end

  mask = surrounding_masked_chromosomal_snps(chromosome)
  ret_str << ">Mask\n#{mask}\n"

  pr = primer_region(chromosome, snp_in )
  ret_str << pr.to_fasta
  ret_str
end

#primer_region(target_chromosome, parental) ⇒ Object



191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/bio/PolyploidTools/SNP.rb', line 191

def primer_region(target_chromosome, parental )
  parental = aligned_sequences[parental].downcase
  chromosome_seq = aligned_sequences[target_chromosome]
  chromosome_seq = "-" * parental.size unless chromosome_seq
  chromosome_seq = chromosome_seq.downcase
  mask = mask_aligned_chromosomal_snp(target_chromosome)
  #puts "'#{mask}'"

  pr = PrimerRegion.new
  position_in_region = 0
  (0..parental.size-1).each do |i|

    if chromosome_seq[i] != '-' or parental[i] != '-'
      case   
      when mask[i] == '&'
        #This is the SNP we take the parental
        pr.snp_pos = position_in_region
        pr.homoeologous = false
      when mask[i] == ':'
        #This is the SNP we take the parental
        pr.snp_pos = position_in_region
        pr.homoeologous = true
      when mask[i] == '-'
        #When the mask doesnt detect a SNP, so we take the parental
        parental[i] = chromosome_seq[i] unless Bio::NucleicAcid::is_unambiguous(parental[i])

      when /[[:upper:]]/.match(mask[i])
        #This is a good candidate for marking a SNP
        #We validate that the consensus from the sam file accepts the variation from the chromosomal sequence
        if parental[i] == '-'
          parental[i] = mask[i]
          pr.crhomosome_specific_intron << position_in_region
        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
          parental[i] = mask[i]
          pr.chromosome_specific << position_in_region
        end
      when /[[:lower:]]/.match(mask[i])
        #this is not that good candidate, but sitll gives specificity

        if parental[i] == '-'
          parental[i] = mask[i]
          pr.almost_crhomosome_specific_intron << position_in_region
        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
          parental[i] = mask[i].upcase
          pr.almost_chromosome_specific << position_in_region
        end
      end #Case closes
      position_in_region += 1
    end #Closes region with bases 
  end         

  pr.sequence=parental.gsub('-','')
  pr
end

#return_primer_3_string(opts = {}) ⇒ Object



251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/bio/PolyploidTools/SNP.rb', line 251

def return_primer_3_string(opts={})

  left = opts[:left_pos]
  right = opts[:right_pos]
  sequence =  opts[:sequence]
  orientation = "forward"
  if opts[:right_pos]
    orientation = "forward"
    if left > right
      left = sequence.size - left - 1
      right = sequence.size - right - 1
      sequence = reverse_complement_string(sequence)
      orientation = "reverse"
    end
    if @variation_free_region > 0
      check_str = sequence[right+1, @variation_free_region]
      return nil if check_str != check_str.downcase
    end

  end


  str = "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
  str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
  str << "SEQUENCE_FORCE_RIGHT_END=#{right}\n" if opts[:right_pos]
  str << "SEQUENCE_TEMPLATE=#{sequence}\n"
  str << "=\n"


  #In case that we don't have a right primer, we do both orientations
  unless opts[:right_pos]
    sequence =  opts[:sequence]    
    left = sequence.size - left - 1
    orientation = "reverse"
    sequence = reverse_complement_string(sequence)
    str << "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
    str << "SEQUENCE_FORCE_LEFT_END=#{left}\n"
    str << "SEQUENCE_TEMPLATE=#{sequence}\n"
    str << "=\n"
  end

  str
end

#reverse_complement_string(sequenc_str) ⇒ Object



246
247
248
249
# File 'lib/bio/PolyploidTools/SNP.rb', line 246

def reverse_complement_string(sequenc_str)
  complement = sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN')
  complement.reverse!
end

#right_paddingObject



132
133
134
135
136
# File 'lib/bio/PolyploidTools/SNP.rb', line 132

def right_padding
  ret =  (2*flanking_size) - (left_padding  + self.covered_region.size ) 
  ret = 0 if ret < 0
  ret  
end

#sequences_to_alignObject



461
462
463
464
# File 'lib/bio/PolyploidTools/SNP.rb', line 461

def sequences_to_align
  @sequences_to_align = surrounding_parental_sequences.merge(surrounding_exon_sequences) unless @sequences_to_align
  @sequences_to_align
end

#short_sObject



370
371
372
# File 'lib/bio/PolyploidTools/SNP.rb', line 370

def short_s
  "#{original}#{position}#{snp}".upcase
end

#snp_id_in_seqObject



54
55
56
# File 'lib/bio/PolyploidTools/SNP.rb', line 54

def snp_id_in_seq
  "#{original}#{position}#{snp}"
end

#surrounding_exon_sequencesObject



660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
# File 'lib/bio/PolyploidTools/SNP.rb', line 660

def surrounding_exon_sequences
  return @surrounding_exon_sequences if @surrounding_exon_sequences
  @surrounding_exon_sequences =  Bio::Alignment::SequenceHash.new
  self.exon_list.each do |chromosome, exon| 
    #puts "surrounding_exon_sequences #{flanking_size}"
    #puts chromosome
    #puts exon
    flanquing_region  = exon.target_flanking_region_from_position(position,flanking_size)
    #TODO: Padd when the exon goes over the regions... 
    
    #Ignoring when the exon is in a gap
    unless exon.snp_in_gap 
      exon_seq = container.chromosome_sequence(flanquing_region)
      @surrounding_exon_sequences[chromosome] = exon_seq
    end
  end
  @surrounding_exon_sequences
end

#surrounding_masked_chromosomal_snps(chromosome) ⇒ Object



623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
# File 'lib/bio/PolyploidTools/SNP.rb', line 623

def surrounding_masked_chromosomal_snps(chromosome)

  chromosomes = surrounding_exon_sequences
  names = chromosomes.keys
  masked_snps = chromosomes[chromosome].tr("-","+") if chromosomes[chromosome]
  masked_snps = "-" * (flanking_size * 2 ) unless chromosomes[chromosome]
  local_pos_in_gene = flanking_size 
  # ideal_min = local_pos_in_gene - flanking_size
  #ideal_max = local_pos_in_gene + flanking_size
  i = 0
  while i < masked_snps.size  do


    different = 0
    cov = 0
    names.each do | chr |
      if chromosomes[chr][i]  != "-" and chromosomes[chr][i]. != 'N' and chromosomes[chr][i]. != 'n'
        cov += 1 
        if chr != chromosome and masked_snps[i] != "+"
          different += 1  if masked_snps[i] != chromosomes[chr][i] 
        end
      end

    end
    masked_snps[i] = "-" if different == 0 and  masked_snps[i] != "+"
    masked_snps[i] = "-" if cov < 2
    masked_snps[i] = masked_snps[i].upcase if different > 1   


    if i == local_pos_in_gene
      masked_snps[i] = "&"
    end
    i += 1
  end
  masked_snps
end

#surrounding_parental_sequencesObject



411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
# File 'lib/bio/PolyploidTools/SNP.rb', line 411

def surrounding_parental_sequences
  return @surrounding_parental_sequences if @surrounding_parental_sequences
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position

  @surrounding_parental_sequences = Bio::Alignment::SequenceHash.new
  container.parents.each  do |name, bam|
    seq = nil
    if bam
      seq =  bam.consensus_with_ambiguities({:region=>gene_region}).to_s
    else
      seq = container.gene_model_sequence(gene_region)
       unless name == self.snp_in
         # puts "Modiging original: #{name} #{self.original}"  
          seq[local_pos_in_gene] = self.original 
        end
    end
    seq[local_pos_in_gene] = seq[local_pos_in_gene].upcase
    seq[local_pos_in_gene] = self.snp if name == self.snp_in  
    @surrounding_parental_sequences [name] = cut_and_pad_sequence_to_primer_region(seq)
  end
  @surrounding_parental_sequences
end

#to_fastaObject



77
78
79
# File 'lib/bio/PolyploidTools/SNP.rb', line 77

def to_fasta
  return ">#{self.gene}\n#{self.template_sequence}\n"
end

#to_polymarker_sequence(flanking_size) ⇒ Object



44
45
46
47
48
49
50
51
52
# File 'lib/bio/PolyploidTools/SNP.rb', line 44

def to_polymarker_sequence(flanking_size)
  out = template_sequence.clone
  out[position-1]  = "[#{original}/#{snp}]"

  start = position - flanking_size - 1
  start = 0 if start < 0
  total = flanking_size * 2 + 6
  out[start , total ]
end

#to_sObject



366
367
368
# File 'lib/bio/PolyploidTools/SNP.rb', line 366

def to_s
  "#{gene}:#{original}#{position}#{snp}#{chromosome}"
end