Class: Bio::PolyploidTools::SNP

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/PolyploidTools/SNP.rb

Direct Known Subclasses

NoSNPSequence, SNPSequence

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeSNP

Returns a new instance of SNP.



94
95
96
97
98
99
100
101
102
103
104
# File 'lib/bio/PolyploidTools/SNP.rb', line 94

def initialize
  @genomes_count = 3 
  @primer_3_min_seq_length = 50
  @variation_free_region = 0
  @contig = false
  @max_hits = 8
  @exon_list = Hash.new {|hsh, key| hsh[key] = [] }
  @errors = Array.new
  @repetitive = false
  @hit_count = 0
end

Instance Attribute Details

#chromosomeObject

Returns the value of attribute chromosome.



17
18
19
# File 'lib/bio/PolyploidTools/SNP.rb', line 17

def chromosome
  @chromosome
end

#containerObject

Returns the value of attribute container.



11
12
13
# File 'lib/bio/PolyploidTools/SNP.rb', line 11

def container
  @container
end

#contigObject

Returns the value of attribute contig.



9
10
11
# File 'lib/bio/PolyploidTools/SNP.rb', line 9

def contig
  @contig
end

#errorsObject

Returns the value of attribute errors.



20
21
22
# File 'lib/bio/PolyploidTools/SNP.rb', line 20

def errors
  @errors
end

#exon_listObject

Returns the value of attribute exon_list.



10
11
12
# File 'lib/bio/PolyploidTools/SNP.rb', line 10

def exon_list
  @exon_list
end

#flanking_sizeObject

Returns the value of attribute flanking_size.



12
13
14
# File 'lib/bio/PolyploidTools/SNP.rb', line 12

def flanking_size
  @flanking_size
end

#geneObject

GENE,ORIGINAL,POS,SNP



8
9
10
# File 'lib/bio/PolyploidTools/SNP.rb', line 8

def gene
  @gene
end

#genomes_countObject

Returns the value of attribute genomes_count.



15
16
17
# File 'lib/bio/PolyploidTools/SNP.rb', line 15

def genomes_count
  @genomes_count
end

#hit_countObject

Returns the value of attribute hit_count.



22
23
24
# File 'lib/bio/PolyploidTools/SNP.rb', line 22

def hit_count
  @hit_count
end

#ideal_maxObject

Returns the value of attribute ideal_max.



12
13
14
# File 'lib/bio/PolyploidTools/SNP.rb', line 12

def ideal_max
  @ideal_max
end

#ideal_minObject

Returns the value of attribute ideal_min.



12
13
14
# File 'lib/bio/PolyploidTools/SNP.rb', line 12

def ideal_min
  @ideal_min
end

#max_hitsObject

Returns the value of attribute max_hits.



19
20
21
# File 'lib/bio/PolyploidTools/SNP.rb', line 19

def max_hits
  @max_hits
end

#orientationObject

Returns the value of attribute orientation.



24
25
26
# File 'lib/bio/PolyploidTools/SNP.rb', line 24

def orientation
  @orientation
end

#originalObject

GENE,ORIGINAL,POS,SNP



8
9
10
# File 'lib/bio/PolyploidTools/SNP.rb', line 8

def original
  @original
end

#original_nameObject

GENE,ORIGINAL,POS,SNP



8
9
10
# File 'lib/bio/PolyploidTools/SNP.rb', line 8

def original_name
  @original_name
end

#positionObject

GENE,ORIGINAL,POS,SNP



8
9
10
# File 'lib/bio/PolyploidTools/SNP.rb', line 8

def position
  @position
end

#primer_3_min_seq_lengthObject

Returns the value of attribute primer_3_min_seq_length.



16
17
18
# File 'lib/bio/PolyploidTools/SNP.rb', line 16

def primer_3_min_seq_length
  @primer_3_min_seq_length
end

#repetitiveObject

Returns the value of attribute repetitive.



21
22
23
# File 'lib/bio/PolyploidTools/SNP.rb', line 21

def repetitive
  @repetitive
end

#snpObject

GENE,ORIGINAL,POS,SNP



8
9
10
# File 'lib/bio/PolyploidTools/SNP.rb', line 8

def snp
  @snp
end

#snp_inObject

GENE,ORIGINAL,POS,SNP



8
9
10
# File 'lib/bio/PolyploidTools/SNP.rb', line 8

def snp_in
  @snp_in
end

#snp_typeObject

Returns the value of attribute snp_type.



23
24
25
# File 'lib/bio/PolyploidTools/SNP.rb', line 23

def snp_type
  @snp_type
end

#template_sequenceObject

Returns the value of attribute template_sequence.



13
14
15
# File 'lib/bio/PolyploidTools/SNP.rb', line 13

def template_sequence
  @template_sequence
end

#use_referenceObject

Returns the value of attribute use_reference.



14
15
16
# File 'lib/bio/PolyploidTools/SNP.rb', line 14

def use_reference
  @use_reference
end

#variation_free_regionObject

Returns the value of attribute variation_free_region.



18
19
20
# File 'lib/bio/PolyploidTools/SNP.rb', line 18

def variation_free_region
  @variation_free_region
end

Class Method Details

.parse(reg_str) ⇒ Object

Format: Gene_name,Original,SNP_Pos,pos,chromosome A_comp0_c0_seq1,C,519,A,2A



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/bio/PolyploidTools/SNP.rb', line 29

def self.parse(reg_str)
  reg_str.chomp!
  snp = SNP.new
  snp.gene, snp.original, snp.position, snp.snp, snp.chromosome = reg_str.split(",")
  snp.position.strip!
  snp.position =  snp.position.to_i
  snp.original.upcase!
  snp.original.strip!
  snp.snp.upcase!
  snp.snp.strip!  
  snp.chromosome.strip!
  
  snp.use_reference = false
  snp
end

.parseVCF(vcf_line, chr_arm_parser: Bio::PolyploidTools::ChromosomeArm.getArmSelection("first_two")) ⇒ Object

Format: IWGSC_CSS_1AL_scaff_1455974 127 test_snp C T 135.03 .



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/bio/PolyploidTools/SNP.rb', line 47

def self.parseVCF(vcf_line, chr_arm_parser: Bio::PolyploidTools::ChromosomeArm.getArmSelection("first_two") )
  snp = SNP.new
  arr = vcf_line.split("\t")
  snp.gene     = arr[2]
  snp.original = arr[3]
  snp.position = arr[1]
  snp.snp = arr[4] 
  snp.chromosome = chr_arm_parser.call(arr[0]) 
  snp.contig = arr[0]
  snp.position.strip!
  snp.position =  snp.position.to_i
  snp.original.upcase!
  snp.original.strip!
  snp.snp.upcase!
  snp.snp.strip!  
  snp.chromosome.strip! 
  snp.orientation = :forward

  info = arr[7]
  if info
    details =  info.scan(/(\w+)=([\w|.]+)/).collect { |id, value| { :id => id, :value => value }}
    details.each do |e|   
      snp.orientation = :reverse if e[:id] == "OR" and e[:value] == "reverse"
    end
  end
  return snp
end

Instance Method Details

#add_exon(exon, arm, filter_best: true) ⇒ Object



164
165
166
167
168
169
170
171
172
# File 'lib/bio/PolyploidTools/SNP.rb', line 164

def add_exon(exon, arm, filter_best: true)
  exon_list[arm] = Array.new unless exon_list[arm]
  if filter_best and exon_list[arm].size > 0
    current = exon_list[arm].first
    exon_list[arm] = [exon] if exon.record.score > current.record.score 
  else
     exon_list[arm] << exon 
  end
end

#aligned_sequencesObject



559
560
561
562
563
564
565
566
567
568
569
570
571
572
# File 'lib/bio/PolyploidTools/SNP.rb', line 559

def aligned_sequences
 
  return @aligned_sequences if @aligned_sequences
  return Hash.new if sequences_to_align.size == 0
  
  options = ['--maxiterate', '1000', '--localpair', '--quiet']
  mafft = Bio::MAFFT.new( "mafft" , options)
  #puts "Before MAFT:#{sequences_to_align.inspect}"

  report = mafft.query_align(sequences_to_align)
  @aligned_sequences = report.alignment
   #   puts "MAFFT: #{report.alignment.inspect}" 
  @aligned_sequences
end

#aligned_sequences_fastaObject



574
575
576
577
578
579
580
581
582
583
584
585
# File 'lib/bio/PolyploidTools/SNP.rb', line 574

def aligned_sequences_fasta
  ret_str = ""
  aligned_sequences.each_pair do |name, seq|
    ret_str << ">#{self.to_s}-#{name}\n#{seq}\n" 
  end
  ret_str << ">MASK #{chromosome}\n#{mask_aligned_chromosomal_snp(chromosome)}\n"

  pr = primer_region(chromosome, snp_in )
  ret_str << pr.to_fasta
  ret_str
  ret_str 
end

#aligned_snp_positionObject



600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
# File 'lib/bio/PolyploidTools/SNP.rb', line 600

def aligned_snp_position
  return @aligned_snp_position if @aligned_snp_position
  #puts self.inspect
  pos = -1
  parental_strings = Array.new
  parental_sequences.keys.each do | par |
    parental_strings << aligned_sequences[par]
  end
  $stderr.puts "WARN: #{self.to_s} #{parental_sequences.keys} is not of size 2 (#{parental_strings.size})" if parental_strings.size != 2

  local_pos_in_parental = get_snp_position_after_trim
  i = 0
  while i < parental_strings[0].size  do
    if local_pos_in_parental == 0 and parental_strings[0][i] != "-"
      pos = i
      if parental_strings[0][i] == parental_strings[1][i]
        $stderr.puts "WARN: #{self.to_s} doesn't have a SNP in the marked place (#{i})! \n#{parental_strings[0]}\n#{parental_strings[1]}"
      end 
    end
  
    local_pos_in_parental -= 1 if parental_strings[0][i] != "-"
    i += 1
  end
  @aligned_snp_position = pos
  return pos
end

#chromosome_genomeObject



151
152
153
# File 'lib/bio/PolyploidTools/SNP.rb', line 151

def chromosome_genome
  chromosome[1]
end

#chromosome_groupObject

We Only want the chromosome, we drop the arm. We don’t use this any more. def chromosome= (chr)

@chromosome = chr

end



147
148
149
# File 'lib/bio/PolyploidTools/SNP.rb', line 147

def chromosome_group
  chromosome[0]
end

#covered_regionObject



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/bio/PolyploidTools/SNP.rb', line 174

def covered_region
  return @covered_region if @covered_region
  if self.use_reference
     reg = Bio::DB::Fasta::Region.new()
     reg.entry = gene
     reg.orientation = :forward
     reg.start = self.position - self.flanking_size
     reg.end = self.position + self.flanking_size
     reg.start = 1 if reg.start < 1
     return reg
  end
  
  min = @position
  max = @position
  # puts "Calculating covered region for #{self.inspect}"
  # puts "#{@exon_list.inspect}"
  # raise SNPException.new "Exons haven't been loaded for #{self.to_s}" if @exon_list.size == 0
  if @exon_list.size == 0
    min = self.position - self.flanking_size
    min = 1 if min < 1
    max =  self.position + self.flanking_size
  end
  @exon_list.each do | chromosome, exon_arr |
    exon_arr.each do | exon |
      reg = exon.query_region
      min = reg.start if reg.start < min
      max = reg.end if reg.end > max
    end
  end

  reg = Bio::DB::Fasta::Region.new()
  reg.entry = gene
  reg.orientation = :forward
  reg.start = min
  reg.end = max

  @covered_region = reg
  @covered_region
end

#cut_and_pad_sequence_to_primer_region(sequence) ⇒ Object



537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
# File 'lib/bio/PolyploidTools/SNP.rb', line 537

def cut_and_pad_sequence_to_primer_region(sequence)
  ideal_min = self.local_position - flanking_size 
  ideal_max = self.local_position + flanking_size
  left_pad = 0
  right_pad=0
  if ideal_min < 0
    left_pad = ideal_min * -1
    ideal_min = 0 
  end
  if ideal_max > sequence.size
    right_pad = ideal_max - sequence.size
    ideal_max = sequence.size - 1 
  end  
  ret = "-" * left_pad  << sequence[ideal_min..ideal_max] <<  "-" * right_pad 
  ret
end

#cut_sequence_to_primer_region(sequence) ⇒ Object



528
529
530
531
532
533
534
535
# File 'lib/bio/PolyploidTools/SNP.rb', line 528

def cut_sequence_to_primer_region(sequence)
  ideal_min = self.local_position - flanking_size 
  ideal_max = self.local_position + flanking_size
  ideal_min = 0 if ideal_min < 0
  ideal_max = sequence.size - 1 if ideal_max > sequence.size
  # len = ideal_max - ideal_min
  sequence[ideal_min..ideal_max]
end

#exon_for_chromosome(chromosome) ⇒ Object



466
467
468
469
470
# File 'lib/bio/PolyploidTools/SNP.rb', line 466

def exon_for_chromosome (chromosome)
  selected_exon=exon_list[chromosome]
  puts "No exon with chromosome #{chromosome} for #{gene}"  unless selected_exon
  selected_exon
end

#exon_sequencesObject



774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
# File 'lib/bio/PolyploidTools/SNP.rb', line 774

def exon_sequences
  return @exon_sequences if @exon_sequences
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position
  @exon_sequences = Bio::Alignment::SequenceHash.new
  self.exon_list.each do |chromosome, exon_arr| 
    exon_arr.each do |exon|
      exon_start_offset = exon.query_region.start - gene_region.start
      exon_seq  = "-" * exon_start_offset 
      exon_seq << container.chromosome_sequence(exon.target_region).to_s
      #puts exon_seq
      #l_pos = exon_start_offset + local_pos_in_gene
      unless exon.snp_in_gap
        #puts "local position: #{local_pos_in_gene}"
        #puts "Exon_seq: #{exon_seq}"
        exon_seq[local_pos_in_gene] = exon_seq[local_pos_in_gene].upcase
        exon_seq << "-" * (gene_region.size - exon_seq.size + 1)
        #puts exon.inspect
        @exon_sequences["#{chromosome}_#{exon.query_region.start}_#{exon.record.score}"] = exon_seq
      end
    end
  end
  @exon_sequences[@chromosome] = "-" * gene_region.size unless @exon_sequences[@chromosome]
  @exon_sequences
end

#get_snp_position_after_trimObject



588
589
590
591
592
593
594
595
596
597
598
# File 'lib/bio/PolyploidTools/SNP.rb', line 588

def get_snp_position_after_trim
  local_pos_in_gene = self.local_position
  ideal_min = self.local_position - flanking_size 
  ideal_max = self.local_position + flanking_size
  left_pad = 0
  if ideal_min < 0
    left_pad = ideal_min * -1
    ideal_min = 0 
  end
  local_pos_in_gene - ideal_min
end

#get_target_sequence(names, chromosome) ⇒ Object



627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
# File 'lib/bio/PolyploidTools/SNP.rb', line 627

def get_target_sequence(names, chromosome)
  
  best = chromosome
  best_score = 0
  names.each do |e|  
    arr = e.split("_")
    if arr.length == 3
      score = arr[2].to_f
      if score >best_score
        best_score = score 
        best = e
      end
    end
  end
  best
end

#left_paddingObject



214
215
216
217
218
# File 'lib/bio/PolyploidTools/SNP.rb', line 214

def left_padding
  flanking_size - self.local_position + 1
  #  primer_region.start - covered_region.start 
  # 0
end

#local_positionObject



226
227
228
229
# File 'lib/bio/PolyploidTools/SNP.rb', line 226

def local_position
#      puts "local_position #{self.position} #{self.covered_region.start}"
  self.position - self.covered_region.start
end

#mask_aligned_chromosomal_snp(chromosome) ⇒ Object



646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
# File 'lib/bio/PolyploidTools/SNP.rb', line 646

def mask_aligned_chromosomal_snp(chromosome)
  names = aligned_sequences.keys
  parentals =  parental_sequences.keys

  position_after_trim = get_snp_position_after_trim

  names = names - parentals
  local_pos_in_gene = aligned_snp_position

  best_target = get_target_sequence(names, chromosome)
  masked_snps = aligned_sequences[best_target].downcase if aligned_sequences[best_target]
  masked_snps = "-" * aligned_sequences.values[0].size  unless aligned_sequences[best_target]
  #TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want.
  #puts "mask_aligned_chromosomal_snp(#{chromosome})"
  #puts names
  i = 0
  for  i in 0..masked_snps.size-1
    #puts i
    different = 0
    cov = 0
    from_group = 0
    nCount = 0
    seen = []
    names.each do | chr |
      if aligned_sequences[chr] and aligned_sequences[chr][i]  != "-"
        #puts aligned_sequences[chr][i]
        cov += 1 
        nCount += 1 if aligned_sequences[chr][i] == 'N' or  aligned_sequences[chr][i] == 'n' # maybe fix this to use ambiguity codes instead. 
        
        if chr[0] == chromosome_group and  not seen.include? chr[1]
          seen << chr[1]
          from_group += 1 

        end
        #puts "Comparing #{chromosome_group} and #{chr[0]} as chromosomes"
        if chr != best_target 
          $stderr.puts "WARN: No base for #{masked_snps} : ##{i}" unless masked_snps[i].upcase
          $stderr.puts "WARN: No base for #{aligned_sequences[chr]} : ##{i}" unless masked_snps[i].upcase
          different += 1  if masked_snps[i].upcase != aligned_sequences[chr][i].upcase 
        end
      end
    end
    masked_snps[i] = "-" if different == 0
    masked_snps[i] = "-" if cov == 1
    masked_snps[i] = "-" if nCount > 0
    masked_snps[i] = "*" if cov == 0
    expected_snps = names.size - 1

   #puts "Diferences: #{different} to expected: #{ expected_snps } [#{i}] Genome count (#{from_group} == #{genomes_count})"
    
    masked_snps[i] = masked_snps[i].upcase if different == expected_snps and from_group == genomes_count
    #puts "#{i}:#{masked_snps[i]}"

    if i == local_pos_in_gene
      masked_snps[i] = "&"
      #puts "#{i}:#{masked_snps[i]}___"
      bases = ""
      names.each do | chr |
        bases << aligned_sequences[chr][i]  if aligned_sequences[chr] and aligned_sequences[chr][i]  != "-"
      end
      
      code_reference = "n"
      code_reference = Bio::NucleicAcid.to_IUAPC(bases) unless bases == ""

      if Bio::NucleicAcid.is_valid(code_reference,   original) and Bio::NucleicAcid.is_valid(code_reference,   snp)
        masked_snps[i] = ":"
      end 

    end
    #i += 1
  end
  masked_snps
end

#padded_position(pos) ⇒ Object



231
232
233
# File 'lib/bio/PolyploidTools/SNP.rb', line 231

def padded_position(pos)
  pos + left_padding
end

#parental_sequencesObject



472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
# File 'lib/bio/PolyploidTools/SNP.rb', line 472

def parental_sequences
  return @parental_sequences if @parental_sequences
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position

  @parental_sequences = Bio::Alignment::SequenceHash.new
  container.parents.each  do |name, bam|
    seq = nil
    if bam
      seq = bam.consensus_with_ambiguities({:region=>gene_region}).to_s
    else
      seq = container.gene_model_sequence(gene_region)
      unless name == self.snp_in
        seq[local_pos_in_gene] = self.original 
      end
    end
    seq[local_pos_in_gene] = seq[local_pos_in_gene].upcase
    
    seq[local_pos_in_gene] = self.snp if name == self.snp_in    
    @parental_sequences [name] = seq
  end
  @parental_sequences
end

#primer_3_all_strings(target_chromosome, parental) ⇒ Object



374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
# File 'lib/bio/PolyploidTools/SNP.rb', line 374

def primer_3_all_strings(target_chromosome, parental) 

  pr = primer_region(target_chromosome, parental )
  primer_3_propertes = Array.new

  seq_original = String.new(pr.sequence)
  
  if seq_original.size < primer_3_min_seq_length
    errors << "The sequence (#{seq_original.size}) is shorter than #{primer_3_min_seq_length}"
    return primer_3_propertes 
  end
  
  if self.hit_count > self.max_hits
    errors << "The marker maps to #{self.hit_count} positions (max_hits: #{self.max_hits}). "
    repetitive = true
    return primer_3_propertes 
  end
  seq_original[pr.snp_pos] = self.original
  seq_original_reverse = reverse_complement_string(seq_original)

  seq_snp =  String.new(pr.sequence)
  seq_snp[pr.snp_pos] =  self.snp
  seq_snp_reverse = reverse_complement_string(seq_snp)

  rev_pos = seq_snp.size - position

  if pr.homoeologous
    @snp_type = "homoeologous"
  else
    @snp_type = "non-homoeologous"
  end

  pr.chromosome_specific.each do |pos|

    args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_specific exon #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original}
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_specific exon #{@snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    primer_3_propertes << return_primer_3_string(args)
  end

  pr.almost_chromosome_specific.each do |pos|
    args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_semispecific exon #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original}
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_semispecific exon #{@snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    primer_3_propertes << return_primer_3_string(args)

  end

  pr.crhomosome_specific_intron.each do |pos|

    args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_specific intron #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original}
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_specific exon #{@snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    primer_3_propertes << return_primer_3_string(args)
  end

  pr.almost_crhomosome_specific_intron.each do |pos|
    args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_semispecific intron #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original}
    primer_3_propertes << return_primer_3_string(args)
    args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_semispecific exon #{@snp_type} #{chromosome}"
    args[:sequence] = seq_snp
    primer_3_propertes << return_primer_3_string(args)

  end


  args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_nonspecific all #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :sequence=>seq_original}
  primer_3_propertes << return_primer_3_string(args)
  args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_nonspecific all #{@snp_type} #{chromosome}"
  args[:sequence] = seq_snp
  primer_3_propertes << return_primer_3_string(args)


  primer_3_propertes
end

#primer_3_string(target_chromosome, parental) ⇒ Object



461
462
463
464
# File 'lib/bio/PolyploidTools/SNP.rb', line 461

def primer_3_string(target_chromosome, parental) 
  strings = primer_3_all_strings(target_chromosome, parental) 
  strings.join
end

#primer_fasta_stringObject



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/bio/PolyploidTools/SNP.rb', line 235

def primer_fasta_string
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position
  ret_str = ""

  surrounding_parental_sequences.each do |name, seq|
    ret_str << ">#{gene_region.entry}-#{self.position}_#{name}\n" 
    ret_str << "#{seq}\n"
  end

  self.surrounding_exon_sequences.each do |chromosome, exon_seq|
    ret_str << ">#{chromosome}\n#{exon_seq}\n"
  end

  mask = surrounding_masked_chromosomal_snps(chromosome)
  ret_str << ">Mask\n#{mask}\n"

  pr = primer_region(chromosome, snp_in )
  ret_str << pr.to_fasta
  ret_str
end

#primer_region(target_chromosome, parental) ⇒ Object



257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
# File 'lib/bio/PolyploidTools/SNP.rb', line 257

def primer_region(target_chromosome, parental )
  
  parental = aligned_sequences[parental].downcase
  names = aligned_sequences.keys
  target_chromosome = get_target_sequence(names, target_chromosome)
  
  chromosome_seq = aligned_sequences[target_chromosome]
  chromosome_seq = "-" * parental.size unless chromosome_seq
  chromosome_seq = chromosome_seq.downcase
  mask = mask_aligned_chromosomal_snp(target_chromosome)

  pr = PrimerRegion.new
  position_in_region = 0
  (0..parental.size-1).each do |i|

    if chromosome_seq[i] != '-' or parental[i] != '-'
      case   
      when mask[i] == '&'
        #This is the SNP we take the parental
        pr.snp_pos = position_in_region
        pr.homoeologous = false
      when mask[i] == ':'
        #This is the SNP we take the parental
        pr.snp_pos = position_in_region
        pr.homoeologous = true
      when mask[i] == '-'
        #When the mask doesnt detect a SNP, so we take the parental
        parental[i] = chromosome_seq[i] unless Bio::NucleicAcid::is_unambiguous(parental[i])

      when /[[:upper:]]/.match(mask[i])
        #This is a good candidate for marking a SNP
        #We validate that the consensus from the sam file accepts the variation from the chromosomal sequence
        if parental[i] == '-'
          parental[i] = mask[i]
          pr.crhomosome_specific_intron << position_in_region
        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
          parental[i] = mask[i]
          pr.chromosome_specific << position_in_region
        end
      when /[[:lower:]]/.match(mask[i])
        #this is not that good candidate, but sitll gives specificity

        if parental[i] == '-'
          parental[i] = mask[i]
          pr.almost_crhomosome_specific_intron << position_in_region
        elsif Bio::NucleicAcid.is_valid(parental[i], mask[i])
          parental[i] = mask[i].upcase
          pr.almost_chromosome_specific << position_in_region
        end
      end #Case closes
      position_in_region += 1
    end #Closes region with bases 
  end         

  pr.sequence=parental.gsub('-','')
  pr
end

#return_primer_3_string(opts = {}) ⇒ Object



320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
# File 'lib/bio/PolyploidTools/SNP.rb', line 320

def return_primer_3_string(opts={})

  left = opts[:left_pos]
  right = opts[:right_pos]
  sequence =  opts[:sequence]
  extra =  opts[:extra]

  orientation = "forward"
  if opts[:right_pos]
    orientation = "forward"
    if left > right
      left = sequence.size - left - 1
      right = sequence.size - right - 1
      sequence = reverse_complement_string(sequence)
      orientation = "reverse"
    end
    if @variation_free_region > 0
      check_str = sequence[right+1, @variation_free_region]
      return nil if check_str != check_str.downcase
    end

  end

  #puts "__"
  #puts self.inspect
  str = "SEQUENCE_ID=#{opts[:name]} #{orientation} \n"
  str << "SEQUENCE_FORCE_LEFT_END=#{left}\n" unless  opts[:extra_f]
  str << "SEQUENCE_FORCE_RIGHT_END=#{right}\n" if opts[:right_pos]
  str << extra if extra
  str << opts[:extra_f] if opts[:extra_f]
  str << "SEQUENCE_TEMPLATE=#{sequence}\n"
  
 
  str << "=\n"


  #In case that we don't have a right primer, we do both orientations
  unless opts[:right_pos]
    sequence =  opts[:sequence]    
    left = sequence.size - left - 1
    orientation = "reverse"
    sequence = reverse_complement_string(sequence)
    str << "SEQUENCE_ID=#{opts[:name]} #{orientation}\n"
    str << "SEQUENCE_FORCE_LEFT_END=#{left}\n" unless  opts[:extra_r]
    str << opts[:extra_r] if opts[:extra_r]
    str << "SEQUENCE_TEMPLATE=#{sequence}\n"
    str << extra if extra
    str << "=\n"
  end

  str
end

#reverse_complement_string(sequenc_str) ⇒ Object



315
316
317
318
# File 'lib/bio/PolyploidTools/SNP.rb', line 315

def reverse_complement_string(sequenc_str)
  complement = sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN')
  complement.reverse!
end

#right_paddingObject



220
221
222
223
224
# File 'lib/bio/PolyploidTools/SNP.rb', line 220

def right_padding
  ret =  (2*flanking_size) - (left_padding  + self.covered_region.size ) 
  ret = 0 if ret < 0
  ret  
end

#sequences_to_alignObject



554
555
556
557
# File 'lib/bio/PolyploidTools/SNP.rb', line 554

def sequences_to_align
  @sequences_to_align = surrounding_parental_sequences.merge(surrounding_exon_sequences) unless @sequences_to_align
  @sequences_to_align
end

#setTemplateFromFastaFile(fastaFile, flanking_size: 100) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/bio/PolyploidTools/SNP.rb', line 75

def setTemplateFromFastaFile(fastaFile ,flanking_size: 100)
  reg = Bio::DB::Fasta::Region.new
  reg.entry = gene
  reg.entry = @contig if @contig
  reg.start = position - flanking_size
  reg.end = position + flanking_size + 1
  reg.orientation = :forward
  entry = fastaFile.index.region_for_entry(reg.entry)
  reg.start = 1 if reg.start < 1
  reg.end = entry.length if reg.end > entry.length
  amb = Bio::NucleicAcid.to_IUAPC("#{original}#{snp}")
  @position = @position - reg.start + 1
  @position = 1 if @position < 1
  #puts "about to fetch"
  self.template_sequence = fastaFile.fetch_sequence(reg)
  #puts "done fetching"
  template_sequence[position - 1] = amb
end

#short_sObject



457
458
459
# File 'lib/bio/PolyploidTools/SNP.rb', line 457

def short_s
  "#{original}#{position}#{snp}".upcase
end

#snp_id_in_seqObject



137
138
139
# File 'lib/bio/PolyploidTools/SNP.rb', line 137

def snp_id_in_seq
  "#{original}#{position}#{snp}"
end

#surrounding_exon_sequencesObject



753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
# File 'lib/bio/PolyploidTools/SNP.rb', line 753

def surrounding_exon_sequences
  return @surrounding_exon_sequences if @surrounding_exon_sequences
  gene_region = self.covered_region
  @surrounding_exon_sequences =  Bio::Alignment::SequenceHash.new
  self.exon_list.each do |chromosome, exon_arr| 
    exon_arr.each do |exon|
      exon_start_offset = exon.query_region.start - gene_region.start
      flanking_region  = exon.target_flanking_region_from_position(position,flanking_size)
      #TODO: Padd when the exon goes over the regions... 
      #puts flanking_region.inspect
      #Ignoring when the exon is in a gap
      unless exon.snp_in_gap 
        exon_seq = container.chromosome_sequence(flanking_region)
        @surrounding_exon_sequences["#{chromosome}_#{flanking_region.start}_#{exon.record.score}"] = exon_seq
      end
    end
  end
  @surrounding_exon_sequences
end

#surrounding_masked_chromosomal_snps(chromosome) ⇒ Object



721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
# File 'lib/bio/PolyploidTools/SNP.rb', line 721

def surrounding_masked_chromosomal_snps(chromosome)

  chromosomes = surrounding_exon_sequences
  names = chromosomes.keys
  get_target_sequence(names)
  masked_snps = chromosomes[chromosome].tr("-","+") if chromosomes[chromosome]
  masked_snps = "-" * (flanking_size * 2 ) unless chromosomes[chromosome]
  local_pos_in_gene = flanking_size 
  i = 0
  while i < masked_snps.size  do
    different = 0
    cov = 0
    names.each do | chr |
      if chromosomes[chr][i]  != "-" and chromosomes[chr][i]. != 'N' and chromosomes[chr][i]. != 'n'
        cov += 1 
        if chr != chromosome and masked_snps[i] != "+"
          different += 1  if masked_snps[i] != chromosomes[chr][i] 
        end
      end
    end
    masked_snps[i] = "-" if different == 0 and  masked_snps[i] != "+"
    masked_snps[i] = "-" if cov < 2
    masked_snps[i] = masked_snps[i].upcase if different > 1   

    if i == local_pos_in_gene
      masked_snps[i] = "&"
    end
    i += 1
  end
  masked_snps
end

#surrounding_parental_sequencesObject



499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
# File 'lib/bio/PolyploidTools/SNP.rb', line 499

def surrounding_parental_sequences
  return @surrounding_parental_sequences if @surrounding_parental_sequences
  gene_region = self.covered_region
  local_pos_in_gene = self.local_position

  @surrounding_parental_sequences = Bio::Alignment::SequenceHash.new
  container.parents.each  do |name, bam|
    seq = nil
    if bam
      seq =  bam.consensus_with_ambiguities({:region=>gene_region}).to_s
    else
      seq = container.gene_model_sequence(gene_region)
      #puts "#{name} #{self.snp_in}"
      #puts "Modifing original: #{name}\n#{seq}"  
      unless name == self.snp_in

        seq[local_pos_in_gene] = self.original 
      else
        seq[local_pos_in_gene] = self.snp 
      end
      #puts "#{seq}"  
    end
    seq[local_pos_in_gene] = seq[local_pos_in_gene].upcase
    seq[local_pos_in_gene] = self.snp if name == self.snp_in  
    @surrounding_parental_sequences [name] = cut_and_pad_sequence_to_primer_region(seq)
  end
  @surrounding_parental_sequences
end

#to_fastaObject



160
161
162
# File 'lib/bio/PolyploidTools/SNP.rb', line 160

def to_fasta
    return ">#{self.gene}\n#{self.template_sequence}\n"
end

#to_polymarker_coordinates(flanking_size, total: nil) ⇒ Object



106
107
108
109
110
111
112
113
# File 'lib/bio/PolyploidTools/SNP.rb', line 106

def to_polymarker_coordinates(flanking_size, total:nil)
  start = position - flanking_size + 1
  start = 0 if start < 0
  total = flanking_size * 2 unless total
  total += 1
  new_position = position - start + 2
  [start , total, new_position ]
end

#to_polymarker_sequence(flanking_size, total: nil) ⇒ Object



115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/bio/PolyploidTools/SNP.rb', line 115

def to_polymarker_sequence(flanking_size, total:nil)
  out = template_sequence.clone
  snp_seq = "[#{original}/#{snp}]"
  p = position-1 
  if orientation == :reverse
    p = out.length - p - 1
    s = Bio::Sequence::NA.new(out)
    s1 = Bio::Sequence::NA.new(original)
    s2 = Bio::Sequence::NA.new(snp) 
    out = s.reverse_complement
    snp_seq = "[#{s1.reverse_complement}/#{s2.reverse_complement}]"

  end

  out[p]  = snp_seq
  start = position - flanking_size - 1
  start = 0 if start < 0
  total = flanking_size * 2 unless total
  total += 5
  out[start , total ].upcase
end

#to_sObject



453
454
455
# File 'lib/bio/PolyploidTools/SNP.rb', line 453

def to_s
  "#{gene}:#{original}#{position}#{snp}#{chromosome}"
end