Class: Bio::PolyploidTools::SNP
- Inherits:
-
Object
- Object
- Bio::PolyploidTools::SNP
- Defined in:
- lib/bio/PolyploidTools/SNP.rb
Direct Known Subclasses
Instance Attribute Summary collapse
-
#chromosome ⇒ Object
Returns the value of attribute chromosome.
-
#container ⇒ Object
Returns the value of attribute container.
-
#contig ⇒ Object
Returns the value of attribute contig.
-
#errors ⇒ Object
Returns the value of attribute errors.
-
#exon_list ⇒ Object
Returns the value of attribute exon_list.
-
#flanking_size ⇒ Object
Returns the value of attribute flanking_size.
-
#gene ⇒ Object
GENE,ORIGINAL,POS,SNP.
-
#genomes_count ⇒ Object
Returns the value of attribute genomes_count.
-
#hit_count ⇒ Object
Returns the value of attribute hit_count.
-
#ideal_max ⇒ Object
Returns the value of attribute ideal_max.
-
#ideal_min ⇒ Object
Returns the value of attribute ideal_min.
-
#max_hits ⇒ Object
Returns the value of attribute max_hits.
-
#orientation ⇒ Object
Returns the value of attribute orientation.
-
#original ⇒ Object
GENE,ORIGINAL,POS,SNP.
-
#original_name ⇒ Object
GENE,ORIGINAL,POS,SNP.
-
#position ⇒ Object
GENE,ORIGINAL,POS,SNP.
-
#primer_3_min_seq_length ⇒ Object
Returns the value of attribute primer_3_min_seq_length.
-
#repetitive ⇒ Object
Returns the value of attribute repetitive.
-
#snp ⇒ Object
GENE,ORIGINAL,POS,SNP.
-
#snp_in ⇒ Object
GENE,ORIGINAL,POS,SNP.
-
#snp_type ⇒ Object
Returns the value of attribute snp_type.
-
#template_sequence ⇒ Object
Returns the value of attribute template_sequence.
-
#use_reference ⇒ Object
Returns the value of attribute use_reference.
-
#variation_free_region ⇒ Object
Returns the value of attribute variation_free_region.
Class Method Summary collapse
-
.parse(reg_str) ⇒ Object
Format: Gene_name,Original,SNP_Pos,pos,chromosome A_comp0_c0_seq1,C,519,A,2A.
-
.parseVCF(vcf_line, chr_arm_parser: Bio::PolyploidTools::ChromosomeArm.getArmSelection("first_two")) ⇒ Object
Format: IWGSC_CSS_1AL_scaff_1455974 127 test_snp C T 135.03 .
Instance Method Summary collapse
- #add_exon(exon, arm, filter_best: true) ⇒ Object
- #aligned_sequences ⇒ Object
- #aligned_sequences_fasta ⇒ Object
- #aligned_snp_position ⇒ Object
- #chromosome_genome ⇒ Object
-
#chromosome_group ⇒ Object
We Only want the chromosome, we drop the arm.
- #covered_region ⇒ Object
- #cut_and_pad_sequence_to_primer_region(sequence) ⇒ Object
- #cut_sequence_to_primer_region(sequence) ⇒ Object
- #exon_for_chromosome(chromosome) ⇒ Object
- #exon_sequences ⇒ Object
- #get_snp_position_after_trim ⇒ Object
- #get_target_sequence(names, chromosome) ⇒ Object
-
#initialize ⇒ SNP
constructor
A new instance of SNP.
- #left_padding ⇒ Object
- #local_position ⇒ Object
- #mask_aligned_chromosomal_snp(chromosome) ⇒ Object
- #padded_position(pos) ⇒ Object
- #parental_sequences ⇒ Object
- #primer_3_all_strings(target_chromosome, parental) ⇒ Object
- #primer_3_string(target_chromosome, parental) ⇒ Object
- #primer_fasta_string ⇒ Object
- #primer_region(target_chromosome, parental) ⇒ Object
- #return_primer_3_string(opts = {}) ⇒ Object
- #reverse_complement_string(sequenc_str) ⇒ Object
- #right_padding ⇒ Object
- #sequences_to_align ⇒ Object
- #setTemplateFromFastaFile(fastaFile, flanking_size: 100) ⇒ Object
- #short_s ⇒ Object
- #snp_id_in_seq ⇒ Object
- #surrounding_exon_sequences ⇒ Object
- #surrounding_masked_chromosomal_snps(chromosome) ⇒ Object
- #surrounding_parental_sequences ⇒ Object
- #to_fasta ⇒ Object
- #to_polymarker_coordinates(flanking_size, total: nil) ⇒ Object
- #to_polymarker_sequence(flanking_size, total: nil) ⇒ Object
- #to_s ⇒ Object
Constructor Details
#initialize ⇒ SNP
Returns a new instance of SNP.
94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 94 def initialize @genomes_count = 3 @primer_3_min_seq_length = 50 @variation_free_region = 0 @contig = false @max_hits = 8 @exon_list = Hash.new {|hsh, key| hsh[key] = [] } @errors = Array.new @repetitive = false @hit_count = 0 end |
Instance Attribute Details
#chromosome ⇒ Object
Returns the value of attribute chromosome.
17 18 19 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 17 def chromosome @chromosome end |
#container ⇒ Object
Returns the value of attribute container.
11 12 13 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 11 def container @container end |
#contig ⇒ Object
Returns the value of attribute contig.
9 10 11 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 9 def contig @contig end |
#errors ⇒ Object
Returns the value of attribute errors.
20 21 22 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 20 def errors @errors end |
#exon_list ⇒ Object
Returns the value of attribute exon_list.
10 11 12 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 10 def exon_list @exon_list end |
#flanking_size ⇒ Object
Returns the value of attribute flanking_size.
12 13 14 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 12 def flanking_size @flanking_size end |
#gene ⇒ Object
GENE,ORIGINAL,POS,SNP
8 9 10 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 8 def gene @gene end |
#genomes_count ⇒ Object
Returns the value of attribute genomes_count.
15 16 17 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 15 def genomes_count @genomes_count end |
#hit_count ⇒ Object
Returns the value of attribute hit_count.
22 23 24 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 22 def hit_count @hit_count end |
#ideal_max ⇒ Object
Returns the value of attribute ideal_max.
12 13 14 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 12 def ideal_max @ideal_max end |
#ideal_min ⇒ Object
Returns the value of attribute ideal_min.
12 13 14 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 12 def ideal_min @ideal_min end |
#max_hits ⇒ Object
Returns the value of attribute max_hits.
19 20 21 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 19 def max_hits @max_hits end |
#orientation ⇒ Object
Returns the value of attribute orientation.
24 25 26 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 24 def orientation @orientation end |
#original ⇒ Object
GENE,ORIGINAL,POS,SNP
8 9 10 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 8 def original @original end |
#original_name ⇒ Object
GENE,ORIGINAL,POS,SNP
8 9 10 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 8 def original_name @original_name end |
#position ⇒ Object
GENE,ORIGINAL,POS,SNP
8 9 10 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 8 def position @position end |
#primer_3_min_seq_length ⇒ Object
Returns the value of attribute primer_3_min_seq_length.
16 17 18 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 16 def primer_3_min_seq_length @primer_3_min_seq_length end |
#repetitive ⇒ Object
Returns the value of attribute repetitive.
21 22 23 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 21 def repetitive @repetitive end |
#snp ⇒ Object
GENE,ORIGINAL,POS,SNP
8 9 10 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 8 def snp @snp end |
#snp_in ⇒ Object
GENE,ORIGINAL,POS,SNP
8 9 10 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 8 def snp_in @snp_in end |
#snp_type ⇒ Object
Returns the value of attribute snp_type.
23 24 25 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 23 def snp_type @snp_type end |
#template_sequence ⇒ Object
Returns the value of attribute template_sequence.
13 14 15 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 13 def template_sequence @template_sequence end |
#use_reference ⇒ Object
Returns the value of attribute use_reference.
14 15 16 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 14 def use_reference @use_reference end |
#variation_free_region ⇒ Object
Returns the value of attribute variation_free_region.
18 19 20 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 18 def variation_free_region @variation_free_region end |
Class Method Details
.parse(reg_str) ⇒ Object
Format: Gene_name,Original,SNP_Pos,pos,chromosome A_comp0_c0_seq1,C,519,A,2A
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 29 def self.parse(reg_str) reg_str.chomp! snp = SNP.new snp.gene, snp.original, snp.position, snp.snp, snp.chromosome = reg_str.split(",") snp.position.strip! snp.position = snp.position.to_i snp.original.upcase! snp.original.strip! snp.snp.upcase! snp.snp.strip! snp.chromosome.strip! snp.use_reference = false snp end |
.parseVCF(vcf_line, chr_arm_parser: Bio::PolyploidTools::ChromosomeArm.getArmSelection("first_two")) ⇒ Object
Format: IWGSC_CSS_1AL_scaff_1455974 127 test_snp C T 135.03 .
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 47 def self.parseVCF(vcf_line, chr_arm_parser: Bio::PolyploidTools::ChromosomeArm.getArmSelection("first_two") ) snp = SNP.new arr = vcf_line.split("\t") snp.gene = arr[2] snp.original = arr[3] snp.position = arr[1] snp.snp = arr[4] snp.chromosome = chr_arm_parser.call(arr[0]) snp.contig = arr[0] snp.position.strip! snp.position = snp.position.to_i snp.original.upcase! snp.original.strip! snp.snp.upcase! snp.snp.strip! snp.chromosome.strip! snp.orientation = :forward info = arr[7] if info details = info.scan(/(\w+)=([\w|.]+)/).collect { |id, value| { :id => id, :value => value }} details.each do |e| snp.orientation = :reverse if e[:id] == "OR" and e[:value] == "reverse" end end return snp end |
Instance Method Details
#add_exon(exon, arm, filter_best: true) ⇒ Object
164 165 166 167 168 169 170 171 172 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 164 def add_exon(exon, arm, filter_best: true) exon_list[arm] = Array.new unless exon_list[arm] if filter_best and exon_list[arm].size > 0 current = exon_list[arm].first exon_list[arm] = [exon] if exon.record.score > current.record.score else exon_list[arm] << exon end end |
#aligned_sequences ⇒ Object
559 560 561 562 563 564 565 566 567 568 569 570 571 572 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 559 def aligned_sequences return @aligned_sequences if @aligned_sequences return Hash.new if sequences_to_align.size == 0 = ['--maxiterate', '1000', '--localpair', '--quiet'] mafft = Bio::MAFFT.new( "mafft" , ) #puts "Before MAFT:#{sequences_to_align.inspect}" report = mafft.query_align(sequences_to_align) @aligned_sequences = report.alignment # puts "MAFFT: #{report.alignment.inspect}" @aligned_sequences end |
#aligned_sequences_fasta ⇒ Object
574 575 576 577 578 579 580 581 582 583 584 585 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 574 def aligned_sequences_fasta ret_str = "" aligned_sequences.each_pair do |name, seq| ret_str << ">#{self.to_s}-#{name}\n#{seq}\n" end ret_str << ">MASK #{chromosome}\n#{mask_aligned_chromosomal_snp(chromosome)}\n" pr = primer_region(chromosome, snp_in ) ret_str << pr.to_fasta ret_str ret_str end |
#aligned_snp_position ⇒ Object
600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 600 def aligned_snp_position return @aligned_snp_position if @aligned_snp_position #puts self.inspect pos = -1 parental_strings = Array.new parental_sequences.keys.each do | par | parental_strings << aligned_sequences[par] end $stderr.puts "WARN: #{self.to_s} #{parental_sequences.keys} is not of size 2 (#{parental_strings.size})" if parental_strings.size != 2 local_pos_in_parental = get_snp_position_after_trim i = 0 while i < parental_strings[0].size do if local_pos_in_parental == 0 and parental_strings[0][i] != "-" pos = i if parental_strings[0][i] == parental_strings[1][i] $stderr.puts "WARN: #{self.to_s} doesn't have a SNP in the marked place (#{i})! \n#{parental_strings[0]}\n#{parental_strings[1]}" end end local_pos_in_parental -= 1 if parental_strings[0][i] != "-" i += 1 end @aligned_snp_position = pos return pos end |
#chromosome_genome ⇒ Object
151 152 153 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 151 def chromosome_genome chromosome[1] end |
#chromosome_group ⇒ Object
We Only want the chromosome, we drop the arm. We don’t use this any more. def chromosome= (chr)
@chromosome = chr
end
147 148 149 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 147 def chromosome_group chromosome[0] end |
#covered_region ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 174 def covered_region return @covered_region if @covered_region if self.use_reference reg = Bio::DB::Fasta::Region.new() reg.entry = gene reg.orientation = :forward reg.start = self.position - self.flanking_size reg.end = self.position + self.flanking_size reg.start = 1 if reg.start < 1 return reg end min = @position max = @position # puts "Calculating covered region for #{self.inspect}" # puts "#{@exon_list.inspect}" # raise SNPException.new "Exons haven't been loaded for #{self.to_s}" if @exon_list.size == 0 if @exon_list.size == 0 min = self.position - self.flanking_size min = 1 if min < 1 max = self.position + self.flanking_size end @exon_list.each do | chromosome, exon_arr | exon_arr.each do | exon | reg = exon.query_region min = reg.start if reg.start < min max = reg.end if reg.end > max end end reg = Bio::DB::Fasta::Region.new() reg.entry = gene reg.orientation = :forward reg.start = min reg.end = max @covered_region = reg @covered_region end |
#cut_and_pad_sequence_to_primer_region(sequence) ⇒ Object
537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 537 def cut_and_pad_sequence_to_primer_region(sequence) ideal_min = self.local_position - flanking_size ideal_max = self.local_position + flanking_size left_pad = 0 right_pad=0 if ideal_min < 0 left_pad = ideal_min * -1 ideal_min = 0 end if ideal_max > sequence.size right_pad = ideal_max - sequence.size ideal_max = sequence.size - 1 end ret = "-" * left_pad << sequence[ideal_min..ideal_max] << "-" * right_pad ret end |
#cut_sequence_to_primer_region(sequence) ⇒ Object
528 529 530 531 532 533 534 535 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 528 def cut_sequence_to_primer_region(sequence) ideal_min = self.local_position - flanking_size ideal_max = self.local_position + flanking_size ideal_min = 0 if ideal_min < 0 ideal_max = sequence.size - 1 if ideal_max > sequence.size # len = ideal_max - ideal_min sequence[ideal_min..ideal_max] end |
#exon_for_chromosome(chromosome) ⇒ Object
466 467 468 469 470 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 466 def exon_for_chromosome (chromosome) selected_exon=exon_list[chromosome] puts "No exon with chromosome #{chromosome} for #{gene}" unless selected_exon selected_exon end |
#exon_sequences ⇒ Object
774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 774 def exon_sequences return @exon_sequences if @exon_sequences gene_region = self.covered_region local_pos_in_gene = self.local_position @exon_sequences = Bio::Alignment::SequenceHash.new self.exon_list.each do |chromosome, exon_arr| exon_arr.each do |exon| exon_start_offset = exon.query_region.start - gene_region.start exon_seq = "-" * exon_start_offset exon_seq << container.chromosome_sequence(exon.target_region).to_s #puts exon_seq #l_pos = exon_start_offset + local_pos_in_gene unless exon.snp_in_gap #puts "local position: #{local_pos_in_gene}" #puts "Exon_seq: #{exon_seq}" exon_seq[local_pos_in_gene] = exon_seq[local_pos_in_gene].upcase exon_seq << "-" * (gene_region.size - exon_seq.size + 1) #puts exon.inspect @exon_sequences["#{chromosome}_#{exon.query_region.start}_#{exon.record.score}"] = exon_seq end end end @exon_sequences[@chromosome] = "-" * gene_region.size unless @exon_sequences[@chromosome] @exon_sequences end |
#get_snp_position_after_trim ⇒ Object
588 589 590 591 592 593 594 595 596 597 598 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 588 def get_snp_position_after_trim local_pos_in_gene = self.local_position ideal_min = self.local_position - flanking_size ideal_max = self.local_position + flanking_size left_pad = 0 if ideal_min < 0 left_pad = ideal_min * -1 ideal_min = 0 end local_pos_in_gene - ideal_min end |
#get_target_sequence(names, chromosome) ⇒ Object
627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 627 def get_target_sequence(names, chromosome) best = chromosome best_score = 0 names.each do |e| arr = e.split("_") if arr.length == 3 score = arr[2].to_f if score >best_score best_score = score best = e end end end best end |
#left_padding ⇒ Object
214 215 216 217 218 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 214 def left_padding flanking_size - self.local_position + 1 # primer_region.start - covered_region.start # 0 end |
#local_position ⇒ Object
226 227 228 229 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 226 def local_position # puts "local_position #{self.position} #{self.covered_region.start}" self.position - self.covered_region.start end |
#mask_aligned_chromosomal_snp(chromosome) ⇒ Object
646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 646 def mask_aligned_chromosomal_snp(chromosome) names = aligned_sequences.keys parentals = parental_sequences.keys position_after_trim = get_snp_position_after_trim names = names - parentals local_pos_in_gene = aligned_snp_position best_target = get_target_sequence(names, chromosome) masked_snps = aligned_sequences[best_target].downcase if aligned_sequences[best_target] masked_snps = "-" * aligned_sequences.values[0].size unless aligned_sequences[best_target] #TODO: Make this chromosome specific, even when we have more than one alignment going to the region we want. #puts "mask_aligned_chromosomal_snp(#{chromosome})" #puts names i = 0 for i in 0..masked_snps.size-1 #puts i different = 0 cov = 0 from_group = 0 nCount = 0 seen = [] names.each do | chr | if aligned_sequences[chr] and aligned_sequences[chr][i] != "-" #puts aligned_sequences[chr][i] cov += 1 nCount += 1 if aligned_sequences[chr][i] == 'N' or aligned_sequences[chr][i] == 'n' # maybe fix this to use ambiguity codes instead. if chr[0] == chromosome_group and not seen.include? chr[1] seen << chr[1] from_group += 1 end #puts "Comparing #{chromosome_group} and #{chr[0]} as chromosomes" if chr != best_target $stderr.puts "WARN: No base for #{masked_snps} : ##{i}" unless masked_snps[i].upcase $stderr.puts "WARN: No base for #{aligned_sequences[chr]} : ##{i}" unless masked_snps[i].upcase different += 1 if masked_snps[i].upcase != aligned_sequences[chr][i].upcase end end end masked_snps[i] = "-" if different == 0 masked_snps[i] = "-" if cov == 1 masked_snps[i] = "-" if nCount > 0 masked_snps[i] = "*" if cov == 0 expected_snps = names.size - 1 #puts "Diferences: #{different} to expected: #{ expected_snps } [#{i}] Genome count (#{from_group} == #{genomes_count})" masked_snps[i] = masked_snps[i].upcase if different == expected_snps and from_group == genomes_count #puts "#{i}:#{masked_snps[i]}" if i == local_pos_in_gene masked_snps[i] = "&" #puts "#{i}:#{masked_snps[i]}___" bases = "" names.each do | chr | bases << aligned_sequences[chr][i] if aligned_sequences[chr] and aligned_sequences[chr][i] != "-" end code_reference = "n" code_reference = Bio::NucleicAcid.to_IUAPC(bases) unless bases == "" if Bio::NucleicAcid.is_valid(code_reference, original) and Bio::NucleicAcid.is_valid(code_reference, snp) masked_snps[i] = ":" end end #i += 1 end masked_snps end |
#padded_position(pos) ⇒ Object
231 232 233 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 231 def padded_position(pos) pos + left_padding end |
#parental_sequences ⇒ Object
472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 472 def parental_sequences return @parental_sequences if @parental_sequences gene_region = self.covered_region local_pos_in_gene = self.local_position @parental_sequences = Bio::Alignment::SequenceHash.new container.parents.each do |name, bam| seq = nil if bam seq = bam.consensus_with_ambiguities({:region=>gene_region}).to_s else seq = container.gene_model_sequence(gene_region) unless name == self.snp_in seq[local_pos_in_gene] = self.original end end seq[local_pos_in_gene] = seq[local_pos_in_gene].upcase seq[local_pos_in_gene] = self.snp if name == self.snp_in @parental_sequences [name] = seq end @parental_sequences end |
#primer_3_all_strings(target_chromosome, parental) ⇒ Object
374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 374 def primer_3_all_strings(target_chromosome, parental) pr = primer_region(target_chromosome, parental ) primer_3_propertes = Array.new seq_original = String.new(pr.sequence) if seq_original.size < primer_3_min_seq_length errors << "The sequence (#{seq_original.size}) is shorter than #{primer_3_min_seq_length}" return primer_3_propertes end if self.hit_count > self.max_hits errors << "The marker maps to #{self.hit_count} positions (max_hits: #{self.max_hits}). " repetitive = true return primer_3_propertes end seq_original[pr.snp_pos] = self.original seq_original_reverse = reverse_complement_string(seq_original) seq_snp = String.new(pr.sequence) seq_snp[pr.snp_pos] = self.snp seq_snp_reverse = reverse_complement_string(seq_snp) rev_pos = seq_snp.size - position if pr.homoeologous @snp_type = "homoeologous" else @snp_type = "non-homoeologous" end pr.chromosome_specific.each do |pos| args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_specific exon #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original} primer_3_propertes << return_primer_3_string(args) args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_specific exon #{@snp_type} #{chromosome}" args[:sequence] = seq_snp primer_3_propertes << return_primer_3_string(args) end pr.almost_chromosome_specific.each do |pos| args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_semispecific exon #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original} primer_3_propertes << return_primer_3_string(args) args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_semispecific exon #{@snp_type} #{chromosome}" args[:sequence] = seq_snp primer_3_propertes << return_primer_3_string(args) end pr.crhomosome_specific_intron.each do |pos| args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_specific intron #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original} primer_3_propertes << return_primer_3_string(args) args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_specific exon #{@snp_type} #{chromosome}" args[:sequence] = seq_snp primer_3_propertes << return_primer_3_string(args) end pr.almost_crhomosome_specific_intron.each do |pos| args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_semispecific intron #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :right_pos => pos, :sequence=>seq_original} primer_3_propertes << return_primer_3_string(args) args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_semispecific exon #{@snp_type} #{chromosome}" args[:sequence] = seq_snp primer_3_propertes << return_primer_3_string(args) end args = {:name =>"#{gene}:#{original}#{position}#{snp} #{original_name} chromosome_nonspecific all #{@snp_type} #{chromosome}", :left_pos => pr.snp_pos, :sequence=>seq_original} primer_3_propertes << return_primer_3_string(args) args[:name] = "#{gene}:#{original}#{position}#{snp} #{snp_in} chromosome_nonspecific all #{@snp_type} #{chromosome}" args[:sequence] = seq_snp primer_3_propertes << return_primer_3_string(args) primer_3_propertes end |
#primer_3_string(target_chromosome, parental) ⇒ Object
461 462 463 464 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 461 def primer_3_string(target_chromosome, parental) strings = primer_3_all_strings(target_chromosome, parental) strings.join end |
#primer_fasta_string ⇒ Object
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 235 def primer_fasta_string gene_region = self.covered_region local_pos_in_gene = self.local_position ret_str = "" surrounding_parental_sequences.each do |name, seq| ret_str << ">#{gene_region.entry}-#{self.position}_#{name}\n" ret_str << "#{seq}\n" end self.surrounding_exon_sequences.each do |chromosome, exon_seq| ret_str << ">#{chromosome}\n#{exon_seq}\n" end mask = surrounding_masked_chromosomal_snps(chromosome) ret_str << ">Mask\n#{mask}\n" pr = primer_region(chromosome, snp_in ) ret_str << pr.to_fasta ret_str end |
#primer_region(target_chromosome, parental) ⇒ Object
257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 257 def primer_region(target_chromosome, parental ) parental = aligned_sequences[parental].downcase names = aligned_sequences.keys target_chromosome = get_target_sequence(names, target_chromosome) chromosome_seq = aligned_sequences[target_chromosome] chromosome_seq = "-" * parental.size unless chromosome_seq chromosome_seq = chromosome_seq.downcase mask = mask_aligned_chromosomal_snp(target_chromosome) pr = PrimerRegion.new position_in_region = 0 (0..parental.size-1).each do |i| if chromosome_seq[i] != '-' or parental[i] != '-' case when mask[i] == '&' #This is the SNP we take the parental pr.snp_pos = position_in_region pr.homoeologous = false when mask[i] == ':' #This is the SNP we take the parental pr.snp_pos = position_in_region pr.homoeologous = true when mask[i] == '-' #When the mask doesnt detect a SNP, so we take the parental parental[i] = chromosome_seq[i] unless Bio::NucleicAcid::is_unambiguous(parental[i]) when /[[:upper:]]/.match(mask[i]) #This is a good candidate for marking a SNP #We validate that the consensus from the sam file accepts the variation from the chromosomal sequence if parental[i] == '-' parental[i] = mask[i] pr.crhomosome_specific_intron << position_in_region elsif Bio::NucleicAcid.is_valid(parental[i], mask[i]) parental[i] = mask[i] pr.chromosome_specific << position_in_region end when /[[:lower:]]/.match(mask[i]) #this is not that good candidate, but sitll gives specificity if parental[i] == '-' parental[i] = mask[i] pr.almost_crhomosome_specific_intron << position_in_region elsif Bio::NucleicAcid.is_valid(parental[i], mask[i]) parental[i] = mask[i].upcase pr.almost_chromosome_specific << position_in_region end end #Case closes position_in_region += 1 end #Closes region with bases end pr.sequence=parental.gsub('-','') pr end |
#return_primer_3_string(opts = {}) ⇒ Object
320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 320 def return_primer_3_string(opts={}) left = opts[:left_pos] right = opts[:right_pos] sequence = opts[:sequence] extra = opts[:extra] orientation = "forward" if opts[:right_pos] orientation = "forward" if left > right left = sequence.size - left - 1 right = sequence.size - right - 1 sequence = reverse_complement_string(sequence) orientation = "reverse" end if @variation_free_region > 0 check_str = sequence[right+1, @variation_free_region] return nil if check_str != check_str.downcase end end #puts "__" #puts self.inspect str = "SEQUENCE_ID=#{opts[:name]} #{orientation} \n" str << "SEQUENCE_FORCE_LEFT_END=#{left}\n" unless opts[:extra_f] str << "SEQUENCE_FORCE_RIGHT_END=#{right}\n" if opts[:right_pos] str << extra if extra str << opts[:extra_f] if opts[:extra_f] str << "SEQUENCE_TEMPLATE=#{sequence}\n" str << "=\n" #In case that we don't have a right primer, we do both orientations unless opts[:right_pos] sequence = opts[:sequence] left = sequence.size - left - 1 orientation = "reverse" sequence = reverse_complement_string(sequence) str << "SEQUENCE_ID=#{opts[:name]} #{orientation}\n" str << "SEQUENCE_FORCE_LEFT_END=#{left}\n" unless opts[:extra_r] str << opts[:extra_r] if opts[:extra_r] str << "SEQUENCE_TEMPLATE=#{sequence}\n" str << extra if extra str << "=\n" end str end |
#reverse_complement_string(sequenc_str) ⇒ Object
315 316 317 318 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 315 def reverse_complement_string(sequenc_str) complement = sequenc_str.tr('atgcrymkdhvbswnATGCRYMKDHVBSWN', 'tacgyrkmhdbvswnTACGYRKMHDBVSWN') complement.reverse! end |
#right_padding ⇒ Object
220 221 222 223 224 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 220 def right_padding ret = (2*flanking_size) - (left_padding + self.covered_region.size ) ret = 0 if ret < 0 ret end |
#sequences_to_align ⇒ Object
554 555 556 557 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 554 def sequences_to_align @sequences_to_align = surrounding_parental_sequences.merge(surrounding_exon_sequences) unless @sequences_to_align @sequences_to_align end |
#setTemplateFromFastaFile(fastaFile, flanking_size: 100) ⇒ Object
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 75 def setTemplateFromFastaFile(fastaFile ,flanking_size: 100) reg = Bio::DB::Fasta::Region.new reg.entry = gene reg.entry = @contig if @contig reg.start = position - flanking_size reg.end = position + flanking_size + 1 reg.orientation = :forward entry = fastaFile.index.region_for_entry(reg.entry) reg.start = 1 if reg.start < 1 reg.end = entry.length if reg.end > entry.length amb = Bio::NucleicAcid.to_IUAPC("#{original}#{snp}") @position = @position - reg.start + 1 @position = 1 if @position < 1 #puts "about to fetch" self.template_sequence = fastaFile.fetch_sequence(reg) #puts "done fetching" template_sequence[position - 1] = amb end |
#short_s ⇒ Object
457 458 459 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 457 def short_s "#{original}#{position}#{snp}".upcase end |
#snp_id_in_seq ⇒ Object
137 138 139 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 137 def snp_id_in_seq "#{original}#{position}#{snp}" end |
#surrounding_exon_sequences ⇒ Object
753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 753 def surrounding_exon_sequences return @surrounding_exon_sequences if @surrounding_exon_sequences gene_region = self.covered_region @surrounding_exon_sequences = Bio::Alignment::SequenceHash.new self.exon_list.each do |chromosome, exon_arr| exon_arr.each do |exon| exon_start_offset = exon.query_region.start - gene_region.start flanking_region = exon.target_flanking_region_from_position(position,flanking_size) #TODO: Padd when the exon goes over the regions... #puts flanking_region.inspect #Ignoring when the exon is in a gap unless exon.snp_in_gap exon_seq = container.chromosome_sequence(flanking_region) @surrounding_exon_sequences["#{chromosome}_#{flanking_region.start}_#{exon.record.score}"] = exon_seq end end end @surrounding_exon_sequences end |
#surrounding_masked_chromosomal_snps(chromosome) ⇒ Object
721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 721 def surrounding_masked_chromosomal_snps(chromosome) chromosomes = surrounding_exon_sequences names = chromosomes.keys get_target_sequence(names) masked_snps = chromosomes[chromosome].tr("-","+") if chromosomes[chromosome] masked_snps = "-" * (flanking_size * 2 ) unless chromosomes[chromosome] local_pos_in_gene = flanking_size i = 0 while i < masked_snps.size do different = 0 cov = 0 names.each do | chr | if chromosomes[chr][i] != "-" and chromosomes[chr][i]. != 'N' and chromosomes[chr][i]. != 'n' cov += 1 if chr != chromosome and masked_snps[i] != "+" different += 1 if masked_snps[i] != chromosomes[chr][i] end end end masked_snps[i] = "-" if different == 0 and masked_snps[i] != "+" masked_snps[i] = "-" if cov < 2 masked_snps[i] = masked_snps[i].upcase if different > 1 if i == local_pos_in_gene masked_snps[i] = "&" end i += 1 end masked_snps end |
#surrounding_parental_sequences ⇒ Object
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 499 def surrounding_parental_sequences return @surrounding_parental_sequences if @surrounding_parental_sequences gene_region = self.covered_region local_pos_in_gene = self.local_position @surrounding_parental_sequences = Bio::Alignment::SequenceHash.new container.parents.each do |name, bam| seq = nil if bam seq = bam.consensus_with_ambiguities({:region=>gene_region}).to_s else seq = container.gene_model_sequence(gene_region) #puts "#{name} #{self.snp_in}" #puts "Modifing original: #{name}\n#{seq}" unless name == self.snp_in seq[local_pos_in_gene] = self.original else seq[local_pos_in_gene] = self.snp end #puts "#{seq}" end seq[local_pos_in_gene] = seq[local_pos_in_gene].upcase seq[local_pos_in_gene] = self.snp if name == self.snp_in @surrounding_parental_sequences [name] = cut_and_pad_sequence_to_primer_region(seq) end @surrounding_parental_sequences end |
#to_fasta ⇒ Object
160 161 162 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 160 def to_fasta return ">#{self.gene}\n#{self.template_sequence}\n" end |
#to_polymarker_coordinates(flanking_size, total: nil) ⇒ Object
106 107 108 109 110 111 112 113 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 106 def to_polymarker_coordinates(flanking_size, total:nil) start = position - flanking_size + 1 start = 0 if start < 0 total = flanking_size * 2 unless total total += 1 new_position = position - start + 2 [start , total, new_position ] end |
#to_polymarker_sequence(flanking_size, total: nil) ⇒ Object
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 115 def to_polymarker_sequence(flanking_size, total:nil) out = template_sequence.clone snp_seq = "[#{original}/#{snp}]" p = position-1 if orientation == :reverse p = out.length - p - 1 s = Bio::Sequence::NA.new(out) s1 = Bio::Sequence::NA.new(original) s2 = Bio::Sequence::NA.new(snp) out = s.reverse_complement snp_seq = "[#{s1.reverse_complement}/#{s2.reverse_complement}]" end out[p] = snp_seq start = position - flanking_size - 1 start = 0 if start < 0 total = flanking_size * 2 unless total total += 5 out[start , total ].upcase end |
#to_s ⇒ Object
453 454 455 |
# File 'lib/bio/PolyploidTools/SNP.rb', line 453 def to_s "#{gene}:#{original}#{position}#{snp}#{chromosome}" end |