Class: Bio::PolyploidTools::ExonContainer
- Inherits:
-
Object
- Object
- Bio::PolyploidTools::ExonContainer
- Defined in:
- lib/bio/PolyploidTools/ExonContainer.rb
Constant Summary collapse
- BASES =
[:A, :C, :G, :T]
Instance Attribute Summary collapse
-
#chromosomes(path) ⇒ Object
readonly
Sets the reference file for the gene models.
-
#flanking_size ⇒ Object
Returns the value of attribute flanking_size.
-
#gene_models_db ⇒ Object
readonly
Returns the value of attribute gene_models_db.
-
#max_hits ⇒ Object
Returns the value of attribute max_hits.
-
#parental_1_name ⇒ Object
readonly
Returns the value of attribute parental_1_name.
-
#parental_1_sam ⇒ Object
readonly
Returns the value of attribute parental_1_sam.
-
#parental_2_name ⇒ Object
readonly
Returns the value of attribute parental_2_name.
-
#parental_2_sam ⇒ Object
readonly
Returns the value of attribute parental_2_sam.
-
#parents ⇒ Object
readonly
Returns the value of attribute parents.
-
#primer_3_min_seq_length ⇒ Object
Returns the value of attribute primer_3_min_seq_length.
-
#snp_map ⇒ Object
readonly
Returns the value of attribute snp_map.
Instance Method Summary collapse
- #add_alignments(opts = Hash.new) ⇒ Object
- #add_chromosome_arm(opts) ⇒ Object
- #add_parental(opts = Hash.new) ⇒ Object
- #add_snp(snp) ⇒ Object
- #add_snp_file(filename, chromosome, snp_in, original_name) ⇒ Object
-
#chromosome_sequence(region) ⇒ Object
Retunrs the sequence for a region in the gene models (exon).
- #fasta_string_for_snp(snp) ⇒ Object
-
#gene_model_sequence(region) ⇒ Object
Returns the sequence for a region in the gene models (exon).
- #gene_models(path) ⇒ Object
-
#initialize ⇒ ExonContainer
constructor
Sets the reference file for the gene models.
- #print_fasta_snp_exones(file) ⇒ Object
- #print_primer_3_exons(file, target_chromosome, parental) ⇒ Object
- #remove_alignments_over_max_hits ⇒ Object
Constructor Details
#initialize ⇒ ExonContainer
Sets the reference file for the gene models
13 14 15 16 17 18 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 13 def initialize @parents=Hash.new @snp_map = Hash.new @primer_3_min_seq_length = 50 @max_hits = 10 end |
Instance Attribute Details
#chromosomes(path) ⇒ Object (readonly)
Sets the reference file for the gene models
50 51 52 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 50 def chromosomes @chromosomes end |
#flanking_size ⇒ Object
Returns the value of attribute flanking_size.
8 9 10 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 8 def flanking_size @flanking_size end |
#gene_models_db ⇒ Object (readonly)
Returns the value of attribute gene_models_db.
5 6 7 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 5 def gene_models_db @gene_models_db end |
#max_hits ⇒ Object
Returns the value of attribute max_hits.
8 9 10 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 8 def max_hits @max_hits end |
#parental_1_name ⇒ Object (readonly)
Returns the value of attribute parental_1_name.
5 6 7 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 5 def parental_1_name @parental_1_name end |
#parental_1_sam ⇒ Object (readonly)
Returns the value of attribute parental_1_sam.
4 5 6 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 4 def parental_1_sam @parental_1_sam end |
#parental_2_name ⇒ Object (readonly)
Returns the value of attribute parental_2_name.
5 6 7 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 5 def parental_2_name @parental_2_name end |
#parental_2_sam ⇒ Object (readonly)
Returns the value of attribute parental_2_sam.
4 5 6 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 4 def parental_2_sam @parental_2_sam end |
#parents ⇒ Object (readonly)
Returns the value of attribute parents.
7 8 9 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 7 def parents @parents end |
#primer_3_min_seq_length ⇒ Object
Returns the value of attribute primer_3_min_seq_length.
8 9 10 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 8 def primer_3_min_seq_length @primer_3_min_seq_length end |
#snp_map ⇒ Object (readonly)
Returns the value of attribute snp_map.
6 7 8 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 6 def snp_map @snp_map end |
Instance Method Details
#add_alignments(opts = Hash.new) ⇒ Object
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 181 def add_alignments(opts=Hash.new) opts = { :min_identity=>90, filter_best:false }.merge!(opts) exonerate_filename = opts[:exonerate_file] arm_selection = opts[:arm_selection] filter_best = opts[:filter_best] unless arm_selection arm_selection = lambda do | contig_name | ret = contig_name[0,3] return ret end end File.open(exonerate_filename) do |f| f.each_line do | line | record = Bio::DB::Exonerate::Alignment.parse_custom(line) if record and record.identity >= opts[:min_identity] snp_array = @snp_map[record.query_id] if snp_array != nil snp_array.each do |snp| if snp != nil and snp.position.between?( (record.query_start + 1) , record.query_end) begin exon = record.exon_on_gene_position(snp.position) snp.add_exon(exon, arm_selection.call(record.target_id), filter_best:filter_best) rescue Bio::DB::Exonerate::ExonerateException $stderr.puts "Failed for the range #{record.query_start}-#{record.query_end} for position #{snp.position}" end end end end end end end remove_alignments_over_max_hits end |
#add_chromosome_arm(opts) ⇒ Object
70 71 72 73 74 75 76 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 70 def add_chromosome_arm(opts) @chromosomes = Hash.new unless @chromosomes name = opts[:name] path = opts[:reference_path] path = opts[:alig_path] chromosomes[name] = Bio::DB::Fasta::FastaFile.new({:fasta=>path}) end |
#add_parental(opts = Hash.new) ⇒ Object
232 233 234 235 236 237 238 239 240 241 242 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 232 def add_parental(opts=Hash.new) # opts = { :name=>opts[:path]}.merge!(opts) sam = nil name = opts[:name] ? opts[:name] : "Unknown" if opts[:path] path = opts[:path] name = opts[:name] ? opts[:name] : path.basename(".bam") sam = Bio::DB::Sam.new({:fasta=>@gene_models_path, :bam=>opts[:path]}) end @parents[name] = sam end |
#add_snp(snp) ⇒ Object
78 79 80 81 82 83 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 78 def add_snp(snp) snp.max_hits = self.max_hits @snp_map[snp.gene] = Array.new unless @snp_map[snp.gene] @snp_map[snp.gene] << snp end |
#add_snp_file(filename, chromosome, snp_in, original_name) ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 85 def add_snp_file(filename, chromosome, snp_in, original_name) File.open(filename) do | f | f.each_line do | line | snp = SNP.parse(line) snp.flanking_size = flanking_size if snp.position > 0 snp.container = self snp.chromosome = chromosome snp.snp_in = snp_in snp.original_name = original_name @snp_map[snp.gene] = Array.new unless @snp_map[snp.gene] @snp_map[snp.gene] << snp end end end end |
#chromosome_sequence(region) ⇒ Object
Retunrs the sequence for a region in the gene models (exon)
56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 56 def chromosome_sequence(region) left_pad = 0 #TODO: Padd if it goes to the right if(region.start < 1) left_pad = region.start * -1 left_pad += 1 region.start = 1 end str = "-" * left_pad << @chromosomes_db.fetch_sequence(region) #str << "n" * (region.size - str.size + 1) if region.size > str.size str end |
#fasta_string_for_snp(snp) ⇒ Object
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 106 def fasta_string_for_snp(snp) gene_region = snp.covered_region local_pos_in_gene = snp.local_position ret_str = "" @parents.each do |name, bam| ret_str << ">#{gene_region.id}_SNP-#{snp.position}_#{name} Overlapping_exons:#{gene_region.to_s} localSNPpo:#{local_pos_in_gene+1}\n" to_print = bam.consensus_with_ambiguities({:region=>gene_region}).to_s to_print[local_pos_in_gene] = to_print[local_pos_in_gene].upcase ret_str << to_print << "\n" end snp.exon_list.each do | chromosome, exon | target_region = exon.target_region exon_start_offset = exon.query_region.start - gene_region.start chr_local_pos=local_pos_in_gene + target_region.start + 1 ret_str << ">#{chromosome}_SNP-#{chr_local_pos} #{exon.to_s} #{target_region.orientation}\n" to_print = "-" * exon_start_offset chr_seq = chromosome_sequence(exon.target_region).to_s l_pos = exon_start_offset + local_pos_in_gene to_print << chr_seq to_print[local_pos_in_gene] = to_print[local_pos_in_gene].upcase ret_str << to_print end ret_str end |
#gene_model_sequence(region) ⇒ Object
Returns the sequence for a region in the gene models (exon)
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 27 def gene_model_sequence(region) #puts "Region: " #puts region.inspect target_reg = @gene_models_db.index.region_for_entry(region.entry) #puts target_reg.inspect region.end = target_reg.length if region.end > target_reg.length #entries[region.entry] seq=@gene_models_db.fetch_sequence(region) #puts "sequence: " #This is a patch that we need to fix in biosamtools: #puts seq index = seq.index('>') if(index ) index -= 1 #puts "Index: #{index}" seq = seq.slice(0..index) end #puts seq seq end |
#gene_models(path) ⇒ Object
20 21 22 23 24 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 20 def gene_models(path) @gene_models_db = Bio::DB::Fasta::FastaFile.new({:fasta=>path}) @gene_models_db.index @gene_models_path = path end |
#print_fasta_snp_exones(file) ⇒ Object
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 132 def print_fasta_snp_exones (file) @missing_exons = Set.new unless @missing_exons @snp_map.each do | gene, snp_array| snp_array.each do |snp| #file.puts snp.primer_fasta_string #puts "In print_fast_np_exones" #puts snp.inspect begin file.puts snp.aligned_sequences_fasta rescue Exception=>e #puts snp.inspect @missing_exons << snp.to_s $stderr.puts "print_fasta_snp_exones:" + snp.to_s + ":" + e.to_s $stderr.puts "Local position: #{snp.local_position}" $stderr.puts "Local position: #{snp.parental_sequences.to_s}" $stderr.puts e.backtrace end end end end |
#print_primer_3_exons(file, target_chromosome, parental) ⇒ Object
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 154 def print_primer_3_exons (file, target_chromosome , parental ) added = 0 @snp_map.each do | gene, snp_array| snp_array.each do |snp| string = "" begin primer_3_min_seq_length string = snp.primer_3_string( snp.chromosome, parental ) #TODO: add tan error to the SNP this snp has more than max_hits. #Or maybe inside the SNP file. if string.size > 0 file.puts string added += 1 end rescue Exception=>e @missing_exons << snp.to_s # $stderr.puts "" $stderr.puts "print_primer_3_exons: #{e.to_s} : snp.to_s" $stderr.puts e.backtrace end end end return added end |
#remove_alignments_over_max_hits ⇒ Object
218 219 220 221 222 223 224 225 226 227 228 229 230 |
# File 'lib/bio/PolyploidTools/ExonContainer.rb', line 218 def remove_alignments_over_max_hits @snp_map.each_pair do | gene, snp_array| snp_array.each do |snp| total_hits = snp.exon_list.map {|e| e[1].size}.reduce(0,:+) snp.hit_count = total_hits if total_hits > max_hits snp.exon_list = {} snp.repetitive = true snp.errors << "The marker is in a repetitive region (#{total_hits} hits to reference)" end end end end |