Class: FastaManip
- Inherits:
-
Object
- Object
- FastaManip
- Defined in:
- lib/bacterial-annotator/fasta-manip.rb
Instance Attribute Summary collapse
-
#fasta_file ⇒ Object
readonly
Returns the value of attribute fasta_file.
-
#fasta_flat ⇒ Object
readonly
Returns the value of attribute fasta_flat.
-
#prodigal_files ⇒ Object
readonly
Returns the value of attribute prodigal_files.
Instance Method Summary collapse
-
#initialize(fasta_file, meta) ⇒ FastaManip
constructor
Initialize fasta holder.
-
#print_sequence_for_gbk(seq) ⇒ Object
Utility function to print the sequence to the end of a gbk file.
-
#run_prodigal(root, outdir) ⇒ Object
Run prodigal on the genome to annotate.
-
#split_fasta(outdir) ⇒ Object
Split a multi-FASTA file into one FASTA file per sequence. RETURN : array of fasta files.
-
#split_genbank(outdir, multigbk) ⇒ Object
Split Multi Genbanks file RETURN : array of genbank files.
Constructor Details
#initialize(fasta_file, meta) ⇒ FastaManip
Initialize fasta holder
16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/bacterial-annotator/fasta-manip.rb', line 16

# Initialize fasta holder.
#
# Opens +fasta_file+ through Bio::FlatFile.auto and aborts the process when
# the detected database class is not Bio::FastaFormat.
#
# @param fasta_file path of the sequence file to open
# @param meta stored in @meta; run_prodigal adds "-p meta" to the prodigal
#   command line when it is truthy
def initialize(fasta_file, meta)
  @fasta_file = fasta_file
  @fasta_flat = Bio::FlatFile.auto(@fasta_file)
  @meta = meta
  # Populated later by run_prodigal / split_fasta.
  @prodigal_files = nil
  @single_fasta = nil
  @seq_info = nil
  if @fasta_flat.dbclass != Bio::FastaFormat
    abort "Aborting : The input sequence is not a fasta file !"
  end
end
Instance Attribute Details
#fasta_file ⇒ Object (readonly)
Returns the value of attribute fasta_file.
13 14 15 |
# File 'lib/bacterial-annotator/fasta-manip.rb', line 13

# Read-only accessor.
#
# @return the current value of the @fasta_file instance variable
def fasta_file
  @fasta_file
end
#fasta_flat ⇒ Object (readonly)
Returns the value of attribute fasta_flat.
13 14 15 |
# File 'lib/bacterial-annotator/fasta-manip.rb', line 13

# Read-only accessor.
#
# @return the current value of the @fasta_flat instance variable
def fasta_flat
  @fasta_flat
end
#prodigal_files ⇒ Object (readonly)
Returns the value of attribute prodigal_files.
13 14 15 |
# File 'lib/bacterial-annotator/fasta-manip.rb', line 13

# Read-only accessor.
#
# @return the current value of the @prodigal_files instance variable
def prodigal_files
  @prodigal_files
end
Instance Method Details
#print_sequence_for_gbk(seq) ⇒ Object
Utility function to print the sequence to the end of a gbk file
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/bacterial-annotator/fasta-manip.rb', line 106

# Utility function to build the sequence section placed at the end of a gbk
# file.
#
# The text starts with "ORIGIN\n", followed by one line per 60 nucleotides:
# the 1-based position of the line's first nucleotide formatted with "%9s ",
# then the lower-cased nucleotides in space-separated groups of 10. The text
# ends with "//" (no trailing newline).
#
# @param seq object whose #seq returns the nucleotide string
# @return [Array] two elements: the formatted text and the sequence length
def print_sequence_for_gbk(seq)
  sequence = seq.seq.downcase
  # Append in place with <<; repeated += would copy the whole buffer for
  # every line, which is quadratic on large contigs.
  outseq = String.new("ORIGIN\n")
  offset = 0
  # The body runs at least once, so an empty sequence still yields the single
  # (empty) numbered line.
  loop do
    outseq << ("%9s " % (offset + 1))
    outseq << sequence[offset, 60].scan(/.{1,10}/).join(" ")
    outseq << "\n"
    offset += 60
    break if offset >= sequence.length
  end
  outseq << "//"
  [outseq, sequence.length]
end
#run_prodigal(root, outdir) ⇒ Object
Run prodigal on the genome to annotate
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/bacterial-annotator/fasta-manip.rb', line 32

# Run prodigal on the genome to annotate.
#
# Creates +outdir+ when missing, runs "#{root}/prodigal.linux" on @fasta_file
# (adding "-p meta" when @meta is truthy), records the output locations in
# @prodigal_files, then calls split_fasta, split_genbank and
# extract_cds_names.
#
# @param root directory that contains the prodigal.linux executable
# @param outdir directory receiving Proteins.fa, Genes.fa and Genbanks.gbk
# @return [Hash] the @prodigal_files hash
def run_prodigal(root, outdir)
  # Dir.exists? was removed in Ruby 3.2; Dir.exist? works on every version.
  Dir.mkdir("#{outdir}") unless Dir.exist?("#{outdir}")

  proteins = "#{outdir}/Proteins.fa"
  genes = "#{outdir}/Genes.fa"
  multi_gbk = "#{outdir}/Genbanks.gbk"

  # Pass the arguments as a list so no shell is involved: paths containing
  # spaces or shell metacharacters reach prodigal unchanged.
  command = ["#{root}/prodigal.linux"]
  command.push("-p", "meta") if @meta
  command.push("-i", @fasta_file.to_s, "-a", proteins, "-d", genes, "-o", multi_gbk, "-q")
  # system returns nil when the program cannot be started and false on a
  # non-zero exit status; stop here rather than fail later on missing files.
  abort "Aborting : prodigal failed on #{@fasta_file} !" unless system(*command)

  @prodigal_files = {
    multiGBK: multi_gbk,
    contigs: [],
    contigs_length: [],
    genes: genes,
    proteins: proteins,
    prot_ids_by_contig: {},
    fasta_path: "#{outdir}/single-fasta/",
    gbk_path: "#{outdir}/single-genbank/"
  }
  split_fasta outdir
  split_genbank outdir, multi_gbk
  extract_cds_names
  @prodigal_files
end
#split_fasta(outdir) ⇒ Object
Split a multi-FASTA file into one FASTA file per sequence. RETURN : array of fasta files
58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/bacterial-annotator/fasta-manip.rb', line 58

# Split the multi-FASTA input into one file per sequence.
#
# For every entry yielded by @fasta_flat.each_entry, the first
# whitespace-separated word of the definition line is used as the name: the
# entry is written to "#{outdir}/single-fasta/<name>.fasta", the name and
# sequence length are appended to @prodigal_files[:contigs] and
# @prodigal_files[:contigs_length], and the entry is stored in
# @single_fasta[name].
#
# @param outdir directory in which the single-fasta directory is created
# @return whatever @fasta_flat.each_entry returns
def split_fasta(outdir)
  @single_fasta = {}
  fasta_dir = "#{outdir}/single-fasta"
  # Dir.exists? was removed in Ruby 3.2; Dir.exist? works on every version.
  Dir.mkdir(fasta_dir) unless Dir.exist?(fasta_dir)
  @fasta_flat.each_entry do |seq|
    file_name = seq.definition.chomp.split(" ")[0]
    @prodigal_files[:contigs] << "#{file_name}"
    @prodigal_files[:contigs_length] << seq.seq.length
    File.open("#{fasta_dir}/#{file_name}.fasta", "w") do |fwrite|
      fwrite.write(seq)
    end
    @single_fasta[file_name] = seq
  end
end
#split_genbank(outdir, multigbk) ⇒ Object
Split Multi Genbanks file RETURN : array of genbank files
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# File 'lib/bacterial-annotator/fasta-manip.rb', line 75

# Split the multi-record genbank file into one file per record.
#
# Each "DEFINITION" line starts a record: the name is taken from the seqhdr
# field of that line, a LOCUS line and a DEFINITION line are written to
# "#{outdir}/single-genbank/<name>.gbk", and the following lines are copied
# except those containing " /note=". A line starting with "//" ends the
# record: the text built by print_sequence_for_gbk for
# @single_fasta[name] is written and the file is closed.
#
# NOTE(review): the widths of the space runs inside the string literals below
# are reproduced as seen in this listing — confirm them against the source
# file if fixed-column output matters.
#
# @param outdir directory in which the single-genbank directory is created
# @param multigbk path of the multi-record genbank file to read
# @return [nil]
def split_genbank(outdir, multigbk)
  gbk_dir = "#{outdir}/single-genbank"
  # Dir.exists? was removed in Ruby 3.2; Dir.exist? works on every version.
  Dir.mkdir(gbk_dir) unless Dir.exist?(gbk_dir)

  fopen = nil   # output handle of the record being written, nil between records
  outseq = nil  # sequence text of the current record, written at its "//" line
  File.foreach(multigbk) do |l|
    if l[0..9] == "DEFINITION"
      # A previous record that never reached its "//" line must not leak.
      fopen.close if fopen
      file_name = l.chomp.split(";")[2].gsub("seqhdr","").delete("\"").delete("=").split(" ")[0]
      outseq, seq_length = print_sequence_for_gbk @single_fasta[file_name]
      spacer = " " * (20-seq_length.to_s.length)
      # Same text as zero-padded day, abbreviated English month name and year.
      stamp = Time.now.strftime("%d-%b-%Y")
      locus = "LOCUS #{file_name}#{spacer}#{seq_length.to_s} bp DNA linear BCT #{stamp}\n"
      locus += "DEFINITION #{file_name}\n"
      fopen = File.open("#{gbk_dir}/#{file_name}.gbk", "w")
      fopen.write(locus)
    elsif fopen.nil?
      # Line outside any record (before the first DEFINITION or after a
      # "//"): there is no file to write it to.
      next
    elsif l[0..1] == "//"
      fopen.write(outseq)
      fopen.close
      fopen = nil
    elsif ! l.include? " /note="
      fopen.write(l)
    end
  end
  nil
ensure
  # Close the handle when the input ends mid-record or an error is raised.
  fopen.close if fopen
end