Class: Bio::HMMER::HMMER3::DefaultHMMSearchReport
- Inherits:
-
Object
- Object
- Bio::HMMER::HMMER3::DefaultHMMSearchReport
- Defined in:
- lib/bio/appl/hmmer/hmmer3/default_report.rb
Overview
This class is for parsing HMMSearch outputs from the default output
Defined Under Namespace
Classes: Hit
Constant Summary collapse
- DELIMITER =
Delimiter of each entry for Bio::FlatFile support.
RS = "\n//\n"
Instance Method Summary collapse
-
#hits ⇒ Object
Return an array of HMMER3::Hit objects from this report.
-
#initialize(data) ⇒ DefaultHMMSearchReport
constructor
A new instance of DefaultHMMSearchReport.
- #query ⇒ Object
- #query_accession ⇒ Object
- #query_description ⇒ Object
Constructor Details
#initialize(data) ⇒ DefaultHMMSearchReport
Returns a new instance of DefaultHMMSearchReport.
30 31 32 33 34 35 |
# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 30 def initialize(data) # The input data is divided into chunks, a hash called @report_chunks (previously called subdata) @report_chunks = get_subdata(data) @log = Bio::Log::LoggerPlus['bio-hmmer3report'] end |
Instance Method Details
#hits ⇒ Object
Return an array of HMMER3::Hit objects from this report
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 109 def hits return [] unless @report_chunks['alignment'].match(/^>>/) # For each hit sequence (hits) sequence_annotations = @report_chunks['alignment'].split(">>") # puts "Found #{sequence_annotations.length} different hits e.g. #{sequence_annotations[0]}\n\n and #{sequence_annotations[1]} and \n\n #{sequence_annotations[sequence_annotations.length-1]}" alignments = [] sequence_annotations.each_with_index do |seq_annot, i| #Ignore the first split as it is rubbish leftover from the split above next if i==0 # Now split on \n\n. Each of these splits should have 1 or more domains associated #First of this split will be "stanzas" like this #>> 637984252 Acid345_2236 D-isomer specific 2-hydroxyacid dehydrogenase, NAD-binding [Korebacter versatilis Ellin345] # # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc # --- ------ ----- --------- --------- ------- ------- ------- ------- ------- ------- ---- # 1 ! 120.2 0.5 7.6e-39 1.2e-35 1 133 [] 3 313 .. 3 313 .. 0.98 #And AFTER the first split comes "stanzas" like this # Alignments for each domain: # == domain 1 score: 120.2 bits; conditional E-value: 7.6e-39 # EEECST.-CCHHHHHCC..TEEEEEEC.GSSHHHHHC....-SEEEE-TTS-BSHHHHCC-TT--EEEES----TTB-HHHHHH---EEE--TTTTHHH CS # xxxxxxxxxxxxxxxxx..xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx RF # 2-Hacid_dh 1 vlileplreeelellke..gvevevkd.ellteellekakdadalivrsntkvtaevleklpkLkviatagvGvDniDldaakerGIlVtnvpgystes 96 # +++ e++ +++++l+k+ +v++ d ++e+lle++k+adalivrs v+a++le++ +L+vi++agvGvDni l+aa+++GI+V+n+pg+++ + # 637982115 3 IVVAEKIAKAAIDLFKQdpTWNVVTPDqVAQKEQLLEQLKGADALIVRSAVFVDAAMLEHADQLRVIGRAGVGVDNIELEAATRKGIAVMNTPGANAIA 101 # 6889999**********8544777777778888****************************************************************** PP stanzas = seq_annot.split("\n\n") hit = Hit.new hsps = [] # Parse the first lines = stanzas[0].split("\n") sequence_name = lines[0].gsub(/^ /,'') hit.sequence_name = sequence_name @log.debug "Now parsing table for #{sequence_name}" return [] if lines[1].match(/^ \[No individual domains that/) lines[3..(lines.length-1)].each do |line| annotation = Hit::Hsp.new splits = line.split(/\s+/) i = 1 annotation.number = splits[i].to_i; i+=2 annotation.score = splits[i].to_f; i+=1 annotation.bias = splits[i].to_f; i+=1 annotation.c_evalue = splits[i].to_f; i+=1 annotation.i_evalue = splits[i].to_f; i+=1 annotation.hmmfrom = splits[i].to_i; i+=1 annotation.hmm_to = splits[i].to_i; i+=2 annotation.alifrom = splits[i].to_i; i+=1 annotation.ali_to = splits[i].to_i; i+=2 annotation.envfrom = splits[i].to_i; i+=1 annotation.env_to = splits[i].to_i; i+=2 annotation.acc = splits[i].to_f hsps.push annotation end # Parse the second stanza and beyond current_hsp = nil (1..(stanzas.length-1)).each_with_index do |aln, index| next if stanzas[aln] == "\n" stanza = stanzas[aln].split("\n") line_offset = 0 line_offset += 1 if index==0 #to account for the "Alignments for each domain:" line # Is this a new HSP being described? if matches = stanza[line_offset].match(/^ == domain (\d+)/) domain_index = matches[1].to_i-1 current_hsp = hsps[domain_index] line_offset += 1 # to account for the "== domain 1 score: 26.8 bits; conditional E-value: 5.7e-10" line end # Detect CS and RF lines line_offset += 1 if stanza[line_offset].split(/\s+/)[2] == 'CS' line_offset += 1 if stanza[line_offset].split(/\s+/)[2] == 'RF' # Add the lines to the relevant places current_hsp.hmmseq ||= '' current_hsp.hmmseq += stanza[line_offset].split(/\s+/)[3] current_hsp.flatseq ||= '' current_hsp.flatseq += stanza[2+line_offset].split(/\s+/)[3] end hit.hsps = hsps alignments.push hit @log.debug "Parsed alignments for sequence #{hit.sequence_name}" if @log.debug? end return alignments end |
#query ⇒ Object
83 84 85 86 87 |
# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 83 def query return @query unless @query.nil? parse_query return @query end |
#query_accession ⇒ Object
89 90 91 92 93 |
# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 89 def query_accession return @query_accession unless @query_accession.nil? parse_query return @query_accession end |
#query_description ⇒ Object
95 96 97 98 99 |
# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 95 def query_description return @query_description unless @query_description.nil? parse_query return @query_description end |