Class: Bio::HMMER::HMMER3::DefaultHMMSearchReport

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/appl/hmmer/hmmer3/default_report.rb

Overview

This class is for parsing HMMSearch outputs from the default output

Defined Under Namespace

Classes: Hit

Constant Summary collapse

DELIMITER =

Delimiter of each entry for Bio::FlatFile support.

RS = "\n//\n"

Instance Method Summary collapse

Constructor Details

#initialize(data) ⇒ DefaultHMMSearchReport

Returns a new instance of DefaultHMMSearchReport.



30
31
32
33
34
35
# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 30

def initialize(data)
  # The input data is divided into chunks, a hash called @report_chunks (previously called subdata)
  @report_chunks = get_subdata(data)
  
  @log = Bio::Log::LoggerPlus['bio-hmmer3report']
end

Instance Method Details

#hitsObject

Return an array of HMMER3::Hit objects from this report



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 109

def hits
  return [] unless @report_chunks['alignment'].match(/^>>/)
  # For each hit sequence (hits)
  sequence_annotations = @report_chunks['alignment'].split(">>")
  # puts "Found #{sequence_annotations.length} different hits e.g. #{sequence_annotations[0]}\n\n and #{sequence_annotations[1]} and \n\n #{sequence_annotations[sequence_annotations.length-1]}"
  
  alignments = []
  sequence_annotations.each_with_index do |seq_annot, i|
    #Ignore the first split as it is rubbish leftover from the split above
    next if i==0
    
    # Now split on \n\n. Each of these splits should have 1 or more domains associated
    
    #First of this split will be "stanzas" like this
    #>> 637984252  Acid345_2236 D-isomer specific 2-hydroxyacid dehydrogenase, NAD-binding [Korebacter versatilis Ellin345]
    #   #    score  bias  c-Evalue  i-Evalue hmmfrom  hmm to    alifrom  ali to    envfrom  env to     acc
    # ---   ------ ----- --------- --------- ------- -------    ------- -------    ------- -------    ----
    #   1 !  120.2   0.5   7.6e-39   1.2e-35       1     133 []       3     313 ..       3     313 .. 0.98
    
    #And AFTER the first split comes "stanzas" like this
    #  Alignments for each domain:
    #  == domain 1    score: 120.2 bits;  conditional E-value: 7.6e-39
    #                 EEECST.-CCHHHHHCC..TEEEEEEC.GSSHHHHHC....-SEEEE-TTS-BSHHHHCC-TT--EEEES----TTB-HHHHHH---EEE--TTTTHHH CS
    #                 xxxxxxxxxxxxxxxxx..xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx RF
    #  2-Hacid_dh   1 vlileplreeelellke..gvevevkd.ellteellekakdadalivrsntkvtaevleklpkLkviatagvGvDniDldaakerGIlVtnvpgystes 96 
    #                 +++ e++ +++++l+k+    +v++ d   ++e+lle++k+adalivrs   v+a++le++ +L+vi++agvGvDni l+aa+++GI+V+n+pg+++ +
    #   637982115   3 IVVAEKIAKAAIDLFKQdpTWNVVTPDqVAQKEQLLEQLKGADALIVRSAVFVDAAMLEHADQLRVIGRAGVGVDNIELEAATRKGIAVMNTPGANAIA 101
    #                 6889999**********8544777777778888****************************************************************** PP
    stanzas = seq_annot.split("\n\n")
    
    hit = Hit.new
    hsps = []
    
    # Parse the first
    lines = stanzas[0].split("\n")
    sequence_name = lines[0].gsub(/^ /,'')
    hit.sequence_name = sequence_name
    
    @log.debug "Now parsing table for #{sequence_name}"
    return [] if lines[1].match(/^   \[No individual domains that/)

    lines[3..(lines.length-1)].each do |line|
      annotation = Hit::Hsp.new
      
      splits = line.split(/\s+/)
      i = 1
      annotation.number = splits[i].to_i; i+=2
      annotation.score = splits[i].to_f; i+=1
      annotation.bias = splits[i].to_f; i+=1
      annotation.c_evalue = splits[i].to_f; i+=1
      annotation.i_evalue = splits[i].to_f; i+=1
      annotation.hmmfrom = splits[i].to_i; i+=1
      annotation.hmm_to = splits[i].to_i; i+=2
      annotation.alifrom = splits[i].to_i; i+=1
      annotation.ali_to = splits[i].to_i; i+=2
      annotation.envfrom = splits[i].to_i; i+=1
      annotation.env_to = splits[i].to_i; i+=2
      annotation.acc = splits[i].to_f
      hsps.push annotation
    end
    
    # Parse the second stanza and beyond
    current_hsp = nil
    (1..(stanzas.length-1)).each_with_index do |aln, index|
      next if stanzas[aln] == "\n"
      stanza = stanzas[aln].split("\n")
      line_offset = 0
      line_offset += 1 if index==0 #to account for the "Alignments for each domain:" line
      # Is this a new HSP being described?
      if matches = stanza[line_offset].match(/^  == domain (\d+)/)
        domain_index = matches[1].to_i-1
        current_hsp = hsps[domain_index]
        line_offset += 1 # to account for the "== domain 1    score: 26.8 bits;  conditional E-value: 5.7e-10" line
      end
      
      # Detect CS and RF lines
      line_offset += 1 if stanza[line_offset].split(/\s+/)[2] == 'CS'
      line_offset += 1 if stanza[line_offset].split(/\s+/)[2] == 'RF'
      
      # Add the lines to the relevant places
      current_hsp.hmmseq ||= ''
      current_hsp.hmmseq += stanza[line_offset].split(/\s+/)[3]
      current_hsp.flatseq ||= ''
      current_hsp.flatseq += stanza[2+line_offset].split(/\s+/)[3]
    end
    
    hit.hsps = hsps
    alignments.push hit
    
    @log.debug "Parsed alignments for sequence #{hit.sequence_name}" if @log.debug?
  end
  
  return alignments
end

#queryObject



83
84
85
86
87
# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 83

def query
  return @query unless @query.nil?
  parse_query
  return @query
end

#query_accessionObject



89
90
91
92
93
# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 89

def query_accession
  return @query_accession unless @query_accession.nil?
  parse_query
  return @query_accession
end

#query_descriptionObject



95
96
97
98
99
# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 95

def query_description
  return @query_description unless @query_description.nil?
  parse_query
  return @query_description
end