Class: Bio::HMMER::HMMER3::DefaultHMMSearchReport

Inherits:

Object

Object
Bio::HMMER::HMMER3::DefaultHMMSearchReport

show all

Defined in:: lib/bio/appl/hmmer/hmmer3/default_report.rb

Overview

This class is for parsing HMMSearch outputs from the default output

Defined Under Namespace

Classes: Hit

Constant Summary collapse

DELIMITER = Delimiter of each entry for Bio::FlatFile support.

RS = "\n//\n"

Instance Method Summary collapse

#hits ⇒ Object

Return an array of HMMER3::Hit objects from this report.
#initialize(data) ⇒ DefaultHMMSearchReport constructor

A new instance of DefaultHMMSearchReport.
#query ⇒ Object
#query_accession ⇒ Object
#query_description ⇒ Object

Constructor Details

#initialize(data) ⇒ `DefaultHMMSearchReport`

Returns a new instance of DefaultHMMSearchReport.

# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 30

def initialize(data)
  # The input data is divided into chunks, a hash called @report_chunks (previously called subdata)
  @report_chunks = get_subdata(data)
  
  @log = Bio::Log::LoggerPlus['bio-hmmer3report']
end

Instance Method Details

#hits ⇒ `Object`

Return an array of HMMER3::Hit objects from this report

# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 109

def hits
  return [] unless @report_chunks['alignment'].match(/^>>/)
  # For each hit sequence (hits)
  sequence_annotations = @report_chunks['alignment'].split(">>")
  # puts "Found #{sequence_annotations.length} different hits e.g. #{sequence_annotations[0]}\n\n and #{sequence_annotations[1]} and \n\n #{sequence_annotations[sequence_annotations.length-1]}"
  
  alignments = []
  sequence_annotations.each_with_index do |seq_annot, i|
    #Ignore the first split as it is rubbish leftover from the split above
    next if i==0
    
    # Now split on \n\n. Each of these splits should have 1 or more domains associated
    
    #First of this split will be "stanzas" like this
    #>> 637984252  Acid345_2236 D-isomer specific 2-hydroxyacid dehydrogenase, NAD-binding [Korebacter versatilis Ellin345]
    #   #    score  bias  c-Evalue  i-Evalue hmmfrom  hmm to    alifrom  ali to    envfrom  env to     acc
    # ---   ------ ----- --------- --------- ------- -------    ------- -------    ------- -------    ----
    #   1 !  120.2   0.5   7.6e-39   1.2e-35       1     133 []       3     313 ..       3     313 .. 0.98
    
    #And AFTER the first split comes "stanzas" like this
    #  Alignments for each domain:
    #  == domain 1    score: 120.2 bits;  conditional E-value: 7.6e-39
    #                 EEECST.-CCHHHHHCC..TEEEEEEC.GSSHHHHHC....-SEEEE-TTS-BSHHHHCC-TT--EEEES----TTB-HHHHHH---EEE--TTTTHHH CS
    #                 xxxxxxxxxxxxxxxxx..xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx RF
    #  2-Hacid_dh   1 vlileplreeelellke..gvevevkd.ellteellekakdadalivrsntkvtaevleklpkLkviatagvGvDniDldaakerGIlVtnvpgystes 96 
    #                 +++ e++ +++++l+k+    +v++ d   ++e+lle++k+adalivrs   v+a++le++ +L+vi++agvGvDni l+aa+++GI+V+n+pg+++ +
    #   637982115   3 IVVAEKIAKAAIDLFKQdpTWNVVTPDqVAQKEQLLEQLKGADALIVRSAVFVDAAMLEHADQLRVIGRAGVGVDNIELEAATRKGIAVMNTPGANAIA 101
    #                 6889999**********8544777777778888****************************************************************** PP
    stanzas = seq_annot.split("\n\n")
    
    hit = Hit.new
    hsps = []
    
    # Parse the first
    lines = stanzas[0].split("\n")
    sequence_name = lines[0].gsub(/^ /,'')
    hit.sequence_name = sequence_name
    
    @log.debug "Now parsing table for #{sequence_name}"
    return [] if lines[1].match(/^   \[No individual domains that/)

    lines[3..(lines.length-1)].each do |line|
      annotation = Hit::Hsp.new
      
      splits = line.split(/\s+/)
      i = 1
      annotation.number = splits[i].to_i; i+=2
      annotation.score = splits[i].to_f; i+=1
      annotation.bias = splits[i].to_f; i+=1
      annotation.c_evalue = splits[i].to_f; i+=1
      annotation.i_evalue = splits[i].to_f; i+=1
      annotation.hmmfrom = splits[i].to_i; i+=1
      annotation.hmm_to = splits[i].to_i; i+=2
      annotation.alifrom = splits[i].to_i; i+=1
      annotation.ali_to = splits[i].to_i; i+=2
      annotation.envfrom = splits[i].to_i; i+=1
      annotation.env_to = splits[i].to_i; i+=2
      annotation.acc = splits[i].to_f
      hsps.push annotation
    end
    
    # Parse the second stanza and beyond
    current_hsp = nil
    (1..(stanzas.length-1)).each_with_index do |aln, index|
      next if stanzas[aln] == "\n"
      stanza = stanzas[aln].split("\n")
      line_offset = 0
      line_offset += 1 if index==0 #to account for the "Alignments for each domain:" line
      # Is this a new HSP being described?
      if matches = stanza[line_offset].match(/^  == domain (\d+)/)
        domain_index = matches[1].to_i-1
        current_hsp = hsps[domain_index]
        line_offset += 1 # to account for the "== domain 1    score: 26.8 bits;  conditional E-value: 5.7e-10" line
      end
      
      # Detect CS and RF lines
      line_offset += 1 if stanza[line_offset].split(/\s+/)[2] == 'CS'
      line_offset += 1 if stanza[line_offset].split(/\s+/)[2] == 'RF'
      
      # Add the lines to the relevant places
      current_hsp.hmmseq ||= ''
      current_hsp.hmmseq += stanza[line_offset].split(/\s+/)[3]
      current_hsp.flatseq ||= ''
      current_hsp.flatseq += stanza[2+line_offset].split(/\s+/)[3]
    end
    
    hit.hsps = hsps
    alignments.push hit
    
    @log.debug "Parsed alignments for sequence #{hit.sequence_name}" if @log.debug?
  end
  
  return alignments
end

#query ⇒ `Object`

# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 83

def query
  return @query unless @query.nil?
  parse_query
  return @query
end

#query_accession ⇒ `Object`

# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 89

def query_accession
  return @query_accession unless @query_accession.nil?
  parse_query
  return @query_accession
end

#query_description ⇒ `Object`

# File 'lib/bio/appl/hmmer/hmmer3/default_report.rb', line 95

def query_description
  return @query_description unless @query_description.nil?
  parse_query
  return @query_description
end