Class: Proph::Prot::Parser

Inherits:

Proph::Parser

Object
Proph::Parser
Proph::Prot::Parser

show all

Defined in: lib/spec_id/proph.rb

Overview

Class for parsing the ‘*-prot.xml’ files in different ways

Instance Attribute Summary collapse

#peps ⇒ Object

returns all the peptides from prots.
#prots ⇒ Object

Returns the value of attribute prots.

Instance Method Summary collapse

#get_prots_and_peps(protxmlfile, prot_prob_cutoff = 1.0, pep_init_prob_cutoff = 1.0, pep_nsp_prob_cutoff = 1.0, parse_type = "rexml") ⇒ Object

Sets and returns an array of Prot objects; parse_type is either “rexml” or “regex”.
#initialize ⇒ Parser constructor

A new instance of Parser.

Methods inherited from Proph::Parser

#root_el

Constructor Details

#initialize ⇒ `Parser`

Returns a new instance of Parser.



361
362
363

# File 'lib/spec_id/proph.rb', line 361

# Creates a parser whose protein list (@prots) starts out empty;
# get_prots_and_peps appends to that list.
def initialize
  @prots = []
end

Instance Attribute Details

#peps ⇒ `Object`

returns all the peptides from prots

# File 'lib/spec_id/proph.rb', line 366

# Returns every peptide of every protein in @prots as one flat array.
#
# The array is built on the first call and cached in @peps; later calls
# return that same array without looking at @prots again.
def peps
  return @peps if @peps

  @peps = []
  # The splat turns a nil peptide list into "no arguments", so a protein
  # without peptides adds nothing.
  @prots.each { |protein| @peps.push(*protein.peps) }
  @peps
end

#prots ⇒ `Object`

Returns the value of attribute prots.



358
359
360

# File 'lib/spec_id/proph.rb', line 358

# Reader for @prots, the array that initialize creates empty and that
# get_prots_and_peps appends Prot objects to.
def prots
  @prots
end

Instance Method Details

#get_prots_and_peps(protxmlfile, prot_prob_cutoff = 1.0, pep_init_prob_cutoff = 1.0, pep_nsp_prob_cutoff = 1.0, parse_type = "rexml") ⇒ `Object`

Sets and returns an array of Prot objects; parse_type is either “rexml” or “regex”.

# File 'lib/spec_id/proph.rb', line 379

# Reads proteins and their peptides from a protein XML file, appends each
# qualifying protein (exactly once) to @prots and returns @prots.
#
# A protein qualifies when its probability is >= prot_prob_cutoff AND at
# least one of its peptides has nsp_adjusted_probability >=
# pep_nsp_prob_cutoff and initial_probability >= pep_init_prob_cutoff.
# Only the peptides that pass both cutoffs are attached to the protein.
#
# protxmlfile::          path of the file to read
# prot_prob_cutoff::     minimum protein probability (converted with to_f)
# pep_init_prob_cutoff:: minimum peptide initial probability (to_f)
# pep_nsp_prob_cutoff::  minimum peptide nsp adjusted probability (to_f)
# parse_type::           "rexml" builds a REXML document and walks
#                        protein_summary/protein_group/protein;
#                        "regex" scans the file line by line.
#                        Any other value parses nothing.
#
# Differences between the two modes that are kept as they were:
# * "regex" hands the probabilities and charge to Prot/Pep as the captured
#   strings, while "rexml" converts the probabilities to Float;
# * "regex" does not pass :precursor_neutral_mass to Pep;
# * "regex" only recognises a <protein>/<peptide> start tag when the
#   attributes named in the patterns appear on one line in that order.
#
# Results accumulate: @prots is not cleared between calls.
def get_prots_and_peps(protxmlfile, prot_prob_cutoff=1.0, pep_init_prob_cutoff=1.0, pep_nsp_prob_cutoff=1.0, parse_type="rexml")
  ## ensure these are all floats
  prot_prob_cutoff, pep_init_prob_cutoff, pep_nsp_prob_cutoff =
    [prot_prob_cutoff, pep_init_prob_cutoff, pep_nsp_prob_cutoff].map { |cutoff| cutoff.to_f }

  case parse_type
  when "rexml"
    # The block form closes the file once the document has been built.
    doc = File.open(protxmlfile) { |io| REXML::Document.new(io) }
    doc.elements.each("protein_summary/protein_group/protein") do |elem|
      prob = elem.attributes['probability'].to_f
      next unless prob >= prot_prob_cutoff

      name = elem.attributes['protein_name']
      curr_prot = Prot.new({:probability => prob, :protein_name => name, :cutoff => prot_prob_cutoff})
      peptides = []
      elem.elements.to_a('peptide').each do |pep|
        nsp_prob = pep.attributes['nsp_adjusted_probability'].to_f
        init_prob = pep.attributes['initial_probability'].to_f
        next unless nsp_prob >= pep_nsp_prob_cutoff && init_prob >= pep_init_prob_cutoff

        sequence = pep.attributes['peptide_sequence']
        charge = pep.attributes['charge']
        pnm = pep.attributes['precursor_neutral_mass']
        peptides.push(Pep.new(:probability => nsp_prob, :sequence => sequence, :charge => charge, :precursor_neutral_mass => pnm, :nsp_cutoff => pep_nsp_prob_cutoff))
      end
      ## Only take proteins with peptides!
      # Checked once, after all peptides were seen, so the protein is
      # appended a single time.
      next if peptides.empty?

      curr_prot.peps = peptides
      @prots << curr_prot
    end
  when "regex"
    prot_regex = /<protein protein_name="(.*)?" n_indistinguishable_proteins(.*)/
    prot_prob_regex = /probability="([\d\.]+)"/
    pep_regex = /<peptide peptide_sequence="(\w+)?"(.*)/
    pep_else_regex = /charge="(\d)" initial_probability="([\d\.]+)" nsp_adjusted_probability="([\d\.]+)"/

    curr_prot = nil
    peptides = []
    # Appends the protein collected so far if it passes the protein cutoff
    # and kept at least one peptide. Called when the next <protein> tag
    # starts and once more after the last line.
    store_current = lambda do
      if curr_prot && curr_prot.probability.to_f >= prot_prob_cutoff && peptides.size > 0
        curr_prot.peps = peptides
        @prots.push(curr_prot)
      end
    end

    File.open(protxmlfile) do |io|
      io.each_line do |line|
        if line =~ prot_regex
          prob = nil
          name = $1.dup
          rest = $2
          if rest =~ prot_prob_regex
            prob = $1.dup
          end
          store_current.call
          curr_prot = Prot.new({:probability => prob, :protein_name => name, :cutoff => prot_prob_cutoff})
          peptides = []
        end
        if line =~ pep_regex
          sequence = $1.dup
          rest = $2
          if rest =~ pep_else_regex
            charge = $1
            init_prob = $2
            nsp_prob = $3
            if nsp_prob.to_f >= pep_nsp_prob_cutoff && init_prob.to_f >= pep_init_prob_cutoff
              peptides.push(Pep.new(:probability => nsp_prob, :sequence => sequence, :charge => charge, :nsp_cutoff => pep_nsp_prob_cutoff))
            end
          end
        end
      end
    end
    # get the last one:
    store_current.call
  end
  @prots
end