Class: Proph::Prot::Parser

Inherits:
Proph::Parser show all
Defined in:
lib/spec_id/proph.rb

Overview

Class for parsing the ‘*-prot.xml’ files in different ways

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from Proph::Parser

#root_el

Constructor Details

#initialize ⇒ Parser

Returns a new instance of Parser.



361
362
363
# File 'lib/spec_id/proph.rb', line 361

# Builds a parser with an empty protein list and no cached peptide list.
def initialize
  @prots = []
  # #peps tests this ivar to decide whether its cache has been built; setting
  # it explicitly avoids the "instance variable not initialized" warning that
  # pre-3.0 interpreters emit under -w when it is first read.
  @peps = nil
end

Instance Attribute Details

#peps ⇒ Object

Returns all the peptides from every protein in prots.



366
367
368
369
370
371
372
373
374
# File 'lib/spec_id/proph.rb', line 366

# Returns one flat array holding the peptides of every protein in @prots.
# The array is built on the first call and cached in @peps; subsequent calls
# hand back that same array without looking at @prots again.
def peps
  return @peps if @peps

  @peps = []
  @prots.each { |protein| @peps.push(*protein.peps) }
  @peps
end

#prots ⇒ Object

Returns the value of attribute prots.



358
359
360
# File 'lib/spec_id/proph.rb', line 358

# Reader for @prots, the array in which this parser stores the proteins it
# has parsed. The array itself is returned, not a copy.
def prots
  @prots
end

Instance Method Details

#get_prots_and_peps(protxmlfile, prot_prob_cutoff = 1.0, pep_init_prob_cutoff = 1.0, pep_nsp_prob_cutoff = 1.0, parse_type = "rexml") ⇒ Object

Sets and returns an array of Prot objects. parse_type may be “rexml” or “regex”.



379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
# File 'lib/spec_id/proph.rb', line 379

# Parses a '*-prot.xml' file and appends to @prots every protein that meets
# the protein cutoff and retains at least one peptide meeting both peptide
# cutoffs. Each protein is stored at most once per call.
#
# @param protxmlfile [String] path of the protXML file to read
# @param prot_prob_cutoff [#to_f] minimum protein 'probability' (inclusive)
# @param pep_init_prob_cutoff [#to_f] minimum peptide 'initial_probability' (inclusive)
# @param pep_nsp_prob_cutoff [#to_f] minimum peptide 'nsp_adjusted_probability' (inclusive)
# @param parse_type [String] "rexml" (DOM parse) or "regex" (line-by-line
#   scan); any other value parses nothing
# @return [Array] @prots, which accumulates across calls
#
# NOTE(review): the "regex" path stores probabilities and charge as the
# matched Strings and does not capture precursor_neutral_mass, whereas the
# "rexml" path stores probabilities as Floats - confirm callers cope with both.
def get_prots_and_peps(protxmlfile, prot_prob_cutoff=1.0, pep_init_prob_cutoff=1.0, pep_nsp_prob_cutoff=1.0, parse_type="rexml")
  ## ensure these are all floats
  prot_prob_cutoff, pep_init_prob_cutoff, pep_nsp_prob_cutoff =
    [prot_prob_cutoff, pep_init_prob_cutoff, pep_nsp_prob_cutoff].map { |cutoff| cutoff.to_f }

  case parse_type
  when "rexml"
    # Block form closes the handle as soon as the document has been built.
    doc = File.open(protxmlfile) { |io| REXML::Document.new(io) }
    doc.elements.each("protein_summary/protein_group/protein") do |elem|
      prob = elem.attributes['probability'].to_f
      next unless prob >= prot_prob_cutoff

      name = elem.attributes['protein_name']
      curr_prot = Prot.new({:probability => prob, :protein_name => name, :cutoff => prot_prob_cutoff})
      peptides = []
      elem.elements.to_a('peptide').each do |pep|
        nsp_prob = pep.attributes['nsp_adjusted_probability'].to_f
        init_prob = pep.attributes['initial_probability'].to_f
        next unless nsp_prob >= pep_nsp_prob_cutoff && init_prob >= pep_init_prob_cutoff

        peptides.push(Pep.new(:probability => nsp_prob,
                              :sequence => pep.attributes['peptide_sequence'],
                              :charge => pep.attributes['charge'],
                              :precursor_neutral_mass => pep.attributes['precursor_neutral_mass'],
                              :nsp_cutoff => pep_nsp_prob_cutoff))
      end
      ## Only take proteins with peptides!
      # Checked once, after every peptide has been seen, so the protein is
      # stored a single time regardless of how many peptides passed.
      next if peptides.empty?

      curr_prot.peps = peptides
      @prots << curr_prot
    end
  when "regex"
    prot_regex = /<protein protein_name="(.*)?" n_indistinguishable_proteins(.*)/
    prot_prob_regex = /probability="([\d\.]+)"/
    pep_regex = /<peptide peptide_sequence="(\w+)?"(.*)/
    pep_else_regex = /charge="(\d)" initial_probability="([\d\.]+)" nsp_adjusted_probability="([\d\.]+)"/

    curr_prot = nil
    peptides = []
    # Stores the protein currently being accumulated when it meets the
    # protein cutoff and has kept at least one peptide.
    store_current = lambda do
      if curr_prot && curr_prot.probability.to_f >= prot_prob_cutoff && !peptides.empty?
        curr_prot.peps = peptides
        @prots.push(curr_prot)
      end
    end

    File.open(protxmlfile) do |io|
      io.each_line do |line|
        if line =~ prot_regex
          name = $1
          rest = $2
          prob = (rest =~ prot_prob_regex) ? $1 : nil
          # A new <protein> line ends the previous protein's peptide list.
          store_current.call
          curr_prot = Prot.new({:probability => prob, :protein_name => name, :cutoff => prot_prob_cutoff})
          peptides = []
        end
        if line =~ pep_regex
          sequence = $1
          rest = $2
          if rest =~ pep_else_regex
            charge = $1
            init_prob = $2
            nsp_prob = $3
            if nsp_prob.to_f >= pep_nsp_prob_cutoff && init_prob.to_f >= pep_init_prob_cutoff
              peptides.push(Pep.new(:probability => nsp_prob, :sequence => sequence, :charge => charge, :nsp_cutoff => pep_nsp_prob_cutoff))
            end
          end
        end
      end
    end
    # get the last one: no further <protein> line follows it, so flush it
    # once the whole file has been read.
    store_current.call
  end

  # Drop the peptide list cached by #peps so it is rebuilt from the updated @prots.
  @peps = nil
  @prots
end