Class: Proph::Prot::Parser
- Inherits:
-
Proph::Parser
- Object
- Proph::Parser
- Proph::Prot::Parser
- Defined in:
- lib/spec_id/proph.rb
Overview
Class for parsing the ‘*-prot.xml’ files in different ways
Instance Attribute Summary collapse
-
#peps ⇒ Object
Returns all the peptides from prots.
-
#prots ⇒ Object
Returns the value of attribute prots.
Instance Method Summary collapse
-
#get_prots_and_peps(protxmlfile, prot_prob_cutoff = 1.0, pep_init_prob_cutoff = 1.0, pep_nsp_prob_cutoff = 1.0, parse_type = "rexml") ⇒ Object
Sets and returns an array of Prot objects. parse_type is either “rexml” or “regex”.
-
#initialize ⇒ Parser
constructor
A new instance of Parser.
Methods inherited from Proph::Parser
Constructor Details
#initialize ⇒ Parser
Returns a new instance of Parser.
361 362 363 |
# File 'lib/spec_id/proph.rb', line 361

# Creates a parser with an empty protein list and no cached peptide list.
def initialize
  @prots = []
  # Set explicitly so #peps can test @peps without reading an unset ivar.
  @peps = nil
end
Instance Attribute Details
#peps ⇒ Object
Returns all the peptides from prots.
366 367 368 369 370 371 372 373 374 |
# File 'lib/spec_id/proph.rb', line 366

# Returns every peptide of every protein in @prots as one flat array.
#
# The array is built on the first call and cached in @peps; later calls
# return that same array object without rescanning @prots.
def peps
  @peps ||= @prots.each_with_object([]) do |prot, all|
    # Splatting means a protein whose peps is nil contributes nothing.
    all.push(*prot.peps)
  end
end
#prots ⇒ Object
Returns the value of attribute prots.
358 359 360 |
# File 'lib/spec_id/proph.rb', line 358

# Returns the array held in @prots (the same object, not a copy).
def prots
  @prots
end
Instance Method Details
#get_prots_and_peps(protxmlfile, prot_prob_cutoff = 1.0, pep_init_prob_cutoff = 1.0, pep_nsp_prob_cutoff = 1.0, parse_type = "rexml") ⇒ Object
Sets and returns an array of Prot objects. parse_type is either “rexml” or “regex”.
379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 |
# File 'lib/spec_id/proph.rb', line 379

# Reads a '*-prot.xml' file and appends to @prots every protein that passes
# the protein cutoff and has at least one peptide passing both peptide
# cutoffs. Each kept protein is appended exactly once, with its passing
# peptides assigned to its +peps+.
#
# All cutoffs are inclusive (value >= cutoff) and are coerced with +to_f+.
#
# @param protxmlfile [String] path of the file to parse
# @param prot_prob_cutoff [#to_f] minimum protein 'probability'
# @param pep_init_prob_cutoff [#to_f] minimum peptide 'initial_probability'
# @param pep_nsp_prob_cutoff [#to_f] minimum peptide 'nsp_adjusted_probability'
# @param parse_type [String] "rexml" (XML tree walk) or "regex" (line-by-line
#   matching); any other value parses nothing
# @return [Array] @prots
def get_prots_and_peps(protxmlfile, prot_prob_cutoff=1.0, pep_init_prob_cutoff=1.0, pep_nsp_prob_cutoff=1.0, parse_type="rexml")
  ## ensure these are all floats
  prot_prob_cutoff, pep_init_prob_cutoff, pep_nsp_prob_cutoff =
    [prot_prob_cutoff, pep_init_prob_cutoff, pep_nsp_prob_cutoff].map { |cutoff| cutoff.to_f }

  case parse_type
  when "rexml"
    prots_from_rexml(protxmlfile, prot_prob_cutoff, pep_init_prob_cutoff, pep_nsp_prob_cutoff)
  when "regex"
    prots_from_regex(protxmlfile, prot_prob_cutoff, pep_init_prob_cutoff, pep_nsp_prob_cutoff)
  end

  # @prots may have grown, so a peptide list cached by #peps is now stale.
  @peps = nil
  @prots
end

# Appends passing proteins to @prots by walking the document with REXML.
# Probabilities are stored as Floats in this mode.
def prots_from_rexml(protxmlfile, prot_prob_cutoff, pep_init_prob_cutoff, pep_nsp_prob_cutoff)
  # The block form closes the file once the document has been built.
  doc = File.open(protxmlfile) { |io| REXML::Document.new(io) }
  doc.elements.each("protein_summary/protein_group/protein") do |elem|
    prob = elem.attributes['probability'].to_f
    next unless prob >= prot_prob_cutoff

    name = elem.attributes['protein_name']
    curr_prot = Prot.new({:probability => prob, :protein_name => name, :cutoff => prot_prob_cutoff})
    peptides = []
    elem.elements.to_a('peptide').each do |pep|
      nsp_prob = pep.attributes['nsp_adjusted_probability'].to_f
      init_prob = pep.attributes['initial_probability'].to_f
      next unless nsp_prob >= pep_nsp_prob_cutoff && init_prob >= pep_init_prob_cutoff

      peptides.push(Pep.new(:probability => nsp_prob,
                            :sequence => pep.attributes['peptide_sequence'],
                            :charge => pep.attributes['charge'],
                            :precursor_neutral_mass => pep.attributes['precursor_neutral_mass'],
                            :nsp_cutoff => pep_nsp_prob_cutoff))
    end

    ## Only take proteins with peptides!
    # Checked once per protein, after all of its peptides have been seen,
    # so the protein is never appended more than once.
    next if peptides.empty?

    curr_prot.peps = peptides
    @prots << curr_prot
  end
end

# Appends passing proteins to @prots by matching the file line by line.
# Relies on each <protein ...> and <peptide ...> start tag sitting on a
# single line with its attributes in the order the patterns expect.
# Probabilities and charge are stored as the matched Strings in this mode,
# and no precursor_neutral_mass is captured.
def prots_from_regex(protxmlfile, prot_prob_cutoff, pep_init_prob_cutoff, pep_nsp_prob_cutoff)
  prot_regex = /<protein protein_name="(.*)?" n_indistinguishable_proteins(.*)/o
  prot_prob_regex = /probability="([\d\.]+)"/o
  pep_regex = /<peptide peptide_sequence="(\w+)?"(.*)/o
  pep_else_regex = /charge="(\d)" initial_probability="([\d\.]+)" nsp_adjusted_probability="([\d\.]+)"/o

  curr_prot = nil
  peptides = []

  # Stores the protein accumulated so far if it passes the cutoff and kept
  # at least one peptide. Called when the next protein starts and at EOF.
  store_current = lambda do
    if curr_prot && curr_prot.probability.to_f >= prot_prob_cutoff && !peptides.empty?
      curr_prot.peps = peptides
      @prots.push(curr_prot)
    end
  end

  # File.foreach closes the file when iteration finishes.
  File.foreach(protxmlfile) do |line|
    if line =~ prot_regex
      name = $1.dup
      rest = $2
      prob = rest =~ prot_prob_regex ? $1.dup : nil
      store_current.call
      curr_prot = Prot.new({:probability => prob, :protein_name => name, :cutoff => prot_prob_cutoff})
      peptides = []
    end

    if line =~ pep_regex
      sequence = $1.dup
      rest = $2
      if rest =~ pep_else_regex
        charge = $1
        init_prob = $2
        nsp_prob = $3
        if nsp_prob.to_f >= pep_nsp_prob_cutoff && init_prob.to_f >= pep_init_prob_cutoff
          peptides.push(Pep.new(:probability => nsp_prob, :sequence => sequence, :charge => charge, :nsp_cutoff => pep_nsp_prob_cutoff))
        end
      end
    end
  end

  # get the last one:
  store_current.call
end

private :prots_from_rexml, :prots_from_regex