Class: MzIdentMLDoc
- Inherits:
-
Object
- Object
- MzIdentMLDoc
- Defined in:
- lib/protk/mzidentml_doc.rb
Constant Summary collapse
- MZID_NS_PREFIX =
"mzidentml"
- MZID_NS =
'http://psidev.info/psi/pi/mzIdentML/1.1'
Instance Attribute Summary collapse
-
#db_sequence_cache ⇒ Object
readonly
Returns the value of attribute db_sequence_cache.
-
#psms_cache ⇒ Object
readonly
Returns the value of attribute psms_cache.
Class Method Summary collapse
-
.find(node, expression, root = false) ⇒ Object
Class-level utility method for searching from a given node.
Instance Method Summary collapse
- #analysis_software ⇒ Object
- #dbsequence_cache ⇒ Object
- #dbsequences ⇒ Object
- #enzymes ⇒ Object
- #find(node, expression, root = false) ⇒ Object
-
#get_best_psm_for_peptide(peptide_node) ⇒ Object
Returns the best-scoring PSM referenced by a peptide node such as: <PeptideHypothesis peptideEvidence_ref="PepEv_1"> <SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/> </PeptideHypothesis>.
- #get_cvParam(mzidnode, accession) ⇒ Object
- #get_dbsequence(mzidnode, accession) ⇒ Object
- #get_peptide_evidence_from_psm(psm_node) ⇒ Object
-
#get_peptides_for_protein(protein_node) ⇒ Object
def self.get_sister_proteins(protein_node) self.find(protein_node.parent,“ProteinDetectionHypothesis”) end.
-
#get_protein_probability(protein_node) ⇒ Object
As per PeptideShaker.
-
#get_proteins_for_group(group_node) ⇒ Object
Memoized because it gets called for every protein in a group.
- #get_sequence_for_peptide(peptide_node) ⇒ Object
- #get_sequence_for_psm(psm_node) ⇒ Object
-
#initialize(path) ⇒ MzIdentMLDoc
constructor
A new instance of MzIdentMLDoc.
- #peptide_evidence ⇒ Object
-
#peptides ⇒ Object
Peptides are referenced in many ways in mzidentml.
- #protein_groups ⇒ Object
- #proteins ⇒ Object
- #psms ⇒ Object
- #search_databases ⇒ Object
- #source_files ⇒ Object
- #spectrum_queries ⇒ Object
Constructor Details
#initialize(path) ⇒ MzIdentMLDoc
Returns a new instance of MzIdentMLDoc.
35 36 37 38 |
# File 'lib/protk/mzidentml_doc.rb', line 35
# Parses the file at +path+ and keeps the resulting document in @document,
# which the query methods of this class search.
#
# @param path [String] path passed to XML::Parser.file
def initialize(path)
  @document = XML::Parser.file(path).parse
end
Instance Attribute Details
#db_sequence_cache ⇒ Object (readonly)
Returns the value of attribute db_sequence_cache.
11 12 13 |
# File 'lib/protk/mzidentml_doc.rb', line 11
# Accession => DBSequence index.
#
# Nothing in this class assigns @db_sequence_cache; the lazily built index is
# stored under @dbsequence_cache by #dbsequence_cache. A plain reader would
# therefore always answer nil, so this falls back to #dbsequence_cache when
# the instance variable is unset. A value that has been assigned is still
# returned as-is.
#
# @return [Hash] the assigned cache, or the index built by #dbsequence_cache
def db_sequence_cache
  @db_sequence_cache || dbsequence_cache
end
#psms_cache ⇒ Object (readonly)
Returns the value of attribute psms_cache.
10 11 12 |
# File 'lib/protk/mzidentml_doc.rb', line 10
# Index of the nodes returned by #psms, keyed by each node's 'id' attribute.
#
# #get_best_psm_for_peptide looks PSMs up in this cache by id, but nothing in
# this class ever assigns @psms_cache, so a plain reader answers nil and the
# lookup fails. The index is therefore built on first access (the same lazy
# pattern #dbsequence_cache uses) and memoized. A value already stored in
# @psms_cache is returned untouched.
#
# @return [Hash] 'id' attribute value => PSM node
def psms_cache
  @psms_cache ||= psms.each_with_object({}) do |psm, index|
    index[psm.attributes['id']] = psm
  end
end
Class Method Details
.find(node, expression, root = false) ⇒ Object
Class-level utility method for searching from a given node.
97 98 99 100 |
# File 'lib/protk/mzidentml_doc.rb', line 97
# Searches from +node+ for elements named +expression+ in the mzIdentML
# namespace.
#
# @param node [#find] object whose +find+ receives the query and namespace
# @param expression [String] element name (may carry a predicate) to look for
# @param root [Boolean] when truthy the query is prefixed with "//",
#   otherwise with "./"
# @return [Object] whatever +node.find+ returns
def self.find(node, expression, root = false)
  axis = if root
           "//"
         else
           "./"
         end
  query = "#{axis}#{MZID_NS_PREFIX}:#{expression}"
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  node.find(query, namespace)
end
Instance Method Details
#analysis_software ⇒ Object
52 53 54 |
# File 'lib/protk/mzidentml_doc.rb', line 52
# Finds every AnalysisSoftware element in the parsed document.
#
# @return [Object] result of @document.find for "//<prefix>:AnalysisSoftware"
def analysis_software
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  query = "//#{MZID_NS_PREFIX}:AnalysisSoftware"
  @document.find(query, namespace)
end
#dbsequence_cache ⇒ Object
24 25 26 27 28 29 30 31 32 33 |
# File 'lib/protk/mzidentml_doc.rb', line 24
# Lazily builds and memoizes a Hash of DBSequence nodes keyed by their
# 'accession' attribute. A debug message is logged the first time the index
# is generated.
#
# @return [Hash] 'accession' attribute value => DBSequence node
def dbsequence_cache
  return @dbsequence_cache if @dbsequence_cache

  # The empty Hash is stored before it is filled, as in the original flow.
  @dbsequence_cache = {}
  Constants.instance.log "Generating DB index", :debug
  self.dbsequences.each do |entry|
    @dbsequence_cache[entry.attributes['accession']] = entry
  end
  @dbsequence_cache
end
#dbsequences ⇒ Object
68 69 70 |
# File 'lib/protk/mzidentml_doc.rb', line 68
# Finds every DBSequence element in the parsed document.
#
# @return [Object] result of @document.find for "//<prefix>:DBSequence"
def dbsequences
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  query = "//#{MZID_NS_PREFIX}:DBSequence"
  @document.find(query, namespace)
end
#enzymes ⇒ Object
48 49 50 |
# File 'lib/protk/mzidentml_doc.rb', line 48
# Finds every Enzyme element in the parsed document.
#
# @return [Object] result of @document.find for "//<prefix>:Enzyme"
def enzymes
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  query = "//#{MZID_NS_PREFIX}:Enzyme"
  @document.find(query, namespace)
end
#find(node, expression, root = false) ⇒ Object
102 103 104 |
# File 'lib/protk/mzidentml_doc.rb', line 102
# Instance-level convenience wrapper that forwards its arguments unchanged
# to MzIdentMLDoc.find.
#
# @param node [Object] node to search from
# @param expression [String] element name (may carry a predicate)
# @param root [Boolean] forwarded as the +root+ flag
# @return [Object] whatever MzIdentMLDoc.find returns
def find(node, expression, root = false)
  MzIdentMLDoc.find(
    node,
    expression,
    root
  )
end
#get_best_psm_for_peptide(peptide_node) ⇒ Object
Returns the best-scoring PSM referenced by a peptide node such as: <PeptideHypothesis peptideEvidence_ref="PepEv_1"> <SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/> </PeptideHypothesis>
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
# File 'lib/protk/mzidentml_doc.rb', line 148
# Picks, among the PSMs referenced by +peptide_node+, the one with the
# highest value for the cvParam with accession "MS:1002466".
#
# Each SpectrumIdentificationItemRef child is resolved through #psms_cache
# using its 'spectrumIdentificationItem_ref' attribute. On equal scores the
# PSM seen first is kept, because only a strictly greater score replaces it.
#
# NOTE(review): a reference missing from the cache, or a PSM without the
# MS:1002466 cvParam, raises here because nil is dereferenced.
#
# @param peptide_node [Object] node whose SpectrumIdentificationItemRef
#   children are examined
# @return [Object, nil] best PSM node, or nil when there are no references
def get_best_psm_for_peptide(peptide_node)
  item_refs = self.find(peptide_node, "SpectrumIdentificationItemRef")
  Constants.instance.log "Searching from among #{item_refs.length} for best psm", :debug

  winner = nil
  top_score = nil
  item_refs.each do |ref_node|
    candidate = self.psms_cache[ref_node.attributes['spectrumIdentificationItem_ref']]
    candidate_score = self.get_cvParam(candidate, "MS:1002466")['value'].to_f
    next unless top_score.nil? || candidate_score > top_score

    winner = candidate
    top_score = candidate_score
  end
  winner
end
#get_cvParam(mzidnode, accession) ⇒ Object
107 108 109 |
# File 'lib/protk/mzidentml_doc.rb', line 107
# Returns the first cvParam child of +mzidnode+ whose 'accession' attribute
# equals +accession+.
#
# @param mzidnode [Object] node to search beneath
# @param accession [String] accession to match, e.g. "MS:1002466"
# @return [Object, nil] element 0 of the search result
def get_cvParam(mzidnode, accession)
  matches = self.find(mzidnode, "cvParam[@accession='#{accession}']")
  matches[0]
end
#get_dbsequence(mzidnode, accession) ⇒ Object
111 112 113 114 |
# File 'lib/protk/mzidentml_doc.rb', line 111
# Looks up a DBSequence node by accession in the index from
# #dbsequence_cache.
#
# @param mzidnode [Object] not used by the lookup; kept in the signature
# @param accession [String] accession to look up
# @return [Object, nil] the cached node for +accession+
def get_dbsequence(mzidnode, accession)
  index = self.dbsequence_cache
  index[accession]
  # Earlier direct search, kept for reference:
  # self.find(mzidnode,"DBSequence[@accession=\'#{accession}\']",true)[0]
end
#get_peptide_evidence_from_psm(psm_node) ⇒ Object
181 182 183 184 185 186 187 188 |
# File 'lib/protk/mzidentml_doc.rb', line 181
# Resolves every PeptideEvidenceRef child of +psm_node+ to the
# PeptideEvidence element whose 'id' equals the reference's
# 'peptideEvidence_ref' attribute.
#
# Each lookup is a root-level ("//") search, and only its first match is
# kept.
#
# @param psm_node [Object] node whose PeptideEvidenceRef children are read
# @return [Array] one entry per reference, in reference order
def get_peptide_evidence_from_psm(psm_node)
  self.find(psm_node, "PeptideEvidenceRef").map do |ref_node|
    evidence_id = ref_node.attributes['peptideEvidence_ref']
    self.find(ref_node, "PeptideEvidence[@id='#{evidence_id}']", true)[0]
  end
end
#get_peptides_for_protein(protein_node) ⇒ Object
def self.get_sister_proteins(protein_node) self.find(protein_node.parent,“ProteinDetectionHypothesis”) end
141 142 143 |
# File 'lib/protk/mzidentml_doc.rb', line 141
# Finds the PeptideHypothesis children of +protein_node+.
#
# @param protein_node [Object] node to search beneath
# @return [Object] result of the relative ("./") search
def get_peptides_for_protein(protein_node)
  element_name = "PeptideHypothesis"
  self.find(protein_node, element_name)
end
#get_protein_probability(protein_node) ⇒ Object
As per PeptideShaker: the group probability is used for the protein if it is the group representative; otherwise the probability is 0.
117 118 119 120 121 122 123 124 125 126 |
# File 'lib/protk/mzidentml_doc.rb', line 117
# Probability assigned to +protein_node+.
#
# A protein carrying the cvParam MS:1002403 is treated as the group
# representative and receives the 'value' of its parent's MS:1002470
# cvParam multiplied by 0.01 (presumably percent to fraction; confirm).
# Any other protein receives 0.
#
# @param protein_node [Object] protein node; its +parent+ is consulted
# @return [Float, Integer] scaled value for a representative, otherwise the
#   Integer 0
def get_protein_probability(protein_node)
  # MS:1002403 marks the group representative.
  return 0 if self.get_cvParam(protein_node, "MS:1002403").nil?

  group_param = self.get_cvParam(protein_node.parent, "MS:1002470")
  group_param.attributes['value'].to_f * 0.01
end
#get_proteins_for_group(group_node) ⇒ Object
Memoized because it gets called for every protein in a group
129 130 131 132 133 134 135 |
# File 'lib/protk/mzidentml_doc.rb', line 129
# Finds the ProteinDetectionHypothesis children of +group_node+, caching the
# result per node in @proteins_for_group_cache so that repeated calls for
# the same group do not search again.
#
# @param group_node [Object] group node, also used as the cache key
# @return [Object] cached result of the relative ("./") search
def get_proteins_for_group(group_node)
  @proteins_for_group_cache ||= {}
  @proteins_for_group_cache.fetch(group_node) do
    # Cache miss: run the search once and remember it for this node.
    @proteins_for_group_cache[group_node] = self.find(group_node, "ProteinDetectionHypothesis")
  end
end
#get_sequence_for_peptide(peptide_node) ⇒ Object
167 168 169 170 171 172 173 |
# File 'lib/protk/mzidentml_doc.rb', line 167
# Returns the PeptideSequence text for a peptide node by following two
# references: the node's 'peptideEvidence_ref' leads to a PeptideEvidence
# element, whose 'peptide_ref' leads to the Peptide element.
#
# @param peptide_node [Object] node carrying 'peptideEvidence_ref'
# @return [String] content of the Peptide's first PeptideSequence child
def get_sequence_for_peptide(peptide_node)
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"

  evidence_id = peptide_node.attributes['peptideEvidence_ref']
  evidence = peptide_node.find("//#{MZID_NS_PREFIX}:PeptideEvidence[@id='#{evidence_id}']", namespace)[0]

  peptide_id = evidence.attributes['peptide_ref']
  peptide = peptide_node.find("//#{MZID_NS_PREFIX}:Peptide[@id='#{peptide_id}']", namespace)[0]

  sequence_nodes = peptide.find("./#{MZID_NS_PREFIX}:PeptideSequence", namespace)
  sequence_nodes[0].content
end
#get_sequence_for_psm(psm_node) ⇒ Object
175 176 177 178 179 |
# File 'lib/protk/mzidentml_doc.rb', line 175
# Returns the PeptideSequence text of the Peptide element whose 'id' equals
# the 'peptide_ref' attribute of +psm_node+.
#
# @param psm_node [Object] node carrying 'peptide_ref'
# @return [String] content of the Peptide's first PeptideSequence child
def get_sequence_for_psm(psm_node)
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"

  peptide_id = psm_node.attributes['peptide_ref']
  peptide = psm_node.find("//#{MZID_NS_PREFIX}:Peptide[@id='#{peptide_id}']", namespace)[0]

  sequence_nodes = peptide.find("./#{MZID_NS_PREFIX}:PeptideSequence", namespace)
  sequence_nodes[0].content
end
#peptide_evidence ⇒ Object
60 61 62 |
# File 'lib/protk/mzidentml_doc.rb', line 60
# Finds every PeptideEvidence element in the parsed document.
#
# @return [Object] result of @document.find for "//<prefix>:PeptideEvidence"
def peptide_evidence
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  query = "//#{MZID_NS_PREFIX}:PeptideEvidence"
  @document.find(query, namespace)
end
#peptides ⇒ Object
Peptides are referenced in many ways in mzIdentML. We define a "Peptide" as a peptide supporting a particular protein. Such peptides may encompass several PSMs.
85 86 87 |
# File 'lib/protk/mzidentml_doc.rb', line 85
# Finds every PeptideHypothesis element in the parsed document; these are
# what this class calls "peptides".
#
# @return [Object] result of @document.find for "//<prefix>:PeptideHypothesis"
def peptides
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  query = "//#{MZID_NS_PREFIX}:PeptideHypothesis"
  @document.find(query, namespace)
end
#protein_groups ⇒ Object
72 73 74 |
# File 'lib/protk/mzidentml_doc.rb', line 72
# Finds every ProteinAmbiguityGroup element in the parsed document.
#
# @return [Object] result of @document.find for
#   "//<prefix>:ProteinAmbiguityGroup"
def protein_groups
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  query = "//#{MZID_NS_PREFIX}:ProteinAmbiguityGroup"
  @document.find(query, namespace)
end
#proteins ⇒ Object
77 78 79 |
# File 'lib/protk/mzidentml_doc.rb', line 77
# Finds every ProteinDetectionHypothesis element in the parsed document.
#
# @return [Object] result of @document.find for
#   "//<prefix>:ProteinDetectionHypothesis"
def proteins
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  query = "//#{MZID_NS_PREFIX}:ProteinDetectionHypothesis"
  @document.find(query, namespace)
end
#psms ⇒ Object
64 65 66 |
# File 'lib/protk/mzidentml_doc.rb', line 64
# Finds every SpectrumIdentificationItem element (PSM) in the parsed
# document.
#
# @return [Object] result of @document.find for
#   "//<prefix>:SpectrumIdentificationItem"
def psms
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  query = "//#{MZID_NS_PREFIX}:SpectrumIdentificationItem"
  @document.find(query, namespace)
end
#search_databases ⇒ Object
44 45 46 |
# File 'lib/protk/mzidentml_doc.rb', line 44
# Finds every SearchDatabase element in the parsed document.
#
# @return [Object] result of @document.find for "//<prefix>:SearchDatabase"
def search_databases
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  query = "//#{MZID_NS_PREFIX}:SearchDatabase"
  @document.find(query, namespace)
end
#source_files ⇒ Object
40 41 42 |
# File 'lib/protk/mzidentml_doc.rb', line 40
# Finds every SourceFile element in the parsed document.
#
# @return [Object] result of @document.find for "//<prefix>:SourceFile"
def source_files
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  query = "//#{MZID_NS_PREFIX}:SourceFile"
  @document.find(query, namespace)
end
#spectrum_queries ⇒ Object
56 57 58 |
# File 'lib/protk/mzidentml_doc.rb', line 56
# Finds every SpectrumIdentificationResult element in the parsed document.
#
# @return [Object] result of @document.find for
#   "//<prefix>:SpectrumIdentificationResult"
def spectrum_queries
  namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
  query = "//#{MZID_NS_PREFIX}:SpectrumIdentificationResult"
  @document.find(query, namespace)
end