Class: MzIdentMLDoc

Inherits:
Object
  • Object
show all
Defined in:
lib/protk/mzidentml_doc.rb

Constant Summary collapse

MZID_NS_PREFIX =
"mzidentml"
MZID_NS =
'http://psidev.info/psi/pi/mzIdentML/1.1'

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ MzIdentMLDoc

Returns a new instance of MzIdentMLDoc.



35
36
37
38
# File 'lib/protk/mzidentml_doc.rb', line 35

# Parses the mzIdentML file at +path+ and keeps the parsed document in
# @document, which the document-wide query methods of this class search.
#
# path - location of the file handed to XML::Parser.file.
def initialize(path)
	@document = XML::Parser.file(path).parse
end

Instance Attribute Details

#db_sequence_cache ⇒ Object (readonly)

Returns the value of attribute db_sequence_cache.



11
12
13
# File 'lib/protk/mzidentml_doc.rb', line 11

# Reader for the DBSequence index.
#
# Nothing visible in this class assigns @db_sequence_cache; the index is built
# and memoized by #dbsequence_cache (in @dbsequence_cache), so the plain reader
# always answered nil. An explicitly assigned @db_sequence_cache is still
# returned as-is; otherwise this falls back to #dbsequence_cache.
#
# Returns the assigned value, or the Hash produced by #dbsequence_cache.
def db_sequence_cache
  @db_sequence_cache || dbsequence_cache
end

#psms_cache ⇒ Object (readonly)

Returns the value of attribute psms_cache.



10
11
12
# File 'lib/protk/mzidentml_doc.rb', line 10

# Index of SpectrumIdentificationItem nodes keyed by their 'id' attribute.
#
# Built lazily on first access from #psms and memoized in @psms_cache, so
# callers such as #get_best_psm_for_peptide can index the result directly
# instead of receiving nil. A value already stored in @psms_cache is returned
# untouched.
#
# Returns a Hash mapping id => node.
def psms_cache
  unless @psms_cache
    @psms_cache = {}
    psms.each do |psm|
      @psms_cache[psm.attributes['id']] = psm
    end
  end
  @psms_cache
end

Class Method Details

.find(node, expression, root = false) ⇒ Object


Class Level Utility methods for searching from a given node




97
98
99
100
# File 'lib/protk/mzidentml_doc.rb', line 97

# Class-level helper that runs a namespaced XPath query via node.find.
#
# node       - object responding to find(xpath, namespace).
# expression - XPath step(s); the mzIdentML prefix is prepended to the first
#              step only.
# root       - when true the query starts with "//", otherwise with "./".
#
# Returns whatever node.find returns for the composed query.
def self.find(node,expression,root=false)
	axis = "./"
	axis = "//" if root
	xpath = "#{axis}#{MZID_NS_PREFIX}:#{expression}"
	node.find(xpath, "#{MZID_NS_PREFIX}:#{MZID_NS}")
end

Instance Method Details

#analysis_software ⇒ Object



52
53
54
# File 'lib/protk/mzidentml_doc.rb', line 52

# Document-wide query for every AnalysisSoftware element in the mzIdentML
# namespace. Returns whatever @document.find yields for that XPath.
def analysis_software
	xpath = "//#{MZID_NS_PREFIX}:AnalysisSoftware"
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
	@document.find(xpath, namespace)
end

#dbsequence_cache ⇒ Object



24
25
26
27
28
29
30
31
32
33
# File 'lib/protk/mzidentml_doc.rb', line 24

# Lazily built index of DBSequence nodes keyed by their 'accession' attribute.
#
# The first call logs a debug message, walks #dbsequences once and memoizes the
# resulting Hash in @dbsequence_cache; later calls return that same Hash.
def dbsequence_cache
	return @dbsequence_cache if @dbsequence_cache

	# Assign the (still empty) hash before filling it, as the index is populated in place.
	@dbsequence_cache = {}
	Constants.instance.log "Generating DB index" , :debug
	self.dbsequences.each do |node|
		accession = node.attributes['accession']
		@dbsequence_cache[accession] = node
	end
	@dbsequence_cache
end

#dbsequences ⇒ Object



68
69
70
# File 'lib/protk/mzidentml_doc.rb', line 68

# Document-wide query for every DBSequence element in the mzIdentML namespace.
# Returns whatever @document.find yields for that XPath.
def dbsequences
	xpath = "//#{MZID_NS_PREFIX}:DBSequence"
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
	@document.find(xpath, namespace)
end

#enzymes ⇒ Object



48
49
50
# File 'lib/protk/mzidentml_doc.rb', line 48

# Document-wide query for every Enzyme element in the mzIdentML namespace.
# Returns whatever @document.find yields for that XPath.
def enzymes
	xpath = "//#{MZID_NS_PREFIX}:Enzyme"
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
	@document.find(xpath, namespace)
end

#find(node, expression, root = false) ⇒ Object



102
103
104
# File 'lib/protk/mzidentml_doc.rb', line 102

# Instance-level convenience wrapper: forwards all three arguments unchanged to
# the class-level MzIdentMLDoc.find and returns its result.
#
# node       - node the query starts from.
# expression - XPath step(s) without the namespace prefix.
# root       - passed through; defaults to false.
def find(node, expression, root = false)
	MzIdentMLDoc.find(node, expression, root)
end

#get_best_psm_for_peptide(peptide_node) ⇒ Object

Expects a PeptideHypothesis node of the form: <PeptideHypothesis peptideEvidence_ref="PepEv_1"> <SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/> </PeptideHypothesis>



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/protk/mzidentml_doc.rb', line 148

# Picks the highest-scoring PSM referenced by a peptide node.
#
# Each SpectrumIdentificationItemRef child of +peptide_node+ is resolved through
# #psms_cache (keyed by the 'spectrumIdentificationItem_ref' attribute) and
# scored with the 'value' of its cvParam "MS:1002466", converted with to_f.
# On equal scores the earliest reference wins (strict > comparison).
#
# Returns the best PSM node, or nil when there are no references.
def get_best_psm_for_peptide(peptide_node)
	item_refs = self.find(peptide_node,"SpectrumIdentificationItemRef")
	Constants.instance.log "Searching from among #{item_refs.length} for best psm" , :debug

	top_psm = nil
	top_score = nil
	item_refs.each do |item_ref|
		ref_id = item_ref.attributes['spectrumIdentificationItem_ref']
		# Cache lookup replaces a document-wide XPath search per reference.
		candidate = self.psms_cache[ref_id]
		candidate_score = self.get_cvParam(candidate,"MS:1002466")['value'].to_f
		next unless top_score.nil? || candidate_score > top_score

		top_psm = candidate
		top_score = candidate_score
	end
	top_psm
end

#get_cvParam(mzidnode, accession) ⇒ Object



107
108
109
# File 'lib/protk/mzidentml_doc.rb', line 107

# Looks up the cvParam child of +mzidnode+ with the given accession.
#
# mzidnode  - node searched relative to (via #find without the root flag).
# accession - value matched against the cvParam's @accession attribute.
#
# Returns element [0] of the query result.
def get_cvParam(mzidnode,accession)
	query = "cvParam[@accession=\'#{accession}\']"
	matches = self.find(mzidnode, query)
	matches[0]
end

#get_dbsequence(mzidnode, accession) ⇒ Object



111
112
113
114
# File 'lib/protk/mzidentml_doc.rb', line 111

# Fetches the DBSequence node for +accession+ from the #dbsequence_cache index.
#
# mzidnode  - not used by the lookup; kept for interface compatibility.
# accession - key into the index.
#
# Returns the cached node, or whatever the index yields for an unknown key.
def get_dbsequence(mzidnode,accession)
	index = self.dbsequence_cache
	index[accession]
end

#get_peptide_evidence_from_psm(psm_node) ⇒ Object



181
182
183
184
185
186
187
188
# File 'lib/protk/mzidentml_doc.rb', line 181

# Resolves every PeptideEvidenceRef child of +psm_node+ to its PeptideEvidence
# node.
#
# For each reference the 'peptideEvidence_ref' attribute is read and a
# root-level query for PeptideEvidence[@id=...] is issued; element [0] of that
# result is collected (nil when nothing matched).
#
# Returns an Array with one entry per reference, in reference order.
def get_peptide_evidence_from_psm(psm_node)
	self.find(psm_node,"PeptideEvidenceRef").map do |evidence_ref|
		evidence_id = evidence_ref.attributes['peptideEvidence_ref']
		self.find(evidence_ref,"PeptideEvidence[@id=\'#{evidence_id}\']",true)[0]
	end
end

#get_peptides_for_protein(protein_node) ⇒ Object

Commented out in the source: def self.get_sister_proteins(protein_node) self.find(protein_node.parent,"ProteinDetectionHypothesis") end



141
142
143
# File 'lib/protk/mzidentml_doc.rb', line 141

# Returns the PeptideHypothesis children of +protein_node+, i.e. the result of
# a relative #find for "PeptideHypothesis".
def get_peptides_for_protein(protein_node)
	hypotheses = self.find(protein_node, "PeptideHypothesis")
	hypotheses
end

#get_protein_probability(protein_node) ⇒ Object

As per PeptideShaker: if the protein is the representative of its group, the group probability is used as the protein probability; otherwise the probability is 0.



117
118
119
120
121
122
123
124
125
126
# File 'lib/protk/mzidentml_doc.rb', line 117

# Probability assigned to a protein node.
#
# A protein counts as group representative when it carries a cvParam with
# accession "MS:1002403". For representatives the 'value' attribute of the
# parent node's cvParam "MS:1002470" is converted with to_f and scaled by 0.01;
# every other protein gets the Integer 0.
def get_protein_probability(protein_node)
	representative_marker = self.get_cvParam(protein_node,"MS:1002403")
	return 0 if representative_marker.nil?

	group_param = self.get_cvParam(protein_node.parent,"MS:1002470")
	group_param.attributes['value'].to_f*0.01
end

#get_proteins_for_group(group_node) ⇒ Object

Memoized because it gets called for every protein in a group



129
130
131
132
133
134
135
# File 'lib/protk/mzidentml_doc.rb', line 129

# Returns the ProteinDetectionHypothesis children of +group_node+.
#
# Results are memoized per group node in @proteins_for_group_cache, so the
# underlying #find runs at most once per distinct key.
def get_proteins_for_group(group_node)
	@proteins_for_group_cache ||= {}
	unless @proteins_for_group_cache.key?(group_node)
		@proteins_for_group_cache[group_node] = self.find(group_node,"ProteinDetectionHypothesis")
	end
	@proteins_for_group_cache[group_node]
end

#get_sequence_for_peptide(peptide_node) ⇒ Object



167
168
169
170
171
172
173
# File 'lib/protk/mzidentml_doc.rb', line 167

# Resolves the amino-acid sequence text for a peptide node.
#
# Follows the node's 'peptideEvidence_ref' attribute to the matching
# PeptideEvidence element, that element's 'peptide_ref' to the matching Peptide
# element, and returns the content of the Peptide's PeptideSequence child.
# Each lookup takes element [0] of its query result.
def get_sequence_for_peptide(peptide_node)
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"

	evidence_id = peptide_node.attributes['peptideEvidence_ref']
	evidence = peptide_node.find("//#{MZID_NS_PREFIX}:PeptideEvidence[@id=\'#{evidence_id}\']", namespace)[0]

	peptide_id = evidence.attributes['peptide_ref']
	peptide = peptide_node.find("//#{MZID_NS_PREFIX}:Peptide[@id=\'#{peptide_id}\']", namespace)[0]

	sequence = peptide.find("./#{MZID_NS_PREFIX}:PeptideSequence", namespace)[0]
	sequence.content
end

#get_sequence_for_psm(psm_node) ⇒ Object



175
176
177
178
179
# File 'lib/protk/mzidentml_doc.rb', line 175

# Resolves the amino-acid sequence text for a PSM node.
#
# Follows the node's 'peptide_ref' attribute to the matching Peptide element
# (document-wide query) and returns the content of its PeptideSequence child.
# Each lookup takes element [0] of its query result.
def get_sequence_for_psm(psm_node)
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
	peptide_id = psm_node.attributes['peptide_ref']

	peptide = psm_node.find("//#{MZID_NS_PREFIX}:Peptide[@id=\'#{peptide_id}\']", namespace)[0]
	sequence = peptide.find("./#{MZID_NS_PREFIX}:PeptideSequence", namespace)[0]
	sequence.content
end

#peptide_evidence ⇒ Object



60
61
62
# File 'lib/protk/mzidentml_doc.rb', line 60

# Document-wide query for every PeptideEvidence element in the mzIdentML
# namespace. Returns whatever @document.find yields for that XPath.
def peptide_evidence
	xpath = "//#{MZID_NS_PREFIX}:PeptideEvidence"
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
	@document.find(xpath, namespace)
end

#peptides ⇒ Object

Peptides are referenced in many ways in mzIdentML. We define a “Peptide” as a peptide supporting a particular protein. Such peptides may encompass several PSMs.



85
86
87
# File 'lib/protk/mzidentml_doc.rb', line 85

# Document-wide query for every PeptideHypothesis element in the mzIdentML
# namespace. Returns whatever @document.find yields for that XPath.
def peptides
	xpath = "//#{MZID_NS_PREFIX}:PeptideHypothesis"
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
	@document.find(xpath, namespace)
end

#protein_groups ⇒ Object



72
73
74
# File 'lib/protk/mzidentml_doc.rb', line 72

# Document-wide query for every ProteinAmbiguityGroup element in the mzIdentML
# namespace. Returns whatever @document.find yields for that XPath.
def protein_groups
	xpath = "//#{MZID_NS_PREFIX}:ProteinAmbiguityGroup"
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
	@document.find(xpath, namespace)
end

#proteins ⇒ Object



77
78
79
# File 'lib/protk/mzidentml_doc.rb', line 77

# Document-wide query for every ProteinDetectionHypothesis element in the
# mzIdentML namespace. Returns whatever @document.find yields for that XPath.
def proteins
	xpath = "//#{MZID_NS_PREFIX}:ProteinDetectionHypothesis"
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
	@document.find(xpath, namespace)
end

#psms ⇒ Object



64
65
66
# File 'lib/protk/mzidentml_doc.rb', line 64

# Document-wide query for every SpectrumIdentificationItem element in the
# mzIdentML namespace. Returns whatever @document.find yields for that XPath.
def psms
	xpath = "//#{MZID_NS_PREFIX}:SpectrumIdentificationItem"
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
	@document.find(xpath, namespace)
end

#search_databases ⇒ Object



44
45
46
# File 'lib/protk/mzidentml_doc.rb', line 44

# Document-wide query for every SearchDatabase element in the mzIdentML
# namespace. Returns whatever @document.find yields for that XPath.
def search_databases
	xpath = "//#{MZID_NS_PREFIX}:SearchDatabase"
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
	@document.find(xpath, namespace)
end

#source_files ⇒ Object



40
41
42
# File 'lib/protk/mzidentml_doc.rb', line 40

# Document-wide query for every SourceFile element in the mzIdentML namespace.
# Returns whatever @document.find yields for that XPath.
def source_files
	xpath = "//#{MZID_NS_PREFIX}:SourceFile"
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
	@document.find(xpath, namespace)
end

#spectrum_queries ⇒ Object



56
57
58
# File 'lib/protk/mzidentml_doc.rb', line 56

# Document-wide query for every SpectrumIdentificationResult element in the
# mzIdentML namespace. Returns whatever @document.find yields for that XPath.
def spectrum_queries
	xpath = "//#{MZID_NS_PREFIX}:SpectrumIdentificationResult"
	namespace = "#{MZID_NS_PREFIX}:#{MZID_NS}"
	@document.find(xpath, namespace)
end