Class: Spec::MzXML::Parser
- Inherits:
-
Object
- Object
- Spec::MzXML::Parser
- Defined in:
- lib/spec/mzxml/parser.rb
Instance Method Summary collapse
- #_el(name) ⇒ Object
-
#basic_info(mzxml_file) ⇒ Object
Returns a hash of basic info on an mzXML run, keyed by symbols (mzXML element → hash key): scanCount → :scan_count, startTime → :start_time, endTime → :end_time, startMz → :start_mz, endMz → :end_mz.
- #default_parser ⇒ Object
- #get_prec_mz_by_scan_for_time_index(file) ⇒ Object
-
#initialize(file = nil, parse_type = :parse, *args) ⇒ Parser
constructor
A new instance of Parser.
-
#parse(file) ⇒ Object
Parse into a complete object structure (REXML??).
-
#precursor_mz_and_inten_by_scan(file) ⇒ Object
Returns a hash where hash[scan_num] = [precursorMz, precursorIntensity]. Parent scans are not hashed. Keys and values are both strings.
-
#precursor_mz_by_scan(file, parse_type = nil) ⇒ Object
Returns an array where array[scan_num] = precursorMz. Parent scans are not arrayed. Values are strings.
-
#precursor_mz_by_scan_for_path(path, extension, parse_type = nil) ⇒ Object
Returns a Hash indexed by filename (with no extension) for a given path. extension = glob (String) or Regexp. The basename is given as: file.split('.').first.
-
#scans_by_num(mzXML_file, parse_type = nil) ⇒ Object
Returns an array of scans indexed by scan number. NOTE that the first scan (zero indexed) will likely be nil! Accepts an optional parse_type = 'xmlparser' | 'rexml'.
-
#start_and_end_mz(mzxml_file) ⇒ Object
returns [start_mz, end_mz] of the first full scan (ms_level == 1).
-
#times_and_spectra(file) ⇒ Object
returns: [times_arr, [m/z,inten,m/z,inten…]] where times are time strings (in seconds).
Constructor Details
#initialize(file = nil, parse_type = :parse, *args) ⇒ Parser
Returns a new instance of Parser.
194 195 196 197 198 |
# File 'lib/spec/mzxml/parser.rb', line 194
# Creates a parser. When +file+ is given, the method named by +parse_type+
# is invoked right away with +file+ followed by any extra +args+; without a
# file nothing is parsed.
def initialize(file = nil, parse_type = :parse, *args)
  return unless file

  send(parse_type, file, *args)
end
Instance Method Details
#_el(name) ⇒ Object
400 401 402 403 404 405 406 407 408 409 410 |
# File 'lib/spec/mzxml/parser.rb', line 400
# Finds the next +name="value"+ attribute, starting with the current @line and
# then reading further lines from @fh until a match is found or the handle
# reaches EOF. Returns the attribute value as a String, or nil if none is found.
#
# The matching line stays in @line, so a following call for a different
# attribute on the same line still sees it. A repeated call for the same
# attribute returns the same value until the caller advances @line.
def _el(name)
  # Escape the name so it is matched literally, and stop the capture at the
  # closing quote so other attributes on the same line are not swallowed.
  re = /#{Regexp.escape(name)}="([^"]*)"/
  found = re.match(@line)
  until found || @fh.eof?
    @line = @fh.readline
    found = re.match(@line)
  end
  found && found[1]
end
#basic_info(mzxml_file) ⇒ Object
Returns a hash of basic info on an mzXML run:
*mzXML_element* *hash keys (symbols)*
scanCount scan_count
startTime start_time
endTime end_time
startMz start_mz
endMz end_mz
347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 |
# File 'lib/spec/mzxml/parser.rb', line 347
# Reads an mzXML file line by line (through #_el) and returns a hash with:
#   :start_time, :end_time - startTime/endTime as floats, with the leading
#                            "PT" and trailing "S" removed
#   :ms_level              - msLevel of the first scan found
#   :start_mz, :end_mz     - set only when that first scan has msLevel 1
#   :scan_count            - [scanCount attribute, scans at level 1, level 2, ...],
#                            cut off at the first zero entry
# The open handle is kept in @fh while reading and is closed on the way out,
# including when a missing attribute makes one of the lookups raise.
def basic_info(mzxml_file)
  puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}" if $VERBOSE
  info = {}
  # Slot 0 holds the declared scanCount; slots 1..5 tally scans per MS level.
  level_counts = Array.new(6, 0)
  File.open(mzxml_file) do |fh|
    @fh = fh
    @line = ""
    level_counts[0] = _el("scanCount").to_i
    info[:start_time] = _el("startTime").sub(/^PT/, "").sub(/S$/, "").to_f
    info[:end_time] = _el("endTime").sub(/^PT/, "").sub(/S$/, "").to_f
    info[:ms_level] = _el("msLevel").to_i
    # NOTE(review): the first scan is always tallied under level 1, whatever
    # its msLevel actually is - confirm this is intended.
    level_counts[1] = 1
    if info[:ms_level] == 1
      info[:start_mz] = _el("startMz").to_f
      info[:end_mz] = _el("endMz").to_f
    end
    until fh.eof?
      # Move past the line that produced the previous match before searching.
      @line = fh.readline
      level = _el("msLevel")
      break unless level

      slot = level.to_i
      # Levels above 5 have no pre-zeroed slot; nil.to_i counts it as 0.
      level_counts[slot] = level_counts[slot].to_i + 1
    end
  end
  info[:scan_count] = level_counts.take_while { |count| count && count != 0 }
  info
end
#default_parser ⇒ Object
180 181 182 183 184 185 186 187 188 189 190 191 192 |
# File 'lib/spec/mzxml/parser.rb', line 180
# Chooses the parser backend name: "xmlparser" when any entry in the list of
# loaded features mentions xmlparser, otherwise "rexml".
def default_parser
  xmlparser_loaded = $LOADED_FEATURES.any? { |feature| feature =~ /xmlparser/ }
  xmlparser_loaded ? "xmlparser" : "rexml"
end
#get_prec_mz_by_scan_for_time_index(file) ⇒ Object
288 289 290 291 292 293 294 295 296 |
# File 'lib/spec/mzxml/parser.rb', line 288
# Loads a Spec::MSRunIndex from +file+ and returns one entry per slot of its
# scans_by_num list: the scan's prec_mz, or nil where the slot holds no scan.
def get_prec_mz_by_scan_for_time_index(file)
  run_index = Spec::MSRunIndex.new(file)
  run_index.scans_by_num.map { |scan| scan ? scan.prec_mz : nil }
end
#parse(file) ⇒ Object
Parse into a complete object structure (REXML??)
201 202 203 204 205 |
# File 'lib/spec/mzxml/parser.rb', line 201
# Placeholder for a full parse of +file+ into an object structure.
#
# Raises NotImplementedError rather than printing to stdout and calling
# +exit+, which ended the whole host process with a success status. Like
# SystemExit, NotImplementedError is not a StandardError, so a bare +rescue+
# in calling code does not swallow it.
def parse(file)
  # @TODO: write complete parser
  raise NotImplementedError, "Spec::MzXML::Parser#parse is not implemented yet (file: #{file})"
end
#precursor_mz_and_inten_by_scan(file) ⇒ Object
Returns a hash where hash[scan_num] = [precursorMz, precursorIntensity]. Parent scans are not hashed. Keys and values are both strings.
284 285 286 |
# File 'lib/spec/mzxml/parser.rb', line 284
# Unfinished stub: +file+ is ignored and nil is always returned.
def precursor_mz_and_inten_by_scan(file)
  # in progress
  nil
end
#precursor_mz_by_scan(file, parse_type = nil) ⇒ Object
Returns an array where array[scan_num] = precursorMz. Parent scans are not arrayed. Values are strings. Array index likely starts at 1! parse_type = "regex" | "rexml" | "xmlparser". Also takes an MSRunIndex file (terminates with '.timeIndex'). Also takes .RAW or .raw files and converts them to mzXML using Spec::MzXML::MZXML_CONVERTER. Also takes a file without an extension, in which case it tests whether the index file exists, then the .mzXML file, then .RAW/.raw (and converts).
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 |
# File 'lib/spec/mzxml/parser.rb', line 307
# Returns the precursor m/z values for +file+.
#
# A time index is preferred when one is available: "<file>.timeIndex", then
# "<file>.mzXML.timeIndex", then +file+ itself if its name ends in
# ".timeIndex". Otherwise +file+ is passed through Spec::MzXML.file_to_mzxml
# and read with the backend named by +parse_type+ ("xmlparser", "regex" or
# "rexml"; #default_parser decides when it is nil).
#
# For an unrecognised +parse_type+ a message is printed and nil is returned.
def precursor_mz_by_scan(file, parse_type = nil)
  # If given a time index file:
  if File.exist?(file + '.timeIndex')
    return get_prec_mz_by_scan_for_time_index(file + '.timeIndex')
  elsif File.exist?(file + '.mzXML.timeIndex')
    return get_prec_mz_by_scan_for_time_index(file + '.mzXML.timeIndex')
  elsif file =~ /\.timeIndex$/
    return get_prec_mz_by_scan_for_time_index(file)
  end

  file = Spec::MzXML.file_to_mzxml(file)
  parse_type ||= default_parser
  case parse_type
  when "xmlparser"
    parser = Spec::MzXML::XMLParser::PrecMzByNum.new
    File.open(file) { |fh| parser.parse(fh.read) }
    parser.prec_mz
  when "regex"
    Spec::MzXML::Regexp.precursor_mz_by_scan(file)
  when "rexml"
    listener = Spec::MzXML::REXMLStreamListener::PrecMzByNum.new
    # Block form so the handle is closed once the stream has been consumed.
    File.open(file) { |fh| REXML::Document.parse_stream(fh, listener) }
    listener.prec_mz
  else
    puts "Don't recognize parse_type: #{parse_type}"
  end
end
#precursor_mz_by_scan_for_path(path, extension, parse_type = nil) ⇒ Object
Returns a Hash indexed by filename (with no extension) for a given path. extension = glob (String) or Regexp. The basename is given as: file.split('.').first
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 |
# File 'lib/spec/mzxml/parser.rb', line 260
# Runs #precursor_mz_by_scan on every file in +path+ selected by +extension+
# and returns the results in a hash keyed by the part of each file name before
# its first dot.
#
# +extension+ is a glob String (expanded with Dir[]) or a Regexp (tested
# against each directory entry). Anything else prints a message and selects
# no files. The working directory is switched to +path+ for the duration.
def precursor_mz_by_scan_for_path(path, extension, parse_type = nil)
  Dir.chdir(path) do
    selected =
      if extension.instance_of?(String)
        Dir[extension]
      elsif extension.instance_of?(Regexp)
        Dir.entries(".").select { |entry| entry =~ extension }
      else
        puts "extension: #{extension} not a String or Regexp!"
        []
      end

    selected.each_with_object({}) do |name, by_base|
      by_base[name.split('.').first] = precursor_mz_by_scan(name, parse_type)
    end
  end
end
#scans_by_num(mzXML_file, parse_type = nil) ⇒ Object
Returns an array of scans indexed by scan number. NOTE that the first scan (zero indexed) will likely be nil! Accepts an optional parse_type = 'xmlparser' | 'rexml'
218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
# File 'lib/spec/mzxml/parser.rb', line 218
# Returns an array of Spec::Scan objects indexed by scan number.
#
# +parse_type+ selects the backend: 'xmlparser' or 'rexml'; #default_parser
# decides when it is nil. After loading, Spec::Scan.add_parent_scan is run
# over the array.
#
# Raises ArgumentError for any other +parse_type+.
def scans_by_num(mzXML_file, parse_type = nil)
  parse_type ||= default_parser
  scans = []
  case parse_type
  when 'xmlparser'
    parser = Spec::MzXML::XMLParser::TimeMzIntenIndexer.new
    # File.read never treats a leading "|" in the path as a command.
    parser.parse(File.read(mzXML_file))
    scans = parser.scans_by_num
  when 'rexml' # use REXML
    # This is really too slow for files of this size
    # Block form closes the handle once the document has been built.
    doc = File.open(mzXML_file) { |fh| REXML::Document.new(fh) }
    doc.elements.each('msRun/scan') do |scan|
      rt = scan.attributes['retentionTime'] ## like PT0.154000S"
      level = scan.attributes['msLevel'].to_i
      prec_mz = nil
      prec_int = nil
      if level != 1
        scan.elements.each("precursorMz") do |prec|
          prec_mz = prec.text.to_f
          prec_int = prec.attributes["precursorIntensity"].to_f
        end
      end
      # remove the leading PT and trailing S on the retention time!
      rt = rt[2...-1]
      num = scan.attributes['num'].to_i
      scans[num] = Spec::Scan.new(num, level, rt.to_f, prec_mz, prec_int)
    end
  else
    # `raise`, not `throw`: throw is for catch/throw control flow and would
    # surface as UncaughtThrowError with a misleading message.
    raise ArgumentError, "invalid parse type: #{parse_type}"
  end
  ## update the scans for parents
  Spec::Scan.add_parent_scan(scans)
  scans
end
#start_and_end_mz(mzxml_file) ⇒ Object
returns [start_mz, end_mz] of the first full scan (ms_level == 1)
387 388 389 390 391 392 393 394 395 396 397 398 |
# File 'lib/spec/mzxml/parser.rb', line 387
# Returns [start_mz, end_mz] (floats) read from the startMz/endMz attributes
# that follow the first msLevel="1" attribute in the file.
#
# The open handle is kept in @fh while scanning and is closed on the way out.
#
# Raises ArgumentError when the file contains no msLevel 1 scan.
def start_and_end_mz(mzxml_file)
  File.open(mzxml_file) do |fh|
    @fh = fh
    @line = ""
    loop do
      level = _el("msLevel")
      # nil means EOF was reached with no further msLevel attribute.
      raise ArgumentError, "no scan with msLevel 1 found in #{mzxml_file}" if level.nil?
      break if level.to_i == 1

      # _el leaves the matched line in @line; step past it so the next search
      # does not hit the same scan again and spin forever.
      @line = fh.eof? ? "" : fh.readline
    end
    [_el("startMz").to_f, _el("endMz").to_f]
  end
end
#times_and_spectra(file) ⇒ Object
returns: [times_arr, [m/z,inten,m/z,inten…]] where times are time strings (in seconds)
209 210 211 212 213 |
# File 'lib/spec/mzxml/parser.rb', line 209
# Reads the whole of +file+, feeds it to a
# Spec::MzXML::XMLParser::TimesAndSpectra parser and returns that parser's
# times_and_spectra result.
def times_and_spectra(file)
  parser = Spec::MzXML::XMLParser::TimesAndSpectra.new
  # File.read rather than IO.read: IO.read treats a path beginning with "|"
  # as a command to run on Ruby versions that still support that form.
  parser.parse(File.read(file))
  parser.times_and_spectra
end