Class: Spec::MzXML::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/spec/mzxml/parser.rb

Instance Method Summary collapse

Constructor Details

#initialize(file = nil, parse_type = :parse, *args) ⇒ Parser

Returns a new instance of Parser.



194
195
196
197
198
# File 'lib/spec/mzxml/parser.rb', line 194

# Builds a parser and, when a file is supplied, immediately dispatches to the
# method named by +parse_type+.
#
# file::       value handed to the parsing method as its first argument;
#              when nil/false nothing is invoked
# parse_type:: name of the method called through +send+ (defaults to :parse)
# args::       extra arguments forwarded after +file+
def initialize(file=nil, parse_type=:parse, *args)
  return unless file
  send(parse_type, file, *args)
end

Instance Method Details

#_el(name) ⇒ Object



400
401
402
403
404
405
406
407
408
409
410
# File 'lib/spec/mzxml/parser.rb', line 400

# Scans forward for the next occurrence of the XML attribute +name+ and
# returns its value.
#
# The search starts with the line currently held in @line and, while that
# line does not contain name="...", keeps replacing @line with the next line
# read from @fh until a match is found or @fh reaches EOF. @line is left on
# the matching line, so a following call for another attribute of the same
# tag is answered without reading further.
#
# name:: attribute name; it is interpolated into the regexp unescaped
#
# Returns a fresh String with the attribute value, or nil when no match was
# found before EOF.
def _el(name)
  # [^"]* stops at the closing quote of this attribute; a greedy .* would run
  # on to the last quote of the line and swallow the following attributes.
  pattern = /#{name}="([^"]*)"/
  found = pattern.match(@line)
  until found || @fh.eof?
    @line = @fh.readline
    found = pattern.match(@line)
  end
  found && found[1].dup
end

#basic_info(mzxml_file) ⇒ Object

Returns a hash of basic info on an mzXML run:

*mzXML_element*   *hash keys (symbols)*
scanCount       scan_count
startTime       start_time
endTime         end_time
startMz         start_mz
endMz           end_mz


347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/spec/mzxml/parser.rb', line 347

# Collects summary information about an mzXML run by scanning the file line
# by line with _el (no XML parser involved).
#
# mzxml_file:: path of the mzXML file to read
#
# Returns a Hash with these keys:
#   :start_time, :end_time  Floats taken from startTime/endTime with the
#                           leading "PT" and trailing "S" removed
#   :ms_level               Integer msLevel of the first scan encountered
#   :start_mz, :end_mz      Floats, set only when :ms_level is 1
#   :scan_count             Array whose element 0 is the scanCount attribute
#                           and element n the number of scans tallied at
#                           msLevel n; it is cut at the first zero entry
#
# Raises NoMethodError when startTime or endTime cannot be found, because
# _el then returns nil. The file handle is closed even when parsing raises.
def basic_info(mzxml_file)
  puts "parsing: #{mzxml_file} #{File.exist?(mzxml_file)}" if $VERBOSE
  hash = {}
  # counts[0] holds the declared scanCount, counts[n] the tally for msLevel n.
  counts = Array.new(6, 0)
  @fh = File.open(mzxml_file)
  begin
    @line = ""
    counts[0] = _el("scanCount").to_i
    hash[:start_time] = _el("startTime").sub(/^PT/, "").sub(/S$/, "").to_f
    hash[:end_time] = _el("endTime").sub(/^PT/, "").sub(/S$/, "").to_f
    hash[:ms_level] = _el("msLevel").to_i
    # The first scan is always tallied under level 1, whatever msLevel it had.
    counts[1] = 1
    if hash[:ms_level] == 1
      hash[:start_mz] = _el("startMz").to_f
      hash[:end_mz] = _el("endMz").to_f
    end

    until @fh.eof?
      # Step past the line that matched last time so _el looks at new input.
      @line = @fh.readline
      level = _el("msLevel")
      break unless level
      slot = level.to_i
      # nil.to_i is 0, so a level beyond the pre-sized slots extends the
      # array instead of raising on nil + 1.
      counts[slot] = counts[slot].to_i + 1
    end
  ensure
    @fh.close
  end
  # Gaps created by extending the array are nil; treat them like zero.
  hash[:scan_count] = counts.take_while { |cnt| cnt && cnt != 0 }
  hash
end

#default_parser ⇒ Object



180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/spec/mzxml/parser.rb', line 180

# Picks the name of the default XML backend.
#
# Returns "xmlparser" when any entry of $" (the list of files that have
# already been required) matches /xmlparser/, and "rexml" otherwise.
def default_parser
  xmlparser_loaded = $".any? { |feature| feature =~ /xmlparser/ }
  xmlparser_loaded ? "xmlparser" : "rexml"
end

#get_prec_mz_by_scan_for_time_index(file) ⇒ Object



288
289
290
291
292
293
294
295
296
# File 'lib/spec/mzxml/parser.rb', line 288

# Loads a Spec::MSRunIndex from +file+ and maps its scans_by_num list to the
# precursor m/z of each entry.
#
# Returns an Array parallel to index.scans_by_num: scan.prec_mz for every
# truthy entry and nil for every nil/false entry.
def get_prec_mz_by_scan_for_time_index(file)
  run_index = Spec::MSRunIndex.new(file)
  run_index.scans_by_num.collect { |entry| entry ? entry.prec_mz : nil }
end

#parse(file) ⇒ Object

Parse into a complete object structure (REXML??)



201
202
203
204
205
# File 'lib/spec/mzxml/parser.rb', line 201

# Placeholder for a parser that builds a complete object structure.
#
# file:: path of the file that would be parsed (currently unused)
#
# Raises NotImplementedError. The constructor dispatches to this method by
# default, so the failure is raised rather than ending the host process
# through +exit+, which would report a successful exit status.
def parse(file)
  # TODO: write complete parser
  raise NotImplementedError, "Spec::MzXML::Parser#parse is not implemented yet (file: #{file})"
end

#precursor_mz_and_inten_by_scan(file) ⇒ Object

Returns hash where hash[scan_num] = [precursorMz, precursorIntensity]. Parent scans are not hashed. Keys and values are both strings.



284
285
286
# File 'lib/spec/mzxml/parser.rb', line 284

# Unfinished stub: ignores +file+ and always returns nil.
def precursor_mz_and_inten_by_scan(file)
  # in progress
  nil
end

#precursor_mz_by_scan(file, parse_type = nil) ⇒ Object

Returns array where array[scan_num] = precursorMz. Parent scans are not arrayed. Values are strings. Array index likely starts at 1! parse_type = “regex” | “rexml” | “xmlparser”. Also takes an MSRunIndex file (terminates with ‘.timeIndex’). Also takes .RAW or .raw files and converts them to mzXML using Spec::MzXML::MZXML_CONVERTER. Also takes a file without an extension, in which case it tests to see if the index file exists, then the .mzXML file, then .RAW/.raw (and converts).



307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# File 'lib/spec/mzxml/parser.rb', line 307

# Returns the precursor m/z values of a run, using a time index when one is
# available and otherwise parsing the mzXML file.
#
# A time index is used, in this order of preference, when
#   1. file + '.timeIndex' exists,
#   2. file + '.mzXML.timeIndex' exists, or
#   3. +file+ itself ends in '.timeIndex'.
# Otherwise +file+ is passed through Spec::MzXML.file_to_mzxml and parsed.
#
# file::       path (see above)
# parse_type:: "xmlparser", "regex" or "rexml"; when nil/false the value of
#              default_parser is used
#
# Returns whatever the selected backend yields. For an unrecognised
# parse_type a message is printed and nil is returned.
def precursor_mz_by_scan(file, parse_type=nil)
  index_file = [file + '.timeIndex', file + '.mzXML.timeIndex'].find do |candidate|
    File.exist?(candidate)
  end
  index_file ||= file if file =~ /\.timeIndex$/
  return get_prec_mz_by_scan_for_time_index(index_file) if index_file

  file = Spec::MzXML.file_to_mzxml(file)

  parse_type ||= default_parser
  case parse_type
  when "xmlparser"
    parser = Spec::MzXML::XMLParser::PrecMzByNum.new
    File.open(file) { |fh| parser.parse(fh.read) }
    parser.prec_mz
  when "regex"
    Spec::MzXML::Regexp.precursor_mz_by_scan(file)
  when "rexml"
    listener = Spec::MzXML::REXMLStreamListener::PrecMzByNum.new
    # Block form closes the handle once streaming finishes or raises; a bare
    # File.new here would stay open until garbage collection.
    File.open(file) { |fh| REXML::Document.parse_stream(fh, listener) }
    listener.prec_mz
  else
    puts "Don't recognize parse_type: #{parse_type}"
    nil
  end
end

#precursor_mz_by_scan_for_path(path, extension, parse_type = nil) ⇒ Object

Returns a Hash indexed by filename (with no extension) for a given path. extension = glob (string) or regex. The basename is given as: file.split(‘.’).first



260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/spec/mzxml/parser.rb', line 260

# Runs precursor_mz_by_scan on every matching file inside +path+.
#
# path::       directory to work in; the process changes into it for the
#              duration of the call (Dir.chdir with a block)
# extension::  a String, used as a glob via Dir[], or a Regexp matched
#              against each entry of the directory; subclasses of either are
#              accepted. Any other object prints a message and matches nothing.
# parse_type:: passed through to precursor_mz_by_scan
#
# Returns a Hash keyed by file.split('.').first whose values are the results
# of precursor_mz_by_scan for the corresponding file.
def precursor_mz_by_scan_for_path(path, extension, parse_type=nil)
  hash = {}
  Dir.chdir(path) do
    files =
      case extension
      when String
        Dir[extension]
      when Regexp
        Dir.entries(".").select { |entry| entry =~ extension }
      else
        puts "extension: #{extension} not a String or Regexp!"
        []
      end
    files.each do |file|
      base = file.split('.').first
      hash[base] = precursor_mz_by_scan(file, parse_type)
    end
  end
  hash
end

#scans_by_num(mzXML_file, parse_type = nil) ⇒ Object

Returns an array of scans indexed by scan number. NOTE that the first scan (zero indexed) will likely be nil! Accepts an optional parse_type = ‘xmlparser’ | ‘rexml’.



218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/spec/mzxml/parser.rb', line 218

# Builds an array of scans indexed by scan number.
#
# mzXML_file:: path of the mzXML file
# parse_type:: 'xmlparser' or 'rexml'; when nil/false the value of
#              default_parser is used
#
# With 'xmlparser' the array comes from
# Spec::MzXML::XMLParser::TimeMzIntenIndexer#scans_by_num. With 'rexml' every
# msRun/scan element becomes a Spec::Scan built from its num, msLevel,
# retention time and, for scans whose msLevel is not 1, the text and
# precursorIntensity of its precursorMz child. In both cases the array is
# passed to Spec::Scan.add_parent_scan before being returned.
#
# Raises ArgumentError for any other parse_type.
def scans_by_num(mzXML_file, parse_type=nil)
  parse_type ||= default_parser
  scans = []
  case parse_type
  when 'xmlparser'
    parser = Spec::MzXML::XMLParser::TimeMzIntenIndexer.new
    parser.parse(IO.read(mzXML_file))
    scans = parser.scans_by_num
  when 'rexml'
    # This is really too slow for files of this size.
    # The document is built inside the block so the handle is closed as soon
    # as construction finishes or raises.
    doc = File.open(mzXML_file) { |fh| REXML::Document.new(fh) }
    doc.elements.each('msRun/scan') do |scan|
      attrs = scan.attributes
      level = attrs['msLevel'].to_i
      prec_mz = nil
      prec_int = nil
      if level != 1
        scan.elements.each("precursorMz") do |prec|
          prec_mz = prec.text.to_f
          prec_int = prec.attributes["precursorIntensity"].to_f
        end
      end
      # retentionTime looks like "PT0.154000S": drop the leading "PT" and the
      # trailing "S".
      rt = attrs['retentionTime'][2...-1]

      num = attrs['num'].to_i
      scans[num] = Spec::Scan.new(num, level, rt.to_f, prec_mz, prec_int)
    end
  else
    # raise, not throw: throw is the catch/throw control-flow primitive and
    # would surface as "uncaught throw ArgumentError" without the message.
    raise ArgumentError, "invalid parse type: #{parse_type}"
  end
  ## update the scans for parents
  Spec::Scan.add_parent_scan(scans)
  scans
end

#start_and_end_mz(mzxml_file) ⇒ Object

returns [start_mz, end_mz] of the first full scan (ms_level == 1)



387
388
389
390
391
392
393
394
395
396
397
398
# File 'lib/spec/mzxml/parser.rb', line 387

# Finds the first scan whose msLevel is 1 and returns its m/z range.
#
# mzxml_file:: path of the mzXML file, scanned line by line through _el
#
# Returns [start_mz, end_mz] as Floats, taken from the next startMz and endMz
# attributes at or after the line holding that msLevel. Returns nil when the
# file contains no scan with msLevel 1. The file handle is closed even when
# scanning raises.
def start_and_end_mz(mzxml_file)
  @fh = File.open(mzxml_file)
  begin
    @line = ""
    loop do
      level = _el("msLevel")
      # _el gives nil once EOF is reached without a further msLevel.
      return nil unless level
      break if level.to_i == 1
      # _el leaves @line on the matching line; blank it so the next call
      # reads on instead of returning this same msLevel forever.
      @line = ""
    end
    start_mz = _el("startMz").to_f
    end_mz = _el("endMz").to_f
    [start_mz, end_mz]
  ensure
    @fh.close
  end
end

#times_and_spectra(file) ⇒ Object

returns: [times_arr, [m/z,inten,m/z,inten…]] where times are time strings (in seconds)



209
210
211
212
213
# File 'lib/spec/mzxml/parser.rb', line 209

# Reads +file+ in full, feeds its contents to a
# Spec::MzXML::XMLParser::TimesAndSpectra instance and returns that
# instance's times_and_spectra value.
def times_and_spectra(file)
  extractor = Spec::MzXML::XMLParser::TimesAndSpectra.new
  xml_text = IO.read(file)
  extractor.parse(xml_text)
  extractor.times_and_spectra
end