Class: Sequest::PepXML

Inherits:
Object
  • Object
show all
Includes:
SpecIDXML
Defined in:
lib/ms/sequest/pepxml.rb,
lib/ms/sequest/pepxml.rb

Defined Under Namespace

Classes: AAModification, MSMSPipelineAnalysis, MSMSRunSummary, Modifications, Parameters, SearchDatabase, SearchHit, SearchResult, SearchSummary, SpectrumQuery, TerminalModification

Constant Summary collapse

# Default pepXML version. 18 is the only version accepted by
# set_from_bioworks_xml and the only one #header / #style_sheet produce
# output for.
DEF_VERSION =
18
# Defaults that new_from_srf, set_from_bioworks and set_from_bioworks_xml
# merge underneath the caller-supplied opts hash.
Default_Options =
{
  # directory the pepxml base name is built from and mzXML files are copied to
  :out_path => '.',
  #:backup_db_path => '.',
  # a PepXML option
  :pepxml_version => DEF_VERSION,  
  ## MSMSRunSummary options:
  # string must be recognized in sample_enzyme.rb 
  # or create your own SampleEnzyme object
  # NOTE(review): the two lines above appear to describe a :sample_enzyme
  # option (read by the builders) that has no default entry here -- confirm.
  :ms_manufacturer => 'ThermoFinnigan',
  :ms_model => 'LCQ Deca XP Plus',
  :ms_ionization => 'ESI',
  :ms_mass_analyzer => 'Ion Trap',
  :ms_detector => 'UNKNOWN',
  :ms_data => '.',      # path to ms data files (raw or mzxml)
  :raw_data_type => "raw",
  :raw_data => ".mzXML", ## even if you don't have it?
  ## SearchSummary options:
  :out_data_type => "out", ## may be srf?? don't think pepxml recognizes this yet
  :out_data => ".tgz", ## may be srf??
  :copy_mzxml => false, # copy the mzxml file to the out_path (create it if necessary)
  :print => false, # print the objects to file
}

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(pepxml_version = DEF_VERSION, sequest_params_obj = nil) ⇒ PepXML

If a block is given, msms_pipeline_analysis is set to the result of the yielded block (and base_name is taken from its msms_run_summary). set_mono_or_avg is called with sequest_params_obj if it is given.



189
190
191
192
193
194
195
196
197
198
# File 'lib/ms/sequest/pepxml.rb', line 189

# Creates a PepXML object.
#
# pepxml_version     - stored on the class (not the instance), so it is
#                      shared by every PepXML object.
# sequest_params_obj - optional; when given it is handed to set_mono_or_avg,
#                      which fills in @avg_parent and @h_plus.
#
# When a block is supplied, its return value becomes the
# msms_pipeline_analysis and base_name is read from that object's
# msms_run_summary.
def initialize(pepxml_version=DEF_VERSION, sequest_params_obj=nil)
  self.class.pepxml_version = pepxml_version
  set_mono_or_avg(sequest_params_obj) if sequest_params_obj
  return unless block_given?

  @msms_pipeline_analysis = yield
  @base_name = @msms_pipeline_analysis.msms_run_summary.base_name
end

Class Attribute Details

.pepxml_versionObject

Returns the value of attribute pepxml_version.



169
170
171
# File 'lib/ms/sequest/pepxml.rb', line 169

# Class-level reader for the pepXML version (the constructor assigns it
# through PepXML.pepxml_version= on every call).
def pepxml_version; @pepxml_version; end

Instance Attribute Details

#avg_parentObject

Returns the value of attribute avg_parent.



178
179
180
# File 'lib/ms/sequest/pepxml.rb', line 178

# Reader for @avg_parent: true when average parent masses are in use,
# false for monoisotopic (assigned by set_mono_or_avg).
def avg_parent; @avg_parent; end

#base_nameObject

the full path name (no extension)



176
177
178
# File 'lib/ms/sequest/pepxml.rb', line 176

# Reader for @base_name: the full path name (no extension).
def base_name; @base_name; end

#h_plusObject

Returns the value of attribute h_plus.



177
178
179
# File 'lib/ms/sequest/pepxml.rb', line 177

# Reader for @h_plus: the H+ mass picked by set_mono_or_avg from
# SpecID::AVG or SpecID::MONO.
def h_plus; @h_plus; end

#msms_pipeline_analysisObject

Returns the value of attribute msms_pipeline_analysis.



174
175
176
# File 'lib/ms/sequest/pepxml.rb', line 174

# Reader for @msms_pipeline_analysis, the top-level pipeline object that
# #to_pepxml serializes.
def msms_pipeline_analysis; @msms_pipeline_analysis; end

#pepxml_versionObject

Returns the value of attribute pepxml_version.



174
175
176
# File 'lib/ms/sequest/pepxml.rb', line 174

# Instance-level reader for @pepxml_version.
# NOTE(review): the constructor shown stores the version on the class, not
# on the instance -- confirm where the instance variable is assigned.
def pepxml_version; @pepxml_version; end

Class Method Details

._prot_num_and_first_prot_by_pep(pep_array) ⇒ Object

updates the private attrs _num_prots and _first_prot on bioworks pep objects. Ideally, we’d like these attributes to reside elsewhere, but for memory concerns, this is best for now.



242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/ms/sequest/pepxml.rb', line 242

# Groups the peptides by amino-acid sequence and, for every group, collects
# the distinct proteins referenced by any peptide in the group. Each peptide
# then receives:
#   _num_prots  - number of distinct proteins for its sequence
#   _first_prot - the first of those proteins
#
# pep_array - array of pep objects responding to aaseq, prots, _num_prots=
#             and _first_prot=.
#
# Returns whatever hash_by(:aaseq).each returns.
def self._prot_num_and_first_prot_by_pep(pep_array)
  pep_array.hash_by(:aaseq).each do |_aaseq, same_seq_peps|
    # push(*prots) keeps the original splat semantics
    proteins = same_seq_peps.each_with_object([]) do |pep, collected|
      collected.push(*pep.prots)
    end
    distinct = proteins.uniq
    prot_count = distinct.size
    leading_prot = distinct.first
    same_seq_peps.each do |pep|
      pep._num_prots = prot_count
      pep._first_prot = leading_prot
    end
  end
end

.base_name_noext(file) ⇒ Object

given any kind of filename (from windows or whatever) returns the base of the filename with no file extension



769
770
771
772
# File 'lib/ms/sequest/pepxml.rb', line 769

# Given any kind of filename (Windows or otherwise), returns the last path
# component with its extension removed.
#
# file - String path; backslashes are treated as path separators.
#
# The argument is left untouched: the separator normalization works on a
# copy, so callers keep their original string and frozen strings are accepted.
#
# The character class in the regex contains word characters, '^' and '.',
# so the removed suffix starts at the first dot after which only those
# characters follow (e.g. "run.tar.gz" becomes "run").
def self.base_name_noext(file)
  normalized = file.gsub("\\", '/')
  File.basename(normalized).sub(/\.[\w^\.]+$/, '')
end

.make_base_name(path, filename) ⇒ Object

combines filename in a manner consistent with the path



744
745
746
747
748
749
750
751
752
753
754
# File 'lib/ms/sequest/pepxml.rb', line 744

# Joins the basename of +filename+ onto +path+ using the separator style the
# path itself appears to use: a backslash when splitting on backslashes
# yields more pieces than splitting on forward slashes, otherwise '/'.
# No separator is added when the path already ends with it.
#
# path     - String directory path.
# filename - String; only File.basename(filename) is used.
#
# Returns the combined String.
def self.make_base_name(path, filename)
  backslash_style = path.split("\\").size > path.split('/').size
  sep = backslash_style ? "\\" : '/'
  leaf = File.basename(filename)
  path.end_with?(sep) ? path + leaf : path + sep + leaf
end

.new_from_srf(srf, opts = {}) ⇒ Object

Dynamically sets :ms_model and :ms_mass_analyzer from the srf info (ignoring defaults or anything passed in) for LTQ Orbitrap and LCQ Deca XP. See the SRF::Sequest::PepXML::Default_Options hash for defaults. Unless given, the out_path will be given as the path of the srf_file. srf may be an object or a filename.



285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
# File 'lib/ms/sequest/pepxml.rb', line 285

# Builds one Sequest::PepXML object from a single SRF search result.
#
# srf  - an SRF object, or a String which is handed to SRF.new.
# opts - Hash merged over Default_Options. The keys :out_path,
#        :backup_db_path, :pepxml_version, :out_data_type and :out_data are
#        removed from the merged hash; :base_name and :sample_enzyme are
#        added; the remainder is passed to MSMSRunSummary.new. Other keys
#        read here: :copy_mzxml, :ms_data, :deltacn_orig, :all_hits.
#
# :ms_model is always overwritten with srf.header.model, and
# :ms_mass_analyzer is overwritten when that model matches /Orbitrap/ or
# /LCQ Deca XP/.
#
# Returns the populated PepXML object.
#
# NOTE: terminates the process via `exit` when :copy_mzxml is set and no
# mzXML file can be located.
def self.new_from_srf(srf, opts={})
  opts = Default_Options.merge(opts)

  ## read the srf file
  if srf.is_a? String
    srf = SRF.new(srf)
  end

  ## set the outpath
  out_path = opts.delete(:out_path)

  params = srf.params

  ## check to see if we need backup_db
  backup_db_path = opts.delete(:backup_db_path)
  if !File.exist?(params.database) && backup_db_path
    params.database_path = backup_db_path
  end

  #######################################################################
  # PREPARE THE OPTIONS:
  #######################################################################
  ## remove items from the options hash that don't belong to 
  ## the MSMSRunSummary (opts is passed to MSMSRunSummary.new below)
  ppxml_version = opts.delete(:pepxml_version)
  out_data_type = opts.delete(:out_data_type)
  out_data = opts.delete(:out_data)

  ## Extract meta info from srf
  bn_noext = base_name_noext(srf.header.raw_filename)
  opts[:ms_model] = srf.header.model
  case opts[:ms_model]
  when /Orbitrap/
    opts[:ms_mass_analyzer] = 'Orbitrap'
  when /LCQ Deca XP/
    opts[:ms_mass_analyzer] = 'Ion Trap'
  end

  ## Create the base name
  full_base_name_no_ext = make_base_name( File.expand_path(out_path), bn_noext)
  opts[:base_name] = full_base_name_no_ext

  ## Create the search summary:
  search_summary_options = {
    :search_database => Sequest::PepXML::SearchDatabase.new(params),
    :base_name => full_base_name_no_ext,
    :out_data_type => out_data_type,
    :out_data => out_data
  }
  modifications_string = srf.header.modifications
  search_summary = Sequest::PepXML::SearchSummary.new( params, modifications_string, search_summary_options)

  # create the sample enzyme from the params object:
  # (a caller-supplied :sample_enzyme takes precedence)
  sample_enzyme_obj = 
    if opts[:sample_enzyme]
      opts[:sample_enzyme]
    else
      params.sample_enzyme
    end
  opts[:sample_enzyme] = sample_enzyme_obj

  ## Create the pepxml obj and top level objects
  pepxml_obj = Sequest::PepXML.new(ppxml_version, params) 
  pipeline = Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=> bn_noext +'.xml'})
  pepxml_obj.msms_pipeline_analysis = pipeline
  pipeline.msms_run_summary = Sequest::PepXML::MSMSRunSummary.new(opts)
  pipeline.msms_run_summary.search_summary = search_summary
  modifications_obj = search_summary.modifications

  ## name some common variables we'll need
  h_plus = pepxml_obj.h_plus
  avg_parent = pepxml_obj.avg_parent


  ## COPY MZXML FILES IF NECESSARY
  if opts[:copy_mzxml]
    mzxml_pathname_noext = File.join(opts[:ms_data], bn_noext)
    to_copy = MS::Converter::MzXML.file_to_mzxml(mzxml_pathname_noext)
    if to_copy
      FileUtils.cp to_copy, out_path
    else
      puts "Couldn't file mzXML file with base: #{mzxml_pathname_noext}"
      puts "Perhaps you need to specifiy the location of the raw data"
      puts "or need an mzXML converter (readw or t2x)"
      exit
    end
  end


  #######################################################################
  # CREATE the spectrum_queries_ar
  #######################################################################
  srf_index = srf.index
  out_files = srf.out_files
  spectrum_queries_arr = Array.new(srf.dta_files.size)
  files_with_hits_index = 0  ## will end up being 1 indexed

  # pick which hit column supplies :deltacn (see the column legend in the
  # inner loop: 19=deltacn, 20=deltacn_orig_updated)
  deltacn_orig = opts[:deltacn_orig]
  deltacn_index = 
    if deltacn_orig ; 20
    else 19
    end

  srf.dta_files.each_with_index do |dta_file,dta_i|
    # spectra without any hits produce no spectrum_query
    next if out_files[dta_i].num_hits == 0
    files_with_hits_index += 1

    precursor_neutral_mass = dta_file.mh - h_plus

    (start_scan, end_scan, charge) = srf_index[dta_i]
    sq_hash = {
      :spectrum => [bn_noext, start_scan, end_scan, charge].join('.'),
      :start_scan => start_scan,
      :end_scan => end_scan,
      :precursor_neutral_mass => precursor_neutral_mass,
      :assumed_charge => charge.to_i,
      :pepxml_version => ppxml_version,
      :index => files_with_hits_index,
    }

    spectrum_query = Sequest::PepXML::SpectrumQuery.new(sq_hash)


    hits = out_files[dta_i].hits

    search_hits = 
      if opts[:all_hits]
        Array.new(out_files[dta_i].num_hits)  # all hits
      else
        Array.new(1)  # top hit only
      end

    (0...(search_hits.size)).each do |hit_i|
      hit = hits[hit_i]
      # under the modified deltacn schema (like bioworks)
      # Get proper deltacn and deltacnstar
      # under new srf, deltacn is already corrected for what prophet wants,
      # deltacn_orig_updated is how to access the old one
      # Prophet deltacn is not the same as the native Sequest deltacn
      # It is the deltacn of the second best hit!

      ## mass calculations:
      calc_neutral_pep_mass = hit[0] - h_plus


      sequence = hit.sequence

      #  NEED TO MODIFY SPLIT SEQUENCE TO DO MODS!
      ## THIS IS ALL INNER LOOP, so we make every effort at speed here:
      (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(sequence)
      # 0=mh 1=deltacn_orig 2=sp 3=xcorr 4=id 5=num_other_loci 6=rsp 7=ions_matched 8=ions_total 9=sequence 10=prots 11=deltamass 12=ppm 13=aaseq 14=base_name 15=first_scan 16=last_scan 17=charge 18=srf 19=deltacn 20=deltacn_orig_updated

      sh_hash = {
        :hit_rank => hit_i+1,
        :peptide => pepseq,
        :peptide_prev_aa => prevaa,
        :peptide_next_aa => nextaa,
        :protein => hit[10].first.reference.split(" ").first, 
        :num_tot_proteins => hit[10].size,
        :num_matched_ions => hit[7],
        :tot_num_ions => hit[8],
        :calc_neutral_pep_mass => calc_neutral_pep_mass,
        :massdiff => precursor_neutral_mass - calc_neutral_pep_mass, 
        :num_tol_term => sample_enzyme_obj.num_tol_term(sequence),
        :num_missed_cleavages => sample_enzyme_obj.num_missed_cleavages(pepseq),
        :is_rejected => 0,
        # These are search score attributes:
        :xcorr => hit[3],
        :deltacn => hit[deltacn_index],
        :spscore => hit[2],
        :sprank => hit[6],
        :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(sequence)[1]),
      }
      # deltacnstar is only emitted when :deltacn_orig was not requested
      unless deltacn_orig
        sh_hash[:deltacnstar] = 
          if hits[hit_i+1].nil?  # no next hit? then its deltacnstar == 1
          '1'
          else
          '0'
          end
      end
      search_hits[hit_i] = Sequest::PepXML::SearchHit.new(sh_hash) # there can be multiple hits
    end

    search_result = Sequest::PepXML::SearchResult.new
    search_result.search_hits = search_hits
    spectrum_query.search_results = [search_result]
    # the index is 1-based here, so slot 0 (and any slots past the last
    # spectrum with hits) stay nil and are removed by compact! below
    spectrum_queries_arr[files_with_hits_index] = spectrum_query
  end
  spectrum_queries_arr.compact!

  # NOTE(review): spectrum_queries is assigned the same array twice (here
  # and two lines below); the second assignment looks redundant -- confirm.
  pipeline.msms_run_summary.spectrum_queries = spectrum_queries_arr 
  pepxml_obj.base_name = pipeline.msms_run_summary.base_name
  pipeline.msms_run_summary.spectrum_queries =  spectrum_queries_arr 

  pepxml_obj
end

.set_from_bioworks(bioworks_file, opts = {}) ⇒ Object

Takes an .srg or bioworks.xml file. If possible, ensures that an mzXML file is present for each pepxml file. :print => true will print files. NOTES: num_tol_term and num_missed_cleavages are both calculated from the sample_enzyme. Thus, a No_Enzyme search may still pass in a :sample_enzyme option to get these calculated.



488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
# File 'lib/ms/sequest/pepxml.rb', line 488

# Builds PepXML objects from a file readable by SpecID.new.
#
# bioworks_file - path handed to SpecID.new; the result must be a Bioworks
#                 or an SRFGroup, otherwise the process aborts.
# opts          - Hash merged over Default_Options. :out_path is created if
#                 it does not exist and must be a directory. :params is
#                 required for a Bioworks input. With :print set, every
#                 object is written to "<base_name>.xml".
#
# Returns the array of PepXML objects.
def self.set_from_bioworks(bioworks_file, opts={})
  opts = Default_Options.merge(opts)
  out_dir = opts[:out_path]

  ## Create the out_path directory if necessary
  FileUtils.mkpath(out_dir) unless File.exist?(out_dir)
  abort "#{opts[:out_path]} must be a directory!" unless File.directory?(out_dir)

  spec_id = SpecID.new(bioworks_file)
  if spec_id.is_a?(Bioworks)
    abort("must have opts[:params] set!") unless opts[:params]
    pepxml_objs = set_from_bioworks_xml(bioworks_file, opts[:params], opts)
  elsif spec_id.is_a?(SRFGroup)
    pepxml_objs = spec_id.srfs.map { |srf| new_from_srf(srf, opts) }
  else
    abort "invalid object"
  end

  pepxml_objs.each { |obj| obj.to_pepxml(obj.base_name + ".xml") } if opts[:print]
  pepxml_objs
end

.set_from_bioworks_xml(bioworks, params, opts = {}) ⇒ Object

Takes bioworks 3.2/3.3 xml output (with no filters). Returns a list of PepXML objects. params = sequest.params file; bioworks = bioworks.xml exported multi-consensus view file; pepxml_version = 0 for tpp 1.2.3; pepxml_version = 18 for tpp 2.8.2, 2.8.3, 2.9.2.



527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
# File 'lib/ms/sequest/pepxml.rb', line 527

# Builds one PepXML object per raw-file base name found in a Bioworks export.
#
# bioworks - a Bioworks object, or a String which is handed to SpecID.new.
# params   - a Sequest::Params object, or a String which is handed to
#            Sequest::Params.new.
# opts     - Hash merged over Default_Options; :sample_enzyme,
#            :backup_db_path and :copy_mzxml are also read.
#
# The peptides are grouped by base_name; inside each group they are grouped
# by (first_scan, last_scan, charge) and only the peptide with the highest
# xcorr becomes the (single) search hit of that spectrum query.
#
# Aborts the process when the pepxml_version is not 18, when params or
# bioworks is neither the expected object nor a String, or when the
# Bioworks version matches neither /3.2/ nor /3.3/.
#
# Returns the PepXML objects sorted by their summary_xml.
def self.set_from_bioworks_xml(bioworks, params, opts={})
  opts = Default_Options.merge(opts)
  pepxml_version, ms_manufacturer, ms_model, ms_ionization, ms_mass_analyzer, ms_detector, raw_data_type, raw_data, out_data_type, out_data, ms_data, out_path = opts.values_at(:pepxml_version, :ms_manufacturer, :ms_model, :ms_ionization, :ms_mass_analyzer, :ms_detector, :raw_data_type, :raw_data, :out_data_type, :out_data, :ms_data, :out_path)



  # Default_Options already supplies '.', so this only matters when the
  # caller explicitly passed a nil/false :out_path
  unless out_path
    out_path = '.'
  end

  supported_versions = [18]

  unless supported_versions.include?(opts[:pepxml_version]) 
    abort "pepxml_version: #{pepxml_version} not currently supported.  Current support is for versions #{supported_versions.join(', ')}"
  end

  ## Turn params and bioworks_obj into objects if necessary:
  # Params:
  if params.class == Sequest::Params  # OK!
  elsif params.class == String ; params = Sequest::Params.new(params)
  else                         ; abort "Don't recognize #{params} as object or string!"
  end
  # Bioworks:
  if bioworks.class == Bioworks  # OK!
  elsif bioworks.class == String ; bioworks = SpecID.new(bioworks)
  else                           ; abort "Don't recognize #{bioworks} as object or string!"
  end

  # a caller-supplied :sample_enzyme takes precedence over the params one
  sample_enzyme_obj = 
    if opts[:sample_enzyme]
      opts[:sample_enzyme]
    else
      params.sample_enzyme
    end

  #puts "bioworks.peps.size: #{bioworks.peps.size}"; #puts "bioworks.prots.size: #{bioworks.prots.size}"; #puts "Bioworks.version: #{bioworks.version}"

  ## TURN THIS ON IF YOU THINK YOU MIGHT NOT BE GETTING PEPTIDES from
  ## bioworks
  #bioworks.peps.each { |pep| if pep.class != Bioworks::Pep ; puts "trying to pass as pep: "; p pep; abort "NOT a pep!" end }

  ## check to see if we need backup_db

  backup_db_path = opts.delete(:backup_db_path)
  if !File.exist?(params.database) && backup_db_path
    params.database_path = backup_db_path
  end

  ## Start
  # NOTE(review): split_bio_objs is never read again in this method.
  split_bio_objs = []

  ## (num_prots_by_pep, prot_by_pep) = 
  #num_prots_by_pep.each do |k,v| puts "k: #{k} v: #{v}\n"; break end ; prot_by_pep.each do |k,v| puts "k: #{k} v: #{v}" ; break end ; abort "HERE"

  modifications_string = bioworks.modifications

  ## Create a hash of spectrum_query arrays by filename (this very big block):
  # NOTE(review): spectrum_queries_by_base_name is never read again in this method.
  spectrum_queries_by_base_name = {}
  # Hash by the filenames to split into filenames:
  pepxml_objects = bioworks.peps.hash_by(:base_name).map do |base_name, pep_arr|

    search_summary = Sequest::PepXML::SearchSummary.new(params, modifications_string, {:search_database => Sequest::PepXML::SearchDatabase.new(params), :out_data_type => out_data_type, :out_data => out_data})
    modifications_obj = search_summary.modifications

    pepxml_obj = Sequest::PepXML.new(pepxml_version, params)
    full_base_name_no_ext = self.make_base_name( File.expand_path(out_path), base_name)

    # 18 is the only version that passes the supported_versions check above
    case pepxml_version
    when 18
      pipeline =  Sequest::PepXML::MSMSPipelineAnalysis.new({:date=>nil,:summary_xml=>base_name+'.xml'})
      msms_run_summary = Sequest::PepXML::MSMSRunSummary.new({
        :base_name => full_base_name_no_ext,
        :ms_manufacturer => ms_manufacturer,
        :ms_model => ms_model,
        :ms_ionization => ms_ionization,
        :ms_mass_analyzer => ms_mass_analyzer,
        :ms_detector => ms_detector,
        :raw_data_type => raw_data_type,
        :raw_data => raw_data,
        :sample_enzyme => sample_enzyme_obj, # usually, params.sample_enzyme,
        :search_summary => search_summary,
      }) 
      pipeline.msms_run_summary = msms_run_summary
      pepxml_obj.msms_pipeline_analysis = pipeline
      pepxml_obj.msms_pipeline_analysis.msms_run_summary.search_summary.base_name =  full_base_name_no_ext
      pepxml_obj.base_name = full_base_name_no_ext
      pepxml_obj 
    end

    # Create a hash by pep object containing num_tot_proteins
    # This is only valid if all hits are present (no previous thresholding)
    # Since out2summary only acts on one folder at a time,
    # we should only do it for one folder at a time! (that's why we do this
    # here instead of globally)
    self._prot_num_and_first_prot_by_pep(pep_arr)
    prec_mz_arr = nil
    # NOTE(review): the dots in /3.2/ and /3.3/ are unescaped, so they match
    # any character -- confirm this looseness is intended.
    case x = bioworks.version
    when /3.2/ 
      calc_prec_by = :prec_mz_arr
      # get the precursor_mz array for this filename
      mzxml_file = MS::Converter::MzXML.file_to_mzxml(File.join(ms_data, base_name))
      prec_mz_arr = MS::MSRun.precursor_mz_by_scan_num(mzxml_file)
    when /3.3/
      calc_prec_by = :deltamass
    else
      abort "invalid BioworksBrowser version: #{x}"
    end

    # unlike new_from_srf, a missing mzXML file is silently skipped here
    if opts[:copy_mzxml]
      to_copy = MS::Converter::MzXML.file_to_mzxml(File.join(ms_data, base_name))
      if to_copy
        FileUtils.cp to_copy, out_path
      end
    end


    spectrum_queries_ar = pep_arr.hash_by(:first_scan, :last_scan, :charge).map do |key,arr|


      # Sort_by_rank and take the top hit (to mimick out2summary):

      arr = arr.sort_by {|pep| pep.xcorr.to_f } # ascending
      top_pep = arr.pop
      second_hit = arr.last # needed for deltacnstar


      case calc_prec_by
      when :prec_mz_arr
        precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.first_scan.to_i, top_pep.last_scan.to_i, prec_mz_arr, top_pep.charge, pepxml_obj.avg_parent)
      when :deltamass
        precursor_neutral_mass = Sequest::PepXML::SpectrumQuery.calc_precursor_neutral_mass(calc_prec_by, top_pep.mass.to_f, top_pep.deltamass.to_f, pepxml_obj.avg_parent)
      end

      calc_neutral_pep_mass = (top_pep.mass.to_f - pepxml_obj.h_plus)

      # deltacn & star:
      # (NOTE: OLD?? out2summary wants the deltacn of the 2nd best hit.)
      # with no second hit the top peptide's own deltacn is overwritten
      # (this mutates the pep object)
      if second_hit 
        #top_pep.deltacn = second_hit.deltacn 
        deltacnstar = '0'
      else 
        top_pep.deltacn = '1.0'
        deltacnstar = '1'
      end
      # Create the nested structure of queries{results{hits}}
      # (Ruby's blocks work beautifully for things like this)
      spec_query = Sequest::PepXML::SpectrumQuery.new({
        :spectrum => [top_pep.base_name, top_pep.first_scan, top_pep.last_scan, top_pep.charge].join("."),
        :start_scan => top_pep.first_scan,
        :end_scan => top_pep.last_scan,
        :precursor_neutral_mass => precursor_neutral_mass,
        :assumed_charge => top_pep.charge,
        :pepxml_version => pepxml_version,
      }) 


      search_result = Sequest::PepXML::SearchResult.new 
      #puts "set MASSDIFF: "
      #p precursor_neutral_mass - calc_neutral_pep_mass
      ## Calculate some interdependent values;
      # NOTE: the bioworks mass is really M+H if two or more scans went
      # into the search_hit; calc_neutral_pep_mass is simply the avg of
      # precursor masses adjusted to be neutral
      (prevaa, pepseq, nextaa) = SpecID::Pep.prepare_sequence(top_pep.sequence)
      (num_matched_ions, tot_num_ions) = Sequest::PepXML::SearchHit.split_ions(top_pep.ions)
      search_hit = Sequest::PepXML::SearchHit.new({
        :hit_rank => 1,
        :peptide => pepseq,
        :peptide_prev_aa => prevaa,
        :peptide_next_aa => nextaa,
        :protein => top_pep._first_prot.reference.split(" ").first, 
        :num_tot_proteins => top_pep._num_prots,
        :num_matched_ions => num_matched_ions,
        :tot_num_ions => tot_num_ions,
        :calc_neutral_pep_mass => calc_neutral_pep_mass,
        :massdiff => precursor_neutral_mass - calc_neutral_pep_mass,
        :num_tol_term => sample_enzyme_obj.num_tol_term(top_pep.sequence),
        :num_missed_cleavages => sample_enzyme_obj.num_missed_cleavages(pepseq),
        :is_rejected => 0,
        # These are search score attributes:
        :xcorr => top_pep.xcorr,
        :deltacn => top_pep.deltacn,
        :deltacnstar => deltacnstar,
        :spscore => top_pep.sp,
        :sprank => top_pep.rsp,
        :modification_info => modifications_obj.modification_info(SpecID::Pep.split_sequence(top_pep.sequence)[1]),
        :spectrum_query => spec_query,
      })
      search_result.search_hits = [search_hit] # there can be multiple search hits
      spec_query.search_results = [search_result]  # can be multiple search_results
      spec_query
    end

    # create an index by spectrum as results end up typically in out2summary
    # (I really dislike this order, however)
    spectrum_queries_ar = spectrum_queries_ar.sort_by {|pep| pep.spectrum }
    spectrum_queries_ar.each_with_index {|res,index| res.index = "#{index + 1}" }
    pipeline.msms_run_summary.spectrum_queries = spectrum_queries_ar
    pepxml_obj
  end ## collects pepxml_objs
  # summary_xml is the short basename of the pepxml file (e.g., "020.xml")
  # NOTE(review): #summary_xml as shown returns base_name + ".xml", and
  # base_name is set to the expanded full path above -- confirm the
  # "short basename" wording.
  pepxml_objects.sort_by {|obj| obj.summary_xml }
end

Instance Method Details

#dateObject



213
214
215
# File 'lib/ms/sequest/pepxml.rb', line 213

# Returns the current local time rendered with Time#to_s.
def date
  Time.now.to_s
end

#doctypeObject

for pepxml_version == 0



222
223
224
# File 'lib/ms/sequest/pepxml.rb', line 222

# Returns the DOCTYPE declaration line (newline-terminated) used for
# pepxml_version == 0 output.
def doctype
  %(<!DOCTYPE msms_pipeline_analysis SYSTEM "/usr/bin/msms_analysis3.dtd">\n)
end

#fragment_mass_typeObject



739
740
741
# File 'lib/ms/sequest/pepxml.rb', line 739

# Delegates to @params.fragment_mass_type.
# NOTE(review): the constructor shown never assigns @params -- confirm
# where it is set before this is called.
def fragment_mass_type
  search_params = @params
  search_params.fragment_mass_type
end

#headerObject



233
234
235
236
237
# File 'lib/ms/sequest/pepxml.rb', line 233

# Returns the leading lines of the pepXML document: the XML declaration
# followed by the stylesheet instruction. Only version 18 is handled; for
# any other class-level pepxml_version the result is nil.
def header
  return nil unless self.class.pepxml_version == 18

  xml_version + style_sheet
end

#precursor_mass_typeObject



735
736
737
# File 'lib/ms/sequest/pepxml.rb', line 735

# Delegates to @params.precursor_mass_type.
# NOTE(review): the constructor shown never assigns @params -- confirm
# where it is set before this is called.
def precursor_mass_type
  search_params = @params
  search_params.precursor_mass_type
end

#set_mono_or_avg(sequest_params_obj) ⇒ Object

sets @h_plus and @avg_parent from the sequest params object



201
202
203
204
205
206
207
208
209
210
211
# File 'lib/ms/sequest/pepxml.rb', line 201

# Sets @avg_parent and @h_plus from the sequest params object.
#
# @avg_parent is false only when precursor_mass_type is exactly
# "monoisotopic"; any other value selects average masses. @h_plus is then
# taken from SpecID::MONO or SpecID::AVG accordingly.
#
# Returns the chosen h_plus value.
def set_mono_or_avg(sequest_params_obj)
  monoisotopic = ("monoisotopic" == sequest_params_obj.precursor_mass_type)
  @avg_parent = !monoisotopic
  @h_plus = @avg_parent ? SpecID::AVG[:h_plus] : SpecID::MONO[:h_plus]
end

#spectrum_queriesObject

returns an array of spectrum queries



183
184
185
# File 'lib/ms/sequest/pepxml.rb', line 183

# Returns the spectrum queries held by the run summary of this object's
# msms_pipeline_analysis.
def spectrum_queries
  run_summary = msms_pipeline_analysis.msms_run_summary
  run_summary.spectrum_queries
end

#style_sheetObject



226
227
228
229
230
231
# File 'lib/ms/sequest/pepxml.rb', line 226

# Returns the xml-stylesheet processing instruction for pepXML version 18;
# nil for any other class-level pepxml_version.
def style_sheet
  return nil unless self.class.pepxml_version == 18

  '<?xml-stylesheet type="text/xsl" href="/tools/bin/TPP/tpp/schema/pepXML_std.xsl"?>'
end

#summary_xmlObject



731
732
733
# File 'lib/ms/sequest/pepxml.rb', line 731

# Returns base_name with ".xml" appended.
def summary_xml
  extension = ".xml"
  base_name + extension
end

#to_pepxml(file = nil) ⇒ Object

outputs pepxml, (to file if given)



757
758
759
760
761
762
763
764
765
# File 'lib/ms/sequest/pepxml.rb', line 757

# Serializes the object to pepXML: the header followed by the output of
# @msms_pipeline_analysis.to_pepxml.
#
# file - optional path; when given, the XML is also written there
#        (the file is opened in "w" mode, replacing existing content).
#
# Returns the XML String.
def to_pepxml(file=nil)
  xml = header
  xml << @msms_pipeline_analysis.to_pepxml
  File.open(file, "w") { |out| out.print xml } if file
  xml
end

#xml_versionObject



217
218
219
# File 'lib/ms/sequest/pepxml.rb', line 217

# Returns the XML declaration line (newline-terminated).
def xml_version
  %(<?xml version="1.0" encoding="UTF-8"?>\n)
end