ms-ident

Working with mass spectrometry based peptide/protein identifications. Includes support for building pepxml files.

Planned support for mzIdentML and reading pepxml.

Synopsis

Generating a pepxml file

This example shows a very block-oriented way of constructing a pepxml file. Objects or empty data structures are passed into blocks for subcategories to use. Because there are a lot of attributes to manage, most objects accept a hash of attributes upon initialization or later with ‘merge!’.

# Build the document top-down: each block is handed the child object(s) or
# collection(s) it is expected to fill in.
pepxml = Pepxml.new do |msms_pipeline_analysis|
  # merge! takes a hash of attributes and passes the next level down to its block
  msms_pipeline_analysis.merge!(:summary_xml => "020.xml") do |msms_run_summary|
    # prep the sample enzyme and search_summary
    msms_run_summary.merge!(
      :base_name => '/home/jtprince/dev/mspire/020', 
      :ms_manufacturer => 'Thermo', 
      :ms_model => 'LTQ Orbitrap', 
      :ms_ionization => 'ESI', 
      :ms_mass_analyzer => 'Ion Trap', 
      :ms_detector => 'UNKNOWN'
    ) do |sample_enzyme, search_summary, spectrum_queries|
      sample_enzyme.merge!(:name=>'Trypsin',:cut=>'KR',:no_cut=>'P',:sense=>'C')
      # describe the search itself; the block receives the pieces of the search summary
      search_summary.merge!(
        :base_name=>'/path/to/file/020',
        :search_engine => 'SEQUEST',
        :precursor_mass_type =>'monoisotopic',
        :fragment_mass_type => 'average'
      ) do |search_database, enzymatic_search_constraint, modifications, parameters|
        search_database.merge!(:local_path => '/path/to/db.fasta', :seq_type => 'AA') # note seq_type == type
        enzymatic_search_constraint.merge!(
          :enzyme => 'Trypsin', 
          :max_num_internal_cleavages => 2,
          :min_number_termini => 2
        )
        # modifications is appended to with <<, one modification object at a time
        modifications << Pepxml::AminoacidModification.new(
          :aminoacid => 'M', :massdiff => 15.9994, :mass => Ms::Mass::AA::MONO['M']+15.9994,
          :variable => 'Y', :symbol => '*')
        # invented, for example, a protein terminating mod
        modifications << Pepxml::TerminalModification.new( 
          :terminus => 'c', :massdiff => 23.3333, :mass => Ms::Mass::MONO['oh'] + 23.3333, 
          :variable => 'Y', :symbol => '[', :protein_terminus => 'c', 
          :description => 'leave protein_terminus off if not protein mod'
        )
        modifications << Pepxml::TerminalModification.new( 
          :terminus => 'c', :massdiff => 25.42322, :mass => Ms::Mass::MONO['h+'] + 25.42322, 
          :variable => 'N', :symbol => ']', :description => 'example: c term mod'
        )
        # search-engine parameters are given as a plain hash
        parameters.merge!( 
                          :fragment_ion_tolerance => 1.0000, 
                          :digest_mass_range => '600.0 3500.0', 
                          :enzyme_info => 'Trypsin(KR/P) 1 1 KR P', # etc.... 
                         )
      end
      # objects can also be built standalone and appended to the collection afterwards
      spectrum_query1 = Pepxml::SpectrumQuery.new(
        :spectrum  => '020.3.3.1', :start_scan => 3, :end_scan => 3, 
        :precursor_neutral_mass => 1120.93743421875, :assumed_charge => 1
      ) do |search_results|
        search_result1 = Pepxml::SearchResult.new do |search_hits|
          # NOTE(review): each pair looks like [position, mass] -- positions 1, 6 and 7
          # line up with the '#'-marked residues in the modified peptide below; confirm
          modpositions = [[1, 243.1559], [6, 167.0581], [7,181.085]].map do |pair|  
            Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(*pair)
          end
          # positional argument order: (modified_peptide, mod_aminoacid_masses, :mod_nterm_mass, :mod_cterm_mass)
          # or the attributes can be set by hash
          mod_info = Pepxml::SearchHit::ModificationInfo.new('Y#RLGGS#T#K', modpositions)
          search_hit1 = Pepxml::SearchHit.new( 
            :hit_rank=>1, :peptide=>'YRLGGSTK', :peptide_prev_aa => "R", :peptide_next_aa => "K",
            :protein => "gi|16130113|ref|NP_416680.1|", :num_tot_proteins => 1, :num_matched_ions => 5,
            :tot_num_ions => 35, :calc_neutral_pep_mass => 1120.93163442, :massdiff => 0.00579979875010395,
            :num_tol_term => 2, :num_missed_cleavages => 1, :is_rejected => 0, 
            :modification_info => mod_info) do |search_scores|
              # scores for this hit are merged in as a hash
              search_scores.merge!(:xcorr => 0.12346, :deltacn => 0.7959, :deltacnstar => 0, 
                                 :spscore => 29.85, :sprank => 1)
            end
          search_hits << search_hit1
        end
        search_results << search_result1
      end
      spectrum_queries << spectrum_query1
    end
  end
end
# print the finished document as XML
puts pepxml.to_xml

The block is optional on initialization or with merge!. You can just as easily set the needed attributes directly.

# NOTE(review): `.new` is called on a lowercase name here; presumably the
# run-summary class is intended as the receiver -- confirm against the library.
msms_run_summary.new(:search_summary => my_search_summary, :spectrum_queries => spec_queries)
# individual attributes can also be assigned through a setter
msms_run_summary.sample_enzyme = sample_enzyme_object

see LICENSE