Class: Mspire::Spectrum

Inherits:
Object
  • Object
show all
Includes:
SpectrumLike
Defined in:
lib/mspire/spectrum.rb,
lib/mspire/spectrum/centroid.rb

Overview

note that a point is an [m/z, intensity] doublet. A peak is considered a related string of points

Defined Under Namespace

Modules: Centroidish Classes: Centroid

Constant Summary collapse

DEFAULT_MERGE =
{
  :bin_width => 5,
  :bin_unit => :ppm,
  :normalize => true,
  :return_data => false,
  :split => :share
}

Instance Attribute Summary

Attributes included from SpectrumLike

#centroided, #data_arrays, #ms_level, #precursors, #products, #scans

Class Method Summary collapse

Methods included from SpectrumLike

#==, #[], #centroided?, #find_all_nearest, #find_all_nearest_index, #find_nearest, #find_nearest_index, #initialize, #intensities, #intensities=, #merge, #mzs, #mzs=, #mzs_and_intensities, #normalize, #points, #size, #sort!, #tic

Class Method Details

.from_points(ar_of_doublets) ⇒ Object



22
23
24
25
26
27
28
29
30
# File 'lib/mspire/spectrum.rb', line 22

def from_points(ar_of_doublets)
  _mzs = []
  _ints = []
  ar_of_doublets.each do |mz, int|
    _mzs << mz
    _ints << int
  end
  self.new([_mzs, _ints])
end

.merge(spectra, opts = {}) ⇒ Object

returns a new spectrum which has been merged with the others. If the spectra are centroided (just checks the first one and assumes the others are the same) then it will bin the points (bin width determined by opts) and then segment according to monotonicity (sharing intensity between abutting points). The final m/z is the weighted averaged of all the m/z’s in each peak. Valid opts (with default listed first):

:bin_width => 5 
:bin_unit => :ppm | :amu        interpret bin_width as ppm or amu
:bins => array of Bin objects   for custom bins (overides other bin options)
:normalize => false             if true, divides total intensity by 
                                number of spectra
:return_data => false           returns a parallel array containing
                                the peaks associated with each returned point
:split => false | :share | :greedy_y   see Mspire::Peak#split

The binning algorithm is the fastest possible algorithm that would allow for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/mspire/spectrum.rb', line 52

def merge(spectra, opts={})
  opt = DEFAULT_MERGE.merge(opts)
  (spectrum, returned_data) =  
    unless spectra.first.centroided? == false
      # find the min and max across all spectra
      first_mzs = spectra.first.mzs
      min = first_mzs.first ; max = first_mzs.last
      spectra.each do |spectrum| 
        mzs = spectrum.mzs
        min = mzs.first if mzs.first < min
        max = mzs.last if mzs.last > max
      end

      # Create Bin objects
      bins = 
        if opt[:bins]
          opt[:bins]
        else
          divisions = []
          bin_width = opt[:bin_width]
          use_ppm = (opt[:bin_unit] == :ppm)
          current_mz = min
          loop do
            if current_mz >= max
              divisions << max
              break
            else
              divisions << current_mz
              current_mz += ( use_ppm ? current_mz./(1e6).*(bin_width) : bin_width )
            end
          end
          # make each bin exclusive so there is no overlap
          bins = divisions.each_cons(2).map {|pair| Bin.new(*pair, true) }
          # make the last bin *inclusive* of the terminating value
          bins[-1] = Bin.new(bins.last.begin, bins.last.end)
          bins
        end

      spectra.each do |spectrum|
        Bin.bin(bins, spectrum.points, &:first)
      end

      pseudo_points = bins.map do |bin|
        #int = bin.data.reduce(0.0) {|sum,point| sum + point.last }.round(3)   # <- just for info:
        [bin, bin.data.reduce(0.0) {|sum,point| sum + point.last }]
      end

      #p_mzs = [] 
      #p_ints = [] 
      #p_num_points = [] 
      #pseudo_points.each do |psp|
      #  p_mzs << ((psp.first.begin + psp.first.end)/2)
      #  p_ints << psp.last
      #  p_num_points <<  psp.first.data.size
      #end

      #File.write("file_#{opt[:bin_width]}_to_plot.txt", [p_mzs, p_ints, p_num_points].map {|ar| ar.join(' ') }.join("\n"))
      #abort 'here'


      peaks = Mspire::Peak.new(pseudo_points).split(opt[:split])

      return_data = []
      _mzs = [] ; _ints = []

      #p peaks[97]
      #puts "HIYA"
      #abort 'here'

      peaks.each_with_index do |peak,i|
        #peaks.each do |peak|
        tot_intensity = peak.map(&:last).reduce(:+)
        return_data_per_peak = [] if opt[:return_data]
        weighted_mz = 0.0
        peak.each do |point|
          pre_scaled_intensity = point[0].data.reduce(0.0) {|sum,v| sum + v.last }
          post_scaled_intensity = point[1]
          # some peaks may have been shared.  In this case the intensity
          # for that peak was downweighted.  However, the actually data
          # composing that peak is not altered when the intensity is
          # shared.  So, to calculate a proper weighted avg we need to
          # downweight the intensity of any data point found within a bin
          # whose intensity was scaled.
          correction_factor = 
            if pre_scaled_intensity != post_scaled_intensity
              post_scaled_intensity / pre_scaled_intensity
            else
              1.0
            end

          return_data_per_peak.push(*point[0].data) if opt[:return_data]

          point[0].data.each do |lil_point|
            weighted_mz += lil_point[0] * ( (lil_point[1].to_f * correction_factor) / tot_intensity)
          end
        end
        return_data << return_data_per_peak if opt[:return_data]
        _mzs << weighted_mz
        _ints << tot_intensity
      end
      [Spectrum.new([_mzs, _ints]), return_data]
    else
      raise NotImplementedError, "the way to do this is interpolate the profile evenly and sum"
    end

  if opt[:normalize]
    sz = spectra.size
    spectrum.intensities.map! {|v| v.to_f / sz }
  end
  if opt[:return_data]
    $stderr.puts "returning spectrum (#{spectrum.mzs.size}) and data" if $VERBOSE
    [spectrum, return_data]
  else
    $stderr.puts "returning spectrum (#{spectrum.mzs.size})" if $VERBOSE
    spectrum
  end
end