Class: Mspire::Lipid::Search

Inherits:
Object
  • Object
show all
Defined in:
lib/mspire/lipid/search.rb,
lib/mspire/lipid/search/bin.rb,
lib/mspire/lipid/search/hit.rb,
lib/mspire/lipid/search/query.rb,
lib/mspire/lipid/search/db_isobar_group.rb,
lib/mspire/lipid/search/probability_distribution.rb

Defined Under Namespace

Classes: Bin, DBIsobarGroup, Hit, HitGroup, ProbabilityDistribution, Query

Constant Summary collapse

STANDARD_MODIFICATIONS =
{
  :proton => [1,2],
  :ammonium => [1],
  :lithium => [1],
  :water => [1,2],
}
STANDARD_SEARCH =
{
  :units => :ppm,
  :query_min_count_per_bin => 500,  # min number of peaks per bin
  :num_rand_samples_per_bin => 1000,
  :num_nearest => 2,
  :return_order => :as_given,  # or :sorted 
}

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ions = [], opts = {}) ⇒ Search

ions are Mspire::Lipid::Ion objects each one should give a non-nil m/z value



57
58
59
60
61
# File 'lib/mspire/lipid/search.rb', line 57

def initialize(ions=[], opts={})
  @options = STANDARD_SEARCH.merge(opts)
  @db_isobar_spectrum = create_db_isobar_spectrum(ions)
  @search_function = create_search_function(ions, @options)
end

Instance Attribute Details

#optionsObject

Returns the value of attribute options.



26
27
28
# File 'lib/mspire/lipid/search.rb', line 26

def options
  @options
end

#search_functionObject

Returns the value of attribute search_function.



27
28
29
# File 'lib/mspire/lipid/search.rb', line 27

def search_function
  @search_function
end

Class Method Details

.generate_simple_queries(lipids, mods = STANDARD_MODIFICATIONS, gain_and_loss = false) ⇒ Object

will generate PossibleLipid objects and return a new search object uses only one kind of loss at a time and one type of gain at a time will also do the combination of a gain and a loss if gain_and_loss is true



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/mspire/lipid/search.rb', line 33

def self.generate_simple_queries(lipids, mods=STANDARD_MODIFICATIONS, gain_and_loss=false)
  possible_lipids = []
  real_mods_and_cnts = mods.map {|name, cnts| [Mspire::Lipid::Modification.new(name), cnts] }
  # one of each
  real_mods_and_cnts.each do |mod, counts|
    counts.each do |cnt|
      possible_lipids << Mspire::Lipid::Search::Query.new(lipid, Array.new(cnt, mod))
    end
  end
  if gain_and_loss
    # one of each gain + one of each loss
    (gain_mod_cnt_pairs, loss_mod_cnt_pairs) = real_mods_and_cnts.partition {|mod, count| mod.gain }
    gain_mod_cnt_pairs.each do |mod, cnt|
      lipids.each do |lipid|
        #### need to implement still (use combinations or something...)
        get_this_working!
      end
    end
  end
  self.new(possible_lipids)
end

Instance Method Details

#create_db_isobar_spectrum(ions) ⇒ Object

returns a DB isobar spectrum where the m/z values are all the m/z values to search for and the intensities each an array corresponding to all the lipid ions matching that m/z value



144
145
146
147
148
149
# File 'lib/mspire/lipid/search.rb', line 144

def create_db_isobar_spectrum(ions)
  mzs = [] ; query_groups = []
  pairs = ions.group_by(&:mz).sort_by(&:first)
  pairs.each {|mz, ar| mzs << mz ; query_groups << ar }
  Mspire::Spectrum.new([mzs, query_groups])
end

#create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, num_rand_samples_per_bin, use_ppm = true) ⇒ Object

use_ppm uses ppm or amu if false returns the search_bins



153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/mspire/lipid/search.rb', line 153

def create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, num_rand_samples_per_bin, use_ppm=true)
  search_bins.each do |search_bin| 
    rng = Random.new
    random_mzs = num_rand_samples_per_bin.times.map { rng.rand(search_bin.to_range)  }
    # find the deltas
    diffs = random_mzs.map do |random_mz| 
      nearest_random_mz = db_isobar_spectrum.find_nearest(random_mz)
      delta = (random_mz - nearest_random_mz).abs
      use_ppm ? delta./(nearest_random_mz).*(1e6) : delta
    end
    search_bin.probability_distribution = ProbabilityDistribution.deviations_to_probability_distribution((use_ppm ? :ppm : :amu), diffs)
  end
  search_bins
end

#create_search_bins(db_isobar_spectrum, min_n_per_bin) ⇒ Object



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/mspire/lipid/search.rb', line 168

def create_search_bins(db_isobar_spectrum, min_n_per_bin)
  # make sure we get the right bin size based on the input
  ss = db_isobar_spectrum.mzs.size ; optimal_num_groups = 1
  (1..ss).each do |divisions|
    if  (ss.to_f / divisions) >= min_n_per_bin
      optimal_num_groups = divisions
    else ; break
    end
  end

  mz_ranges = []
  prev = nil

  groups = db_isobar_spectrum.points.in_groups(optimal_num_groups,false).to_a

  case groups.size
  when 0
    raise 'I think you need some data in your query spectrum!'
  when 1
    group = groups.first
    [ Mspire::Lipid::Search::Bin.new( Range.new(group.first.first, group.last.first), db_isobar_spectrum ) ]
  else
    search_bins = groups.each_cons(2).map do |points1, points2|
      bin = Mspire::Lipid::Search::Bin.new( Range.new(points1.first.first, points2.first.first, true), db_isobar_spectrum )
      prev = points2
      bin
    end
    _range = Range.new(prev.first.first, prev.last.first)
    search_bins << Mspire::Lipid::Search::Bin.new(_range, db_isobar_spectrum) # inclusive
  end
end

#create_search_function(ions, opt) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/mspire/lipid/search.rb', line 120

def create_search_function(ions, opt)

  db_isobar_spectrum = create_db_isobar_spectrum(ions)

  search_bins = create_search_bins(db_isobar_spectrum, opt[:query_min_count_per_bin])

  create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, opt[:num_rand_samples_per_bin], opt[:ppm])

  # create the actual search function
  # returns an array of hit_groups
  lambda do |search_queries, num_nearest_hits|
    Bin.bin(search_bins, search_queries, &:mz)
    search_bins_with_data = search_bins.reject {|bin| bin.data.empty? }
    hit_groups = search_bins_with_data.map {|bin| bin.queries_to_hit_groups!(opt[:num_nearest]) }.flatten(1)
  end
end

#qvalues!(hit_groups, opts) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/mspire/lipid/search.rb', line 81

def qvalues!(hit_groups, opts)

  # from http://stats.stackexchange.com/questions/870/multiple-hypothesis-testing-correction-with-benjamini-hochberg-p-values-or-q-va
  # but I've already coded this up before, too, in multiple ways...
  prev_bh_value = 0
  num_total_tests = hit_groups.size

  #hit_groups.each {|hg| p [hg.first.pvalue, hg] }

  # calculate Q-values BH style for now:
  # first hit is the best hit in the group
  pval_hg_index_tuples = hit_groups.each_with_index.map {|hg,i| [hg.pvalue, hg.delta.abs, hg.ppm.abs, i, hg] }

  if pval_hg_index_tuples.any? {|pair| pair.first.nan? }
    $stderr.puts "pvalue of NaN!"
    $stderr.puts ">>> Consider increasing query_min_count_per_bin or setting ppm to false <<<"
    raise
  end

  sorted_pval_index_tuples = pval_hg_index_tuples.sort

  sorted_pval_index_tuples.each_with_index do |tuple,i|
    pval = tuple.first
    bh_value = pval * num_total_tests / (i + 1)
    # Sometimes this correction can give values greater than 1,
    # so we set those values at 1
    bh_value = [bh_value, 1].min

    # To preserve monotonicity in the values, we take the
    # maximum of the previous value or this one, so that we
    # don't yield a value less than the previous.
    bh_value = [bh_value, prev_bh_value].max
    prev_bh_value = bh_value
    tuple.last.first.qvalue = bh_value # give the top hit the q-value
  end

  sorted_pval_index_tuples.map(&:last)
end

#search(search_queries, opts = {}) ⇒ Object

returns an array of HitGroup and a parallel array of BH derived q-values (will switch to Storey soon enough). The HitGroups are returned in the order in which the mz_values are given. assumes search_queries are in ascending m/z order



67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/mspire/lipid/search.rb', line 67

def search(search_queries, opts={})
  opt = @options.merge( opts )
  hit_groups = @search_function.call(search_queries, opt[:num_nearest])
  sorted_hit_groups = qvalues!(hit_groups, opt)
  case opts[:return_order]
  when :given
    hit_groups
  when :sorted
    sorted_hit_groups
  else
    raise ArgumentError, "invalid :return_order"
  end
end