Module: TSV

Defined in:
lib/rbbt/statistics/random_walk.rb,
lib/rbbt/statistics/rank_product.rb,
lib/rbbt/statistics/hypergeometric.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.rank_enrichment(tsv, list, options = {}) ⇒ Object



576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
# File 'lib/rbbt/statistics/random_walk.rb', line 576

# Runs a rank-based enrichment of every row of +tsv+ against the ordered
# +list+.
#
# For each row, the values are flattened and stripped of nil/empty entries.
# Rows overlapping +list+ in fewer than three entries are skipped, as are
# rows with nil values and rows whose key appears in options[:masked].
#
# tsv     - object responding to fields, key_field, with_monitor,
#           with_unnamed and through.
# list    - ordered list; it is extended with OrderedList when needed (the
#           object passed in is mutated by that extension).
# options - :masked (keys to skip), :fdr (adjust p-values through
#           FDR.adjust_hash!); the whole hash is also forwarded to
#           rank_enrichment_for_list.
#
# Returns a TSV keyed like +tsv+ whose rows are
# [p-value, matching entries, hit positions divided by list length].
def self.rank_enrichment(tsv, list, options = {})
  masked = options[:masked]

  res = if tsv.fields
          TSV.setup({}, :cast => :to_f, :type => :double, :key_field => tsv.key_field, :fields => ["p-value", tsv.fields.first, "Rank"])
        else
          TSV.setup({}, :cast => :to_f, :type => :double)
        end

  list = list.clean_annotations if list.respond_to? :clean_annotations

  tsv.with_monitor :desc => "Rank enrichment" do
    tsv.with_unnamed do
      tsv.through do |key, values|
        next if masked && masked.include?(key)
        next if values.nil?

        members = values.flatten.compact.reject{|member| member.empty? }

        overlap = members.respond_to?(:subset) ? members.subset(list) : members & list
        overlap = overlap.compact
        next if overlap.length < 3

        list.extend OrderedList unless OrderedList === list
        list_size = list.length

        # Hit positions are reported relative to the length of the list
        relative_hits = list.hits(members).collect{|position| position.to_f / list_size }
        pvalue = rank_enrichment_for_list(list, members, options)

        res[key] = [pvalue, overlap, relative_hits]
      end
    end
  end

  FDR.adjust_hash!(res, 0) if options[:fdr]

  res
end

.rank_enrichment_for_list(list, hits, options = {}) ⇒ Object



566
567
568
569
570
571
572
573
574
# File 'lib/rbbt/statistics/random_walk.rb', line 566

# Computes the p-value of +hits+ against the ordered +list+.
#
# list    - ordered list; extended with OrderedList when it is not one yet.
# hits    - entries whose positions in +list+ are evaluated.
# options - :cutoff is passed as the second argument of list.pvalue (any
#           falsy value is passed as nil); the whole hash is forwarded too.
#
# Returns whatever list.pvalue returns.
def self.rank_enrichment_for_list(list, hits, options = {})
  cutoff = options[:cutoff] || nil
  list.extend OrderedList unless OrderedList === list
  list.pvalue(hits, cutoff, options)
end

Instance Method Details

#annotation_counts(fields = nil, persistence = false, options = {}) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/rbbt/statistics/hypergeometric.rb', line 113

# Counts, for every annotation value found in +fields+, how many distinct
# keys carry it.
#
# fields      - field name or list of field names to read (defaults to all
#               of self.fields).
# persistence - forwarded to Persist.persist as :persist.
# options     - :rename     hash used to translate keys before counting
#                           distinct ones.
#               :background collection of keys; rows whose key is not in it
#                           are ignored.
#               Both entries are removed from the hash that was passed in.
#
# Rows whose value is nil are skipped for every TSV type. No traversal is
# done for types other than :single, :double, :list and :flat.
#
# Returns a Hash of annotation value => number of distinct keys.
def annotation_counts(fields = nil, persistence = false, options = {})
  fields ||= self.fields
  fields = [fields] if String === fields or Symbol === fields
  rename = options.delete :rename
  background = options.delete :background

  field_pos = fields.collect{|f| self.fields.index f}.compact
  persistence_path = self.respond_to?(:persistence_path) ? self.persistence_path : nil
  Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts", :other => {:background => background, :rename => rename, :persistence_path => persistence_path}) do
    data = {}

    with_unnamed do
      # How to turn the value(s) of one row into a list of annotations
      annotations_of = case type
                       when :single
                         lambda{|value| [value] }
                       when :double
                         lambda{|values| values.flatten.compact.uniq }
                       when :list, :flat
                         lambda{|values| values.compact.uniq }
                       end

      if annotations_of
        through :key, field_pos do |key, values|
          next if values.nil?
          next if background and not background.include?(key)

          annotations_of.call(values).each do |annotation|
            data[annotation] ||= []
            data[annotation] << key
          end
        end
      end
    end

    Log.debug("Using renames during annotation counts") if rename

    # Build the result pair by pair; this avoids splatting the whole table
    # into a single argument list and keeps array-valued annotations intact
    counts = {}
    data.each do |annotation, keys|
      keys = keys.collect{|e| rename.include?(e) ? rename[e] : e } if rename
      counts[annotation] = keys.uniq.length
    end
    counts
  end
end

#enrichment(list, fields = nil, options = {}) ⇒ Object



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# File 'lib/rbbt/statistics/hypergeometric.rb', line 164

# Hypergeometric enrichment of the annotations in +fields+ for the entities
# in +list+.
#
# list    - entities (keys of this TSV) to test; nil entries and duplicates
#           are dropped.
# fields  - field(s) holding the annotations; defaults to the first field.
# options - :skip_missing (default true)  use the number of entities found
#                                         in the TSV as sample size instead
#                                         of list.length.
#           :background   (default nil)   keys to restrict the analysis to.
#           :min_support  (default 3)     minimum number of entities an
#                                         annotation needs to be tested.
#           :fdr          (default true)  adjust p-values with
#                                         FDR.adjust_hash!.
#           :cutoff       (default false) drop p-values above this value.
#           :add_keys     (default true)  include the matching entities in
#                                         the result.
#           :rename, :masked, :persist    see their use below.
#
# Returns a TSV keyed by annotation: with :add_keys a :double TSV with the
# fields 'p-value' and self.key_field, otherwise a :single TSV with the
# field "p-value".
def enrichment(list, fields = nil, options = {})
  options = Misc.add_defaults options, :skip_missing => true, :background => nil
  background, skip_missing = Misc.process_options options, :background, :skip_missing

  list = list.compact.uniq

  # With a non-empty Array background a key filter is installed and the list
  # is cut down to the entities that belong to the background.
  # NOTE(review): filter/add_filter are defined elsewhere; presumably they
  # limit traversals of this TSV to the background keys - confirm.
  if Array === background and not background.empty?
    filter
    add_filter(:key, background)
    if defined? AnnotatedArray and AnnotatedArray === list
      list = list.subset background
    else
      list = list & background
    end
  end

  with_unnamed do
    fields ||= self.fields.first
    options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false, :add_keys => true

    add_keys, rename, masked = Misc.process_options options, :add_keys, :rename, :masked

    Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"

    selected = select :key => list.uniq

    found = selected.keys.length
    Log.debug "Found #{found} of #{list.length} entities"

    # total is the sample size handed to the hypergeometric test
    if skip_missing
      total = found
      Log.debug "Using #{ found } as sample size; skipping missing"
    else
      total = list.length
      Log.debug "Using #{ list.length } as sample size"
    end

    # tsv_size is the population size; counts holds, per annotation, the
    # number of keys carrying it.
    # NOTE(review): this branch is taken for any truthy background, including
    # an empty Array or a non-Array, unlike the guard used for the filter
    # above - confirm that this is intended.
    # NOTE(review): :masked is forwarded to annotation_counts; confirm that
    # it is used there.
    if background
      tsv_size = background.length
      counts = annotation_counts fields, options[:persist], :rename => rename, :masked => masked, :background => background
    else
      tsv_size = keys.length
      counts = annotation_counts fields, options[:persist], :rename => rename, :masked => masked
    end


    # annotation => keys of the selected entities that carry it
    annotation_keys = Hash.new
    selected.with_unnamed do

      case type
      when :single
        selected.through :key, fields do |key, value|
          value = value.dup
          annotation_keys[value] ||= []
          annotation_keys[value] << key
        end

      when :double
        selected.through :key, fields do |key, values|
          values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value| 
            value = value.dup
            annotation_keys[value] ||= []
            annotation_keys[value] << key
          }
        end

      when :list
        selected.through :key, fields do |key, values|
          values.compact.uniq.reject{|value| value.empty?}.each{|value| 
            value = value.dup
            annotation_keys[value] ||= []
            annotation_keys[value] << key
          }
        end

      when :flat
        selected.through do |key, values|
          next if values.nil?
          values.compact.uniq.reject{|value| value.empty?}.each{|value| 
            value = value.dup
            annotation_keys[value] ||= []
            annotation_keys[value] << key
          }
        end
      end
    end

    # Undo the key filter installed for the background.
    # NOTE(review): this is not reached if an exception is raised above, so
    # the filter would stay in place in that case - confirm.
    if Array === background and not background.empty?
      reset_filters
      pop_filter
    end

    pvalues = {}
    annotation_keys.each do |annotation, elems|
      next if masked and masked.include? annotation
      # Renamed entities are counted once per distinct new name
      elems = elems.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq if rename
      count = elems.length
      next if count < options[:min_support] or not counts.include? annotation
      pvalues[annotation] = Hypergeometric.hypergeometric(count, counts[annotation], tsv_size - counts[annotation], total)
    end

    pvalues = FDR.adjust_hash! pvalues if options[:fdr]

    pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]

    if add_keys
      tsv = TSV.setup(pvalues.keys.collect{|k| k.dup}, :key_field => fields, :fields => [], :type => :double)

      tsv.add_field 'p-value' do |annot, values|
        [pvalues[annot]]
      end

      # The entities reported are the original (not renamed) keys
      tsv.add_field self.key_field do |annot, values|
        if list.respond_to? :annotate
          list.annotate annotation_keys[annot]
        else
          annotation_keys[annot]
        end
      end

      tsv
    else
      TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
    end

  end
end

#enrichment_for(tsv, field, options = {}) ⇒ Object



292
293
294
295
296
297
298
299
300
301
# File 'lib/rbbt/statistics/hypergeometric.rb', line 292

# Runs an enrichment on +tsv+ using the keys of this TSV, after translating
# them through the index returned by TSV.find_traversal.
#
# tsv     - target TSV, or a Path (in which case its #tsv is used).
# field   - annotation field, forwarded to tsv.enrichment.
# options - forwarded to tsv.enrichment.
#
# Raises RuntimeError ("Cannot traverse identifiers") when no index is found.
# Returns the result of tsv.enrichment.
def enrichment_for(tsv, field, options = {})
  tsv = tsv.tsv if Path === tsv

  traversal = TSV.find_traversal(self, tsv, :in_namespace => false, :persist_input => true)
  raise "Cannot traverse identifiers" if traversal.nil?

  translated = traversal.values_at(*self.keys)
  source_keys = translated.flatten.compact.uniq

  tsv.enrichment(source_keys, field, options)
end

#rank_enrichment(list, options = {}) ⇒ Object



606
607
608
# File 'lib/rbbt/statistics/random_walk.rb', line 606

# Instance-level shortcut for TSV.rank_enrichment with this TSV as the
# table; +list+ and +options+ are passed through untouched.
def rank_enrichment(list, options = {})
  TSV.rank_enrichment self, list, options
end

#rank_product(fields, reverse = false, &block) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/rbbt/statistics/rank_product.rb', line 43

# Scores every entity by the positions it takes when the TSV is sorted by
# each of +fields+.
#
# fields  - list of field names; the TSV is sliced to them.
# reverse - when true an entity at 0-based position i of a sorted list gets
#           the position size - i, otherwise i + 1.
# block   - optional sort criterion, handed to sort_by for every field.
#           Without it values are compared through to_f (for :single and
#           :list TSVs) or through the to_f of their first flattened element.
#
# Returns the result of RankProduct.score(positions, signature_sizes), where
# positions maps each entity to its position per field and signature_sizes
# holds, per field, the number of non-nil and non-empty values.
def rank_product(fields, reverse = false, &block)
  tsv = self.slice(fields)

  # The TSV type is the same for every row, so it is checked only once
  scalar_values = (tsv.type == :single or tsv.type == :list)
  scorer = block || proc{|gene, values| scalar_values ? values.to_f : values.flatten.first.to_f }

  # One entity => 0-based position lookup per field. It replaces a linear
  # Array#index scan per entity; ||= keeps the first occurrence, as
  # Array#index does.
  position_lookups = fields.collect do |field|
    lookup = {}
    tsv.sort_by(field, true, &scorer).each_with_index{|entity, i| lookup[entity] ||= i }
    lookup
  end

  positions = {}

  if reverse
    size = self.size
    tsv.keys.each do |entity|
      positions[entity] = position_lookups.collect{|lookup| size - lookup[entity] }
    end
  else
    tsv.keys.each do |entity|
      positions[entity] = position_lookups.collect{|lookup| lookup[entity] + 1 }
    end
  end

  signature_sizes = fields.collect{|field| slice(field).values.select{|v| v and not (v.respond_to?(:empty?) and v.empty?)}.length}

  RankProduct.score(positions, signature_sizes)
end

#ranks_for(field) ⇒ Object



610
611
612
613
614
615
616
617
618
619
620
621
# File 'lib/rbbt/statistics/random_walk.rb', line 610

# Builds a :single TSV with one "Rank" field, mapping every key to its
# 0-based position in sort_by(field, true).
#
# The entity options, entity templates and namespace of this TSV are copied
# onto the result.
def ranks_for(field)
  ranks = TSV.setup({}, :key_field => self.key_field, :fields => ["Rank"], :type => :single, :cast => :to_i)

  ordered_keys = sort_by(field, true)
  ordered_keys.each_with_index{|key, position| ranks[key] = position }

  ranks.entity_options = entity_options
  ranks.entity_templates = entity_templates
  ranks.namespace = namespace

  ranks
end