Class: Vanity::Experiment::AbTest

Inherits:
Base
  • Object
show all
Defined in:
lib/vanity/experiment/ab_test.rb

Overview

The meat.

Constant Summary

DEFAULT_SCORE_METHOD =
:z_score

Instance Attribute Summary

Attributes inherited from Base

#completed_at, #id, #name, #playground

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#active?, #complete_if, #created_at, #description, #identify, load, #on_assignment, #type, type

Constructor Details

#initialize(*args) ⇒ AbTest



24
25
26
27
28
# File 'lib/vanity/experiment/ab_test.rb', line 24

# Builds the experiment by forwarding every argument to the parent
# initializer, then applies this class's own defaults.
def initialize(*args)
  super
  # Scoring strategy used until score_method is called with another value.
  @score_method = DEFAULT_SCORE_METHOD
  # No probability table yet; set_alternative_probabilities assigns one.
  @use_probabilities = nil
end

Class Method Details

.friendly_nameObject



17
18
19
# File 'lib/vanity/experiment/ab_test.rb', line 17

# Human-readable label for this experiment type.
def friendly_name
  "A/B Test"
end

.probability(score) ⇒ Object

Convert z-score to probability.



11
12
13
14
15
# File 'lib/vanity/experiment/ab_test.rb', line 11

# Maps a z-score onto a probability using AbTest::Z_TO_PROBABILITY.
#
# The sign of the score is ignored. The first table row whose threshold is
# not greater than the magnitude wins and its last element is returned;
# when no row qualifies the result is 0.
def probability(score)
  magnitude = score.abs
  match = AbTest::Z_TO_PROBABILITY.find { |threshold, _pct| magnitude >= threshold }
  return 0 unless match
  match.last
end

Instance Method Details

#alternative(value) ⇒ Object

Returns an Alternative with the specified value.

Examples:

alternative(:red) == alternatives[0]
alternative(:blue) == alternatives[2]


83
84
85
# File 'lib/vanity/experiment/ab_test.rb', line 83

# Finds the first alternative whose value equals +value+.
# Returns nil when none of the alternatives match.
def alternative(value)
  alternatives.find do |candidate|
    candidate.value == value
  end
end

#alternatives(*args) ⇒ Object

Call this method once to set alternative values for this experiment (requires at least two values). Call without arguments to obtain current list of alternatives.

Examples:

Define A/B test with three alternatives

ab_test "Background color" do
  metrics :coolness
  alternatives "red", "blue", "orange"
end

Find out which alternatives this test uses

alts = experiment(:background_color).alternatives
puts "#{alts.count} alternatives, with the colors: #{alts.map(&:value).join(", ")}"


61
62
63
64
65
66
67
# File 'lib/vanity/experiment/ab_test.rb', line 61

# Records the alternative values for this experiment and returns nil.
#
# With no arguments the values default to [true, false]; otherwise a copy
# of the argument list is stored. After storing, the method replaces itself
# on this object's singleton class with _alternatives, so later calls to
# alternatives run that method instead of this one.
def alternatives(*args)
  @alternatives =
    if args.empty?
      [true, false]
    else
      args.clone
    end
  class << self
    define_method :alternatives, instance_method(:_alternatives)
  end
  nil
end

#bayes_bandit_score(probability = 90) ⇒ Object

Scores alternatives based on the current tracking data, using Bayesian estimates of the best binomial bandit. Based on the R bandit package, cran.r-project.org/web/packages/bandit, which is based on Steven L. Scott, A modern Bayesian look at the multi-armed bandit, Appl. Stochastic Models Bus. Ind. 2010; 26:639-658. (www.economics.uci.edu/~ivan/asmb.874.pdf)

This method returns a structure with the following attributes:

:alts

Ordered list of alternatives, populated with scoring info.

:base

Second best performing alternative.

:least

Least performing alternative (but more than zero conversion).

:choice

Choice alternative, either the outcome or best alternative.

Alternatives returned by this method are populated with the following attributes:

:probability

Probability (probability this is the best alternative).

:difference

Difference from the least performant alternative.

The choice alternative is set only if its probability is higher or equal to the specified probability (default is 90%).



285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/vanity/experiment/ab_test.rb', line 285

# Scores the alternatives with BayesianBanditScore and returns whatever its
# calculate! returns.
#
# The integration and rubystats libraries are loaded lazily here; when
# either is missing a RuntimeError explains what to install. gsl is
# optional: a warning is printed when it cannot be loaded.
#
# NOTE(review): the probability argument is not referenced anywhere in this
# body -- confirm whether it was meant to be handed to calculate!.
def bayes_bandit_score(probability = 90)
  begin
    require "backports/1.9.1/kernel/define_singleton_method" if RUBY_VERSION < "1.9"
    %w[integration rubystats].each { |library| require library }
  rescue LoadError
    raise "to use bayes_bandit_score, install integration and rubystats gems"
  end

  # Optional accelerator: carry on without it.
  begin
    require "gsl"
  rescue LoadError
    warn "for better integration performance, install gsl gem"
  end

  scorer = BayesianBanditScore.new(alternatives, outcome)
  scorer.calculate!
end

#calculate_scoreObject

– Reporting –



213
214
215
216
217
218
219
# File 'lib/vanity/experiment/ab_test.rb', line 213

# Runs the scoring method named by score_method when this object responds
# to it, and falls back to score otherwise.
def calculate_score
  scorer = score_method
  return score unless respond_to?(scorer)
  send(scorer)
end

#choose(request = nil) ⇒ Object

Chooses a value for this experiment. You probably want to use the Rails helper method ab_test instead.

This method picks an alternative for the current identity and returns the alternative's value. It will consistently choose the same alternative for the same identity, and randomly split alternatives between different identities.

Examples:

color = experiment(:which_blue).choose


136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/vanity/experiment/ab_test.rb', line 136

# Picks the alternative for the current identity and returns its value.
#
# request - optional request object, handed on to
#           save_assignment_if_valid_visitor when a new assignment is saved.
#
# While the playground is collecting and the experiment is active, the
# index already recorded for the identity is used; otherwise one is
# computed with alternative_for and saved (unless the playground is using
# JS). Once the experiment is no longer active, the stored outcome is used,
# falling back to alternative_for. When the playground is not collecting,
# the choice is memoized per identity in @showing.
def choose(request=nil)
  if @playground.collecting?
    if active?
      identity = identity()
      index = connection.ab_showing(@id, identity)
      unless index
        index = alternative_for(identity).to_i
        save_assignment_if_valid_visitor(identity, index, request) unless @playground.using_js?
      end
    else
      # The local variable `identity` is declared above but never assigned on
      # this path, so a bare `identity` here would be nil. Call the method
      # explicitly so the fallback is computed for the real identity.
      index = connection.ab_get_outcome(@id) || alternative_for(identity())
    end
  else
    identity = identity()
    @showing ||= {}
    @showing[identity] ||= alternative_for(identity)
    index = @showing[identity]
  end
  alternatives[index.to_i]
end

#chooses(value, request = nil) ⇒ Object

Forces this experiment to use a particular alternative. This may be used in test cases to force a specific alternative to obtain a deterministic test. This method also is used in the add_participant callback action when adding participants via vanity_js.

Examples:

Setup test to red button

setup do
  experiment(:button_color).chooses(:red)
end

def test_shows_red_button
  . . .
end

Use nil to clear selection

teardown do
  experiment(:green_button).chooses(nil)
end


178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/vanity/experiment/ab_test.rb', line 178

# Forces this experiment to show a particular alternative to the current
# identity, or clears the forced selection when value is nil.
#
# value   - the alternative value to force, or nil to clear.
# request - optional request object, handed on to
#           save_assignment_if_valid_visitor.
#
# Returns self.
# Raises ArgumentError when value is not one of the experiment's
# alternatives (only checked while the playground is collecting).
def chooses(value, request=nil)
  if @playground.collecting?
    if value.nil?
      connection.ab_not_showing @id, identity
    else
      index = @alternatives.index(value)
      # Validate before touching storage, so an unknown value never results
      # in an assignment being saved with a nil index.
      raise ArgumentError, "No alternative #{value.inspect} for #{name}" unless index
      save_assignment_if_valid_visitor(identity, index, request)

      # Record the choice when it differs from what is currently recorded
      # or from what would be picked for this identity otherwise.
      showing = connection.ab_showing(@id, identity)
      if (showing && showing != index) || alternative_for(identity) != index
        connection.ab_show(@id, identity, index)
      end
    end
  else
    @showing ||= {}
    @showing[identity] = value.nil? ? nil : @alternatives.index(value)
  end
  self
end

#complete!(outcome = nil) ⇒ Object



420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
# File 'lib/vanity/experiment/ab_test.rb', line 420

# Completes the experiment and stores its outcome through the connection.
#
# outcome - index of the winning alternative. When omitted, it is taken
#           from the outcome_is block if one was registered (the block's
#           result is used only if it is an Alternative of this
#           experiment), otherwise from the best alternative reported by
#           score. 0 is stored when nothing could be determined.
#
# Does nothing unless the playground is collecting and the experiment is
# active. Errors raised by the outcome_is block are reported with warn and
# otherwise ignored.
def complete!(outcome = nil)
  return unless @playground.collecting? && active?
  super

  outcome ||=
    if @outcome_is
      begin
        picked = @outcome_is.call
        picked.id if Alternative === picked && picked.experiment == self
      rescue => error
        warn "Error in AbTest#complete!: #{error}"
      end
    else
      winner = score.best
      winner.id if winner
    end
  # TODO: logging
  connection.ab_set_outcome(@id, outcome || 0)
end

#conclusion(score = score) ⇒ Object

Use the result of #score or #bayes_bandit_score to derive a conclusion. Returns an array of claims.



305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
# File 'lib/vanity/experiment/ab_test.rb', line 305

# Turns a scoring result into a list of human-readable claims.
#
# score - result of #score or #bayes_bandit_score. It is read through its
#         alts, best, choice and method members. When omitted, #score is
#         called. The default is written as score() because a bare `score`
#         in the default expression refers to the parameter itself, which
#         is nil.
#
# Returns an Array of Strings.
def conclusion(score = score())
  claims = []
  participants = score.alts.inject(0) { |t,alt| t + alt.participants }
  claims << case participants
    when 0 ; "There are no participants in this experiment yet."
    when 1 ; "There is one participant in this experiment."
    else ; "There are #{participants} participants in this experiment."
  end
  # only interested in sorted alternatives with conversion
  sorted = score.alts.select { |alt| alt.measure > 0.0 }.sort_by(&:measure).reverse
  if sorted.size > 1
    # start with alternatives that have conversion, from best to worst,
    # then alternatives with no conversion.
    sorted |= score.alts
    # we want a result that's clearly better than 2nd best.
    best, second = sorted[0], sorted[1]
    if best.measure > second.measure
      diff = ((best.measure - second.measure) / second.measure * 100).round
      # better stays nil when the rounded difference is zero; %s renders nil
      # as an empty string.
      better = " (%d%% better than %s)" % [diff, second.name] if diff > 0
      claims << "The best choice is %s: it converted at %.1f%%%s." % [best.name, best.measure * 100, better]
      if score.method == :bayes_bandit_score
        if best.probability >= 90
          claims << "With %d%% probability this result is the best." % score.best.probability
        else
          claims << "This result does not have strong confidence behind it, suggest you continue this experiment."
        end
      else
        if best.probability >= 90
          claims << "With %d%% probability this result is statistically significant." % score.best.probability
        else
          claims << "This result is not statistically significant, suggest you continue this experiment."
        end
      end
      # the best alternative has been reported; list only the others below.
      sorted.delete best
    end
    sorted.each do |alt|
      # gsub capitalizes a leading lowercase "o" in the name for sentence start.
      if alt.measure > 0.0
        claims << "%s converted at %.1f%%." % [alt.name.gsub(/^o/, "O"), alt.measure * 100]
      else
        claims << "%s did not convert." % alt.name.gsub(/^o/, "O")
      end
    end
  else
    claims << "This experiment did not run long enough to find a clear winner."
  end
  claims << "#{score.choice.name.gsub(/^o/, "O")} selected as the best alternative." if score.choice
  claims
end

#destroyObject

– Store/validate –



444
445
446
447
# File 'lib/vanity/experiment/ab_test.rb', line 444

# Calls destroy_experiment on the connection with this experiment's id,
# then runs the parent class's destroy.
def destroy
  connection.destroy_experiment @id
  super
end

#false_trueObject Also known as: true_false

Defines an A/B test with two alternatives: false and true. This is the default pair of alternatives, so just syntactic sugar for those who love being explicit.

Examples:

ab_test "More bacon" do
  metrics :yummyness
  false_true
end


114
115
116
# File 'lib/vanity/experiment/ab_test.rb', line 114

# Declares false and true (in that order) as this experiment's alternatives
# by delegating to alternatives.
def false_true
  alternatives false, true
end

#fingerprint(alternative) ⇒ Object

Returns fingerprint (hash) for given alternative. Can be used to lookup alternative for experiment without revealing what values are available (e.g. choosing alternative from HTTP query parameter).



122
123
124
# File 'lib/vanity/experiment/ab_test.rb', line 122

# Returns a short fingerprint for the given alternative: the last ten
# characters of the MD5 hex digest of "<experiment id>:<alternative id>".
def fingerprint(alternative)
  digest = Digest::MD5.hexdigest("#{id}:#{alternative.id}")
  digest.slice(-10, 10)
end

#metrics(*args) ⇒ Object

Tells A/B test which metric we're measuring, or returns metric in use.

Examples:

Define A/B test against coolness metric

ab_test "Background color" do
  metrics :coolness
  alternatives "red", "blue", "orange"
end

Find metric for A/B test

puts "Measures: " + experiment(:background_color).metrics.map(&:name).join(", ")


41
42
43
44
# File 'lib/vanity/experiment/ab_test.rb', line 41

# Sets or reads the metrics this experiment measures against.
#
# With arguments, each id is resolved through the playground's metric
# lookup and the resulting list is stored and returned. Without arguments
# the currently stored list is returned unchanged.
def metrics(*args)
  return @metrics if args.empty?
  @metrics = args.map { |metric_id| @playground.metric(metric_id) }
end

#outcomeObject

Alternative chosen when this experiment completed.



414
415
416
417
418
# File 'lib/vanity/experiment/ab_test.rb', line 414

# Returns the alternative recorded as this experiment's outcome.
#
# Returns nil when the playground is not collecting. When the connection
# has no outcome stored, that falsy value is returned as is.
def outcome
  return unless @playground.collecting?
  stored_index = connection.ab_get_outcome(@id)
  stored_index && _alternatives[stored_index]
end

#outcome_is(&block) ⇒ Object

Defines how the experiment can choose the optimal outcome on completion.

By default, Vanity will take the best alternative (highest conversion rate) and use that as the outcome. Your experiment may have different needs: maybe you want the least performing alternative, or want to factor cost into the equation.

For example, a custom implementation might read like this:

outcome_is do
  a, b = alternatives
  # a is expensive, only choose a if it performs 2x better than b
  a.measure > b.measure * 2 ? a : b
end

Raises:

  • (ArgumentError)


407
408
409
410
411
# File 'lib/vanity/experiment/ab_test.rb', line 407

# Registers the block used by complete! to decide the experiment's outcome.
#
# Returns the stored block.
# Raises ArgumentError when no block is given, and RuntimeError when a
# block has already been registered on this experiment.
def outcome_is(&block)
  if block.nil?
    raise ArgumentError, "Missing block"
  end
  if @outcome_is
    raise "outcome_is already called on this experiment"
  end
  @outcome_is = block
end

#rebalance!Object

Force experiment to rebalance.



384
385
386
387
388
389
390
# File 'lib/vanity/experiment/ab_test.rb', line 384

# Recomputes the assignment probabilities from bayes_bandit_score.
#
# Does nothing unless the playground is collecting. The new probabilities
# are applied only when the scoring result reports :bayes_bandit_score as
# its method; otherwise nil is returned.
def rebalance!
  return unless @playground.collecting?
  results = bayes_bandit_score
  return unless results.method == :bayes_bandit_score
  set_alternative_probabilities(results.alts)
end

#rebalance_frequency(rf = nil) ⇒ Object

Sets or returns how often (as a function of number of people assigned) to rebalance. For example:

 ab_test "Simple" do
   rebalance_frequency 100
 end

puts "The experiment will automatically rebalance after every " + experiment(:simple).rebalance_frequency.to_s + " users are assigned."


374
375
376
377
378
379
380
381
# File 'lib/vanity/experiment/ab_test.rb', line 374

# Sets or reads the rebalance frequency.
#
# rf - when given, the assignment counter is reset to zero, the frequency
#      is stored and rebalance! is run right away.
#
# Returns the current frequency (nil if it was never set).
def rebalance_frequency(rf = nil)
  return @rebalance_frequency unless rf
  @assignments_since_rebalancing = 0
  @rebalance_frequency = rf
  rebalance!
  @rebalance_frequency
end

#saveObject



449
450
451
452
453
454
455
456
457
458
459
460
461
# File 'lib/vanity/experiment/ab_test.rb', line 449

# Validates and saves the experiment, then hooks track! into every metric.
#
# When no alternatives were declared, the true/false pair is used. When no
# metrics were declared, a warning is printed and a metric registered in
# the playground under this experiment's id is used, creating it if needed.
#
# Raises RuntimeError when fewer than two alternatives are defined.
def save
  true_false unless @alternatives
  raise "Experiment #{name} needs at least two alternatives" if @alternatives.size < 2
  super
  if @metrics.nil? || @metrics.empty?
    warn "Please use metrics method to explicitly state which metric you are measuring against."
    fallback = @playground.metrics[id] ||= Vanity::Metric.new(@playground, name)
    @metrics = [fallback]
  end
  @metrics.each { |tracked| tracked.hook(&method(:track!)) }
end

#score(probability = 90) ⇒ Object

Scores alternatives based on the current tracking data. This method returns a structure with the following attributes:

:alts

Ordered list of alternatives, populated with scoring info.

:base

Second best performing alternative.

:least

Least performing alternative (but more than zero conversion).

:choice

Choice alternative, either the outcome or best alternative.

Alternatives returned by this method are populated with the following attributes:

:z_score

Z-score (relative to the base alternative).

:probability

Probability (z-score mapped to 0, 90, 95, 99 or 99.9%).

:difference

Difference from the least performant alternative.

The choice alternative is set only if its probability is higher or equal to the specified probability (default is 90%).



236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/vanity/experiment/ab_test.rb', line 236

# Scores every alternative against the second-best one using a z-score.
#
# probability - minimum probability the best alternative needs in order to
#               become the choice when no outcome is recorded (default 90).
#
# Each alternative gets z_score and probability assigned; difference is
# assigned only on alternatives that convert better than the least
# performing one with a non-zero measure.
#
# Returns a struct with the members alts, best, base, least, choice and
# method (always :score).
def score(probability = 90)
  alts = alternatives
  # sort by conversion rate to find best and second best
  ranked = alts.sort_by(&:measure)
  base = ranked[-2]
  # calculate z-score relative to the base (second best) alternative
  base_rate = base.measure
  base_count = base.participants
  alts.each do |alt|
    rate = alt.measure
    count = alt.participants
    spread = (rate * (1-rate)/count) + (base_rate * (1-base_rate)/base_count)
    alt.z_score = (rate - base_rate) / spread.abs ** 0.5
    alt.probability = AbTest.probability(alt.z_score)
  end
  # difference is measured from least performant
  least = ranked.find { |alt| alt.measure > 0 }
  if least
    alts.each do |alt|
      next unless alt.measure > least.measure
      alt.difference = (alt.measure - least.measure) / least.measure * 100
    end
  end
  # best alternative is one with highest conversion rate (best shot).
  # choice alternative can only pick best if we have high probability (>90%).
  best = ranked.last if ranked.last.measure > 0.0
  choice =
    if outcome
      alts[outcome.id]
    else
      best && best.probability >= probability ? best : nil
    end
  result_type = Struct.new(:alts, :best, :base, :least, :choice, :method)
  result_type.new(alts, best, base, least, choice, :score)
end

#score_method(method = nil) ⇒ Object

What method to use for calculating score. Default is :z_score, but can also be set to :bayes_bandit_score to calculate probability of each alternative being the best.

ab_test "noodle_test" do

alternatives "spaghetti", "linguine"
metrics :signup
score_method :bayes_bandit_score

end

Examples:

Define A/B test which uses bayes_bandit_score in reporting



97
98
99
100
101
102
# File 'lib/vanity/experiment/ab_test.rb', line 97

# Sets or reads the name of the scoring method used for reporting.
#
# method - when given (and truthy), replaces the stored method name.
#
# Returns the stored method name.
def score_method(method=nil)
  @score_method = method if method
  @score_method
end

#set_alternative_probabilities(alternative_probabilities) ⇒ Object

– Unequal probability assignments –



356
357
358
359
360
361
# File 'lib/vanity/experiment/ab_test.rb', line 356

# Stores a cumulative probability table in @use_probabilities and returns it.
#
# alternative_probabilities - list of objects responding to probability,
#                             read as a percentage.
#
# Each entry of the table pairs an alternative with the running total of
# the probabilities up to and including it, divided by 100.
def set_alternative_probabilities(alternative_probabilities)
  running_total = 0.0
  @use_probabilities = alternative_probabilities.map do |alt|
    running_total += alt.probability
    [alt, running_total / 100.0]
  end
end

#showing?(alternative) ⇒ Boolean

True if this alternative is currently showing (see #chooses).



200
201
202
203
204
205
206
207
208
# File 'lib/vanity/experiment/ab_test.rb', line 200

# True when the given alternative is the one showing for the current
# identity.
#
# While the playground is collecting, the index recorded by the connection
# is compared (falling back to alternative_for); otherwise the in-memory
# @showing table is consulted.
def showing?(alternative)
  who = identity
  unless @playground.collecting?
    @showing ||= {}
    return @showing[who] == alternative.id
  end
  current = connection.ab_showing(@id, who) || alternative_for(who)
  current == alternative.id
end

#track!(metric_id, timestamp, count, *args) ⇒ Object

Called via a hook by the associated metric.



464
465
466
467
468
469
470
471
472
473
474
# File 'lib/vanity/experiment/ab_test.rb', line 464

# Hook invoked by the associated metric to record a conversion.
#
# metric_id, timestamp - supplied by the hook; not used in this body.
# count                - value passed on to ab_add_conversion.
# args                 - when the last element is a Hash with an :identity
#                        entry, it is used if no identity could be
#                        determined otherwise.
#
# Does nothing unless the experiment is active. Any error raised while
# resolving the identity is swallowed and treated as "no identity". Returns
# early, recording nothing, when the connection's ab_showing lookup for the
# identity is truthy.
def track!(metric_id, timestamp, count, *args)
  return unless active?
  who = begin
    identity
  rescue
    nil
  end
  options = args.last
  who ||= options[:identity] if options.is_a?(Hash) && options[:identity]
  return unless who
  return if connection.ab_showing(@id, who)
  index = alternative_for(who)
  connection.ab_add_conversion(@id, index, who, count)
  check_completion!
end