Module: ForChrisLib

Includes:
ChrisLib, Math
Defined in:
lib/chris_lib/for_chris_lib.rb

Overview

Aggregated analytical helpers formerly housed in golf_lab.

Defined Under Namespace

Classes: ChiSquaredStdErr, Framed, PChiSquared

Constant Summary

Constants included from ChrisLib

ChrisLib::VERSION

Instance Method Summary

Instance Method Details

#acf(x_a, lag) ⇒ Float

Autocorrelation at a specific lag.

Raises:



292
293
294
295
296
297
298
299
300
301
302
303
304
# File 'lib/chris_lib/for_chris_lib.rb', line 292

# Autocorrelation of a series at a single lag.
#
# Sums the products of mean-centred values that are +lag+ positions apart,
# averages over the +n - lag+ available pairs and divides by +x_a.var+.
#
# @param x_a [#size, #[], #mean, #var] series of numeric observations
# @param lag [Integer] non-negative lag
# @return [Float] autocorrelation estimate at +lag+
# @raise [ForChrisLibError] when +lag+ is not a non-negative Integer, or
#   +x_a+ does not respond to +#size+ and +#[]+
# @raise [RuntimeError] when the series has fewer than +lag + 1+ elements
def acf(x_a, lag)
  lag_ok = lag.is_a?(Integer) && lag >= 0
  raise ForChrisLibError, 'lag must be a non-negative Integer' unless lag_ok

  indexable = x_a.respond_to?(:size) && x_a.respond_to?(:[])
  raise ForChrisLibError, 'x_a must respond to #size and #[ ]' unless indexable

  n = x_a.size
  raise "Lag is too large, n = #{n}, lag = #{lag}" if n < lag + 1

  mu = x_a.mean
  pair_count = n - lag
  # Plain left-to-right accumulation over the leading n - lag elements.
  cross = x_a[0...pair_count].each_with_index.inject(0) do |acc, (x, i)|
    acc + (x - mu) * (x_a[i + lag] - mu)
  end
  cross.to_f / pair_count / x_a.var
end

#arbitrary_cdf_a(func, options, n_samples: 100) ⇒ Array<Array<Float, Float>>

Sample a cumulative distribution function for plotting.

Raises:



505
506
507
508
509
510
511
512
513
514
# File 'lib/chris_lib/for_chris_lib.rb', line 505

# Sample a cumulative distribution function on an evenly spaced grid.
#
# The grid has +n_samples+ points spanning -4.0..4.0 (a fixed width of 8.0);
# each CDF value is obtained from +cdf_calc+.
#
# @param func [Symbol] name of the density method to integrate
# @param options [Hash] options forwarded to +cdf_calc+
# @param n_samples [Integer] number of grid points (must exceed 1)
# @return [Array<Array<Float, Float>>] +[x, cdf]+ pairs in ascending x order
# @raise [ForChrisLibError] when +n_samples+ is not an Integer above 1 or the
#   receiver does not respond to +func+
def arbitrary_cdf_a(func, options, n_samples: 100)
  count_ok = n_samples.is_a?(Integer) && n_samples > 1
  raise ForChrisLibError, 'n_samples must be greater than 1' unless count_ok
  raise ForChrisLibError, 'function must be defined' unless respond_to?(func)

  width = 8.0
  spacing = width / (n_samples - 1)
  Array.new(n_samples) do |k|
    x = -width / 2 + k * spacing
    [x, cdf_calc(x, func, options)]
  end
end

#bias_estimate_by_min(store, win_loss_calculator: nil, minimizer_class: nil) ⇒ Float

Estimate bias in a histogram by minimising win/loss difference between players.



207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/chris_lib/for_chris_lib.rb', line 207

# Estimate bias in a histogram by minimising a win/loss objective.
#
# For a candidate shift +x+ the first histogram slot is shifted with
# +bin_shift+, converted to a PMF via +pdf_from_hist+, and passed to the
# win/loss calculator; the objective is the squared distance of the first
# win/loss statistic from 50.0. The minimiser searches between +store.min+
# and +store.max+ and the negated minimising shift is returned.
#
# @param store [#histogram, #min, #max] histogram source
# @param win_loss_calculator [#win_loss_graph, #win_loss_stats, nil]
#   calculator to use; falls back to +default_win_loss_calculator+
# @param minimizer_class [#new, nil] minimiser class; falls back to
#   +default_minimizer_class+
# @return [Numeric] negated +x_minimum+ reported by the minimiser
# @raise [ForChrisLibError] when +store+, its histogram, or the minimiser
#   class do not offer the required interface
def bias_estimate_by_min(store, win_loss_calculator: nil, minimizer_class: nil)
  win_loss = win_loss_calculator || default_win_loss_calculator
  store_ok = %i[histogram min max].all? { |message| store.respond_to?(message) }
  raise ForChrisLibError, 'store must respond to :histogram, :min, and :max' unless store_ok

  histogram_data = store.histogram
  counts_present = histogram_data.respond_to?(:[]) && histogram_data[0]
  raise ForChrisLibError, 'store.histogram must include counts in the first slot' unless counts_present

  # Objective: squared distance of the first win/loss statistic from 50.0.
  objective = lambda do |shift|
    shifted_bins = store.histogram[0].bin_shift(shift)
    pmf = pdf_from_hist(shifted_bins, min: store.min)
    graph = win_loss.win_loss_graph(nil, pdf: pmf)
    first_stat = win_loss.win_loss_stats(graph)[0]
    (first_stat - 50.0)**2
  end

  klass = minimizer_class || default_minimizer_class
  raise ForChrisLibError, 'minimizer_class must respond to .new' unless klass.respond_to?(:new)

  minimizer = klass.new(store.min, store.max, objective)
  minimizer.expected = 0.0 if minimizer.respond_to?(:expected=)
  minimizer.iterate
  -minimizer.x_minimum
end

#cdf_calc(x, func, options, mu: 0, sigma: 1, n_pts: 100) ⇒ Float

Numerical integration helper for CDFs.

Raises:



536
537
538
539
540
541
542
# File 'lib/chris_lib/for_chris_lib.rb', line 536

# Numerical integration helper for CDFs.
#
# Integrates +func+ with Simpson's rule from a finite lower bound up to +x+.
# When +x+ lies more than three +sigma+ below +mu+ the lower bound is
# +x - 2 * sigma+; otherwise it is +mu - 5 * sigma+.
#
# @param x [Numeric] upper integration limit
# @param func [Symbol] name of the density method to integrate
# @param options [Hash] options forwarded to +func+ through +simpson+
# @param mu [Numeric] location used to choose the lower bound
# @param sigma [Numeric] scale used to choose the lower bound
# @param n_pts [Integer] number of Simpson intervals (must be even)
# @return [Float] approximate integral of +func+ up to +x+
# @raise [RuntimeError] when +n_pts+ is odd
# @raise [ForChrisLibError] when the receiver does not respond to +func+
def cdf_calc(x, func, options, mu: 0, sigma: 1, n_pts: 100)
  raise "n_pts must be even (received n_pts=#{n_pts})" unless n_pts.even?
  raise ForChrisLibError, 'integration function must be defined' unless respond_to?(func)

  # x is already an absolute coordinate, so the tail bound must not add mu
  # again; doing so placed the lower bound above x whenever mu was non-zero.
  lower = x - mu < -3 * sigma ? x - 2 * sigma : -5 * sigma + mu
  simpson(func, lower, x, n_pts, options)
end

#cdf_from_bins(bins, min = 0, delta = 1) ⇒ Hash{Numeric=>Float}

Cumulative distribution function derived from histogram bins.



396
397
398
# File 'lib/chris_lib/for_chris_lib.rb', line 396

# Cumulative distribution function derived from histogram bins.
#
# Builds the PMF with +pdf_from_bins+ and converts it by calling
# +cdf_from_pdf+ on the result.
#
# @param bins [#sum, #each_with_index] bin counts
# @param min [Numeric] offset forwarded to +pdf_from_bins+
# @param delta [Numeric] bin width forwarded to +pdf_from_bins+
# @return [Object] whatever +cdf_from_pdf+ returns for the PMF
def cdf_from_bins(bins, min = 0, delta = 1)
  pmf = pdf_from_bins(bins, min, delta)
  pmf.cdf_from_pdf
end

#computer_name_short ⇒ String?



557
558
559
560
561
562
563
564
# File 'lib/chris_lib/for_chris_lib.rb', line 557

# Short form of the machine's hostname.
#
# Runs the +hostname+ command, strips surrounding whitespace (the command
# output ends with a newline) and keeps at most the first ten characters.
#
# @return [String, nil] up to ten characters of the hostname, or +nil+ (after
#   emitting a warning) when the hostname cannot be determined
def computer_name_short
  host = begin
    `hostname`.to_s.strip
  rescue SystemCallError
    # The command is unavailable; fall through to the warn/nil path below.
    ''
  end
  if host.empty?
    warn 'computer_name_short could not determine hostname'
    return nil
  end
  host[0..9]
end

#delimit(number, delimiter = ',', separator = '.') ⇒ String

Format a number with thousands delimiters.

Raises:



549
550
551
552
553
554
# File 'lib/chris_lib/for_chris_lib.rb', line 549

# Format a number with thousands delimiters.
#
# The string form of +number+ is split on '.'; +delimiter+ is inserted
# between groups of three digits in the first part, and all parts are
# re-joined with +separator+.
#
# @param number [#to_s] value to format
# @param delimiter [String] text inserted between digit groups
# @param separator [String] text used to join the parts split on '.'
# @return [String] formatted number
# @raise [ForChrisLibError] when +number+ does not respond to +#to_s+
def delimit(number, delimiter = ',', separator = '.')
  raise ForChrisLibError, 'number must respond to #to_s' unless number.respond_to?(:to_s)

  whole, *fraction = number.to_s.split('.')
  grouped = whole.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1#{delimiter}")
  [grouped, *fraction].join(separator)
end

#fvu(y_hat_a:, y_a:) ⇒ Float

Fraction of variance unexplained given predictions and observations.

Raises:



190
191
192
193
194
195
196
197
198
199
200
# File 'lib/chris_lib/for_chris_lib.rb', line 190

# Fraction of variance unexplained given predictions and observations.
#
# Computed as the residual sum of squares divided by the total sum of
# squares of +y_a+ about its mean.
#
# @param y_hat_a [#size, #zip] predicted values
# @param y_a [#size, #sum, #mean] observed values
# @return [Float] residual sum of squares over total sum of squares
# @raise [ForChrisLibError] when the inputs lack the required methods,
#   +y_hat_a+ has fewer than two values, or the lengths differ
def fvu(y_hat_a:, y_a:)
  predictions_ok = y_hat_a.respond_to?(:size) && y_hat_a.respond_to?(:zip)
  raise ForChrisLibError, 'y_hat_a must respond to #size and #zip' unless predictions_ok
  raise ForChrisLibError, 'y_a must respond to #size' unless y_a.respond_to?(:size)
  raise ForChrisLibError, 'y_hat_a must contain at least two values' if y_hat_a.size < 2
  raise ForChrisLibError, 'y_hat_a and y_a must be the same length' unless y_hat_a.size == y_a.size

  residual_ss = y_hat_a.zip(y_a).sum { |predicted, observed| (observed - predicted)**2 }.to_f
  centre = y_a.mean
  total_ss = y_a.sum { |observed| (observed - centre)**2 }.to_f
  residual_ss / total_ss
end

#inc_m2_var(x, accumulator) ⇒ Array<Numeric>

Incremental mean and second central moment accumulator.

Raises:



277
278
279
280
281
282
283
284
285
286
# File 'lib/chris_lib/for_chris_lib.rb', line 277

# Incremental mean and second central moment accumulator.
#
# Folds one observation into a running +[mean, m2, n]+ triple and returns
# the updated triple; the input array is not modified.
#
# @param x [Numeric] new observation
# @param accumulator [Array(Numeric, Numeric, Integer)] current
#   +[mean, m2, n]+
# @return [Array<Numeric>] updated +[mean, m2, n]+
# @raise [ForChrisLibError] when +accumulator+ is not a three-element Array
def inc_m2_var(x, accumulator)
  well_formed = accumulator.is_a?(Array) && accumulator.size == 3
  raise ForChrisLibError, 'accumulator must be an array of [mean, m2, n]' unless well_formed

  prev_mean, prev_m2, prev_n = accumulator
  count = prev_n + 1
  step = x - prev_mean
  new_mean = prev_mean + step.to_f / count
  # m2 grows by the product of the deviations from the old and new means.
  [new_mean, prev_m2 + step * (x - new_mean), count]
end

#interpolate(x, x_L, x_U, y_L, y_U) ⇒ Float

Linear interpolation between two points.



495
496
497
498
# File 'lib/chris_lib/for_chris_lib.rb', line 495

# Linear interpolation between two points.
#
# @param x [Numeric] abscissa at which to evaluate the line
# @param x_L [Numeric] abscissa of the first point
# @param x_U [Numeric] abscissa of the second point
# @param y_L [Numeric] ordinate of the first point
# @param y_U [Numeric] ordinate of the second point
# @return [Float] value at +x+ of the line through +(x_L, y_L)+ and
#   +(x_U, y_U)+
def interpolate(x, x_L, x_U, y_L, y_U)
  # Convert before dividing so Integer arguments do not truncate the slope.
  slope = (y_U - y_L).to_f / (x_U - x_L)
  (x - x_L) * slope + y_L
end

#inverse_transform_rand(cdf_a) ⇒ Float

Inverse transform sampling based on a discretised CDF array.

Raises:



479
480
481
482
483
484
485
486
487
488
489
490
491
# File 'lib/chris_lib/for_chris_lib.rb', line 479

# Inverse transform sampling based on a discretised CDF array.
#
# Draws a uniform random probability and maps it back to an x value by
# linear interpolation between the two neighbouring CDF samples. Draws at or
# beyond the tabulated probability range are clamped to the first or last
# x value.
#
# @param cdf_a [Array<Array(Numeric, Numeric)>] +[x, cumulative_probability]+
#   pairs; the lookup assumes probabilities increase along the array
# @return [Numeric] sampled x value
# @raise [ForChrisLibError] when +cdf_a+ is not a non-empty collection of
#   arrays with at least two elements each
def inverse_transform_rand(cdf_a)
  pairs_ok = cdf_a.respond_to?(:map) && cdf_a.all? { |pair| pair.is_a?(Array) && pair.size >= 2 }
  raise ForChrisLibError, 'cdf_a must be an array of coordinate pairs' unless pairs_ok

  x_a = cdf_a.map { |pair| pair[0] }
  p_a = cdf_a.map { |pair| pair[1] }
  raise ForChrisLibError, 'cdf_a must be an array of coordinate pairs' if p_a.empty?

  p_rand = rand
  # Clamp to the x range; the sample is an x value, never a probability.
  return x_a.first if p_rand <= p_a.first
  return x_a.last if p_rand >= p_a.last

  upper = p_a.find_index { |p| p > p_rand }
  interpolate(p_rand, p_a[upper - 1], p_a[upper], x_a[upper - 1], x_a[upper])
end

#normal_cdf(x) ⇒ Float

Standard normal cumulative distribution function.



415
416
417
# File 'lib/chris_lib/for_chris_lib.rb', line 415

# Standard normal cumulative distribution function.
#
# Evaluated through the error function as +0.5 * (1 + erf(x / sqrt(2)))+.
#
# @param x [Numeric] point at which to evaluate the CDF
# @return [Float] probability that a standard normal variate is at most +x+
def normal_cdf(x)
  scaled = x / sqrt(2)
  0.5 * (1 + erf(scaled))
end

#normal_pdf(x, options = {}) ⇒ Float

Standard normal (or shifted) probability density function.

Raises:



404
405
406
407
408
409
410
# File 'lib/chris_lib/for_chris_lib.rb', line 404

# Standard normal (or shifted) probability density function.
#
# @param x [Numeric] point at which to evaluate the density
# @param options [Hash] optional +:mu+ (default 0) and +:sigma+ (default 1)
# @return [Float] density of the normal distribution with the given
#   parameters at +x+
# @raise [ForChrisLibError] when +sigma+ is not a positive Numeric
def normal_pdf(x, options = {})
  params = { mu: 0, sigma: 1 }.merge(options)
  mu = params[:mu]
  sigma = params[:sigma]
  raise ForChrisLibError, 'sigma must be positive' unless sigma.is_a?(Numeric) && sigma.positive?

  # Divide by 2.0, not 2: with Integer x, mu and sigma the exponent would
  # otherwise be floor-divided (e.g. -1 / 2 == -1), giving a wrong density.
  exponent = -(x - mu)**2 / 2.0 / sigma**2
  E**exponent / sqrt(2 * PI) / sigma
end

#outcome(results) ⇒ Array<Float>

Compute probabilities of winning given an array of scores.

Raises:



14
15
16
17
18
19
20
21
22
# File 'lib/chris_lib/for_chris_lib.rb', line 14

# Compute probabilities of winning given an array of scores.
#
# The lowest score wins; entries tied for the minimum share the win equally
# and every other entry receives 0.0.
#
# @param results [#each, #to_a] scores, one per competitor
# @return [Array<Float>] win share for each score, in input order
# @raise [ForChrisLibError] when +results+ is not enumerable or is empty
def outcome(results)
  raise ForChrisLibError, 'results must respond to #each' unless results.respond_to?(:each)

  scores = results.to_a
  raise ForChrisLibError, 'results cannot be empty' if scores.empty?

  best = scores.min
  winners = scores.count { |score| score == best }
  share_base = winners.nonzero? || 1
  scores.map { |score| (score == best ? 1 : 0).to_f / share_base }
end

#parabola(x, options = {}) ⇒ Numeric

Evaluate quadratic polynomial with configurable coefficients.



440
441
442
443
444
445
446
# File 'lib/chris_lib/for_chris_lib.rb', line 440

# Evaluate a quadratic polynomial with configurable coefficients.
#
# @param x [Numeric] point at which to evaluate
# @param options [Hash] optional coefficients +:a+ (default 2), +:b+
#   (default 3) and +:c+ (default 4)
# @return [Numeric] value of +a * x**2 + b * x + c+
def parabola(x, options = {})
  defaults = { a: 2, b: 3, c: 4 }
  a, b, c = defaults.merge(options).values_at(:a, :b, :c)
  a * x**2 + b * x + c
end

#pdf_from_bins(bins, min = 0, delta = 1) ⇒ Hash{Numeric=>Float}

Probability mass function derived from histogram bins.

Raises:



388
389
390
391
392
# File 'lib/chris_lib/for_chris_lib.rb', line 388

# Probability mass function derived from histogram bins.
#
# Bin +i+ is keyed by +min * delta + i * delta+ and valued by its share of
# the total count. When the total is zero the counts are divided by 1.
#
# @param bins [#sum, #each_with_index] bin counts
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Hash{Numeric=>Float}] bin position mapped to probability
# @raise [ForChrisLibError] when +bins+ lacks +#sum+ or +#each_with_index+
def pdf_from_bins(bins, min = 0, delta = 1)
  usable = bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless usable

  norm = bins.sum.nonzero? || 1
  bins.each_with_index.each_with_object({}) do |(bin, i), pmf|
    pmf[min * delta + i * delta] = bin.to_f / norm
  end
end

#pdf_from_hist(bins, min: 0) ⇒ Hash{Integer=>Float}

Convert integer bin counts into a probability mass function.



239
240
241
242
243
244
245
# File 'lib/chris_lib/for_chris_lib.rb', line 239

# Convert bin counts into a probability mass function.
#
# Bin +i+ is keyed by +i + min+ and valued by its share of the total count.
# When the total is zero the counts are divided by 1.
#
# @param bins [#each_with_index, #sum] bin counts
# @param min [Numeric] key assigned to the first bin
# @return [Hash{Numeric=>Float}] bin key mapped to probability
# @raise [ForChrisLibError] when +bins+ lacks +#each_with_index+ or +#sum+
def pdf_from_hist(bins, min: 0)
  usable = bins.respond_to?(:each_with_index) && bins.respond_to?(:sum)
  raise ForChrisLibError, 'bins must respond to #each_with_index and #sum' unless usable

  norm = bins.sum.nonzero? || 1
  bins.each_with_index.map { |count, offset| [offset + min, count.to_f / norm] }.to_h
end

#simpson(func, a, b, n, options = {}) ⇒ Float

Simpson’s rule numerical integration for functions referenced by symbol.

Raises:



455
456
457
458
459
460
461
462
463
464
# File 'lib/chris_lib/for_chris_lib.rb', line 455

# Simpson's rule numerical integration for functions referenced by symbol.
#
# The integrand is invoked as +send(func, x, options)+. End points carry
# weight 1, odd interior nodes weight 4 and even interior nodes weight 2.
#
# @param func [Symbol] name of the method to integrate
# @param a [Numeric] lower integration limit
# @param b [Numeric] upper integration limit
# @param n [Integer] number of intervals (must be even)
# @param options [Hash] options passed to +func+ on every evaluation
# @return [Float] approximate integral of +func+ over +a..b+
# @raise [RuntimeError] when +n+ is odd
# @raise [ForChrisLibError] when the receiver does not respond to +func+
def simpson(func, a, b, n, options = {})
  raise "n must be even (received n=#{n})" unless n.even?
  raise ForChrisLibError, 'integration function must be defined' unless respond_to?(func)

  h = (b - a).to_f / n
  integrand = ->(t) { send(func, t, options) }
  acc = integrand.call(a) + integrand.call(b)
  # Odd nodes first, then even nodes, accumulated left to right.
  acc = (1..n).step(2).inject(acc) { |running, i| running + 4 * integrand.call(a + i * h) }
  acc = (2..n - 1).step(2).inject(acc) { |running, i| running + 2 * integrand.call(a + i * h) }
  acc * h / 3
end

#skew_normal_cdf_a(options, n_samples: 100) ⇒ Array<Array<Float, Float>>

Discretised skew-normal cumulative distribution function.

Raises:



518
519
520
521
522
523
524
525
526
# File 'lib/chris_lib/for_chris_lib.rb', line 518

# Discretised skew-normal cumulative distribution function.
#
# Evaluates +cdf_calc+ for +:skew_normal_pdf+ on +n_samples+ evenly spaced
# points spanning -4.0..4.0 (a fixed width of 8.0).
#
# @param options [Hash] options forwarded to +cdf_calc+
# @param n_samples [Integer] number of grid points (must exceed 1)
# @return [Array<Array<Float, Float>>] +[x, cdf]+ pairs in ascending x order
# @raise [ForChrisLibError] when +n_samples+ is not an Integer above 1
def skew_normal_cdf_a(options, n_samples: 100)
  count_ok = n_samples.is_a?(Integer) && n_samples > 1
  raise ForChrisLibError, 'n_samples must be greater than 1' unless count_ok

  span = 8.0
  spacing = span / (n_samples - 1)
  (0...n_samples).map do |k|
    x = -span / 2 + k * spacing
    [x, cdf_calc(x, :skew_normal_pdf, options)]
  end
end

#skew_normal_pdf(x, options = { alpha: 0 }) ⇒ Float

Skew-normal probability density function using alpha parameterisation.

Raises:



423
424
425
426
427
428
# File 'lib/chris_lib/for_chris_lib.rb', line 423

# Skew-normal probability density function using alpha parameterisation.
#
# Computed as +2 * normal_pdf(x) * normal_cdf(alpha * x)+.
#
# @param x [Numeric] point at which to evaluate the density
# @param options [Hash] optional +:alpha+ shape parameter (default 0)
# @return [Float] skew-normal density at +x+
# @raise [ForChrisLibError] when +alpha+ is not Numeric
def skew_normal_pdf(x, options = { alpha: 0 })
  alpha = { alpha: 0 }.merge(options)[:alpha]
  raise ForChrisLibError, 'alpha must be numeric' unless alpha.is_a?(Numeric)

  density = normal_pdf(x)
  skew_weight = normal_cdf(alpha * x)
  2 * density * skew_weight
end

#skew_normal_rand(_x, options = { alpha: 0 }) ⇒ Float

Placeholder skew-normal sampler backed by numerical integration.



432
433
434
# File 'lib/chris_lib/for_chris_lib.rb', line 432

# Placeholder skew-normal sampler backed by numerical integration.
#
# Neither argument is used. The method draws a uniform random number and
# returns +cdf_calc+ of +:normal_pdf+ at that value with +mu = 2+ and
# +sigma = 4+.
#
# @param _x [Object] ignored
# @param options [Hash] ignored
# @return [Float] result of +cdf_calc+ at the random draw
def skew_normal_rand(_x, options = { alpha: 0 })
  mu = 2
  sigma = 4
  cdf_calc(rand, :normal_pdf, { mu: mu, sigma: sigma }, n_pts: 100, sigma: sigma, mu: mu)
end

#skew_normal_rand_a(n, alpha) ⇒ Array<Float>

Generate random samples from the skew-normal distribution using inverse transform.

Raises:



470
471
472
473
474
# File 'lib/chris_lib/for_chris_lib.rb', line 470

# Generate random samples from the skew-normal distribution using inverse
# transform sampling.
#
# The CDF table is built once with +arbitrary_cdf_a+ and reused for every
# draw made through +inverse_transform_rand+.
#
# @param n [Integer] number of samples to draw (must be positive)
# @param alpha [Numeric] shape parameter passed as the +:alpha+ option
# @return [Array<Float>] +n+ samples
# @raise [ForChrisLibError] when +n+ is not a positive Integer
def skew_normal_rand_a(n, alpha)
  count_ok = n.is_a?(Integer) && n.positive?
  raise ForChrisLibError, 'n must be a positive Integer' unless count_ok

  cdf_table = arbitrary_cdf_a(:skew_normal_pdf, { alpha: alpha })
  Array.new(n) { inverse_transform_rand(cdf_table) }
end

#summed_bins_histogram(x_y, n_bins) ⇒ Array<Array<Float, Numeric, Integer>>

Sum y values into equi-width x bins.

Raises:



251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
# File 'lib/chris_lib/for_chris_lib.rb', line 251

# Sum y values into equi-width x bins.
#
# The x range +min..max+ is divided into +n_bins+ bins of equal width. Each
# pair adds its y to the bin containing its x; the maximum x falls in the
# last bin. When every x is identical the bin width is zero and all pairs
# are placed in the first bin.
#
# @param x_y [#transpose] collection of +[x, y]+ pairs
# @param n_bins [Integer] number of bins (must be positive)
# @return [Array<Array<Float, Numeric, Integer>>] one
#   +[bin_centre, y_sum, count]+ triple per bin
# @raise [ForChrisLibError] when +x_y+ cannot be transposed, contains no
#   pairs, or +n_bins+ is not a positive Integer
def summed_bins_histogram(x_y, n_bins)
  raise ForChrisLibError, 'x_y must respond to #transpose' unless x_y.respond_to?(:transpose)
  raise ForChrisLibError, 'n_bins must be a positive Integer' unless n_bins.is_a?(Integer) && n_bins.positive?

  x_a, y_a = x_y.transpose
  raise ForChrisLibError, 'x_y must contain at least one [x, y] pair' if x_a.nil? || x_a.empty?

  min, max = x_a.minmax
  delta = (max - min).to_f / n_bins
  bin_sums = Array.new(n_bins, 0)
  bins = Array.new(n_bins, 0)

  x_a.each_with_index do |x, i|
    # Explicit Integer index; a zero width would otherwise yield NaN.
    j = delta.zero? ? 0 : [((x - min) / delta).floor, n_bins - 1].min
    bin_sums[j] += y_a[i]
    bins[j] += 1
  end

  bin_sums.each_with_index.map do |bin_sum, i|
    [min + delta / 2 + i * delta, bin_sum, bins[i]]
  end
end

#test ⇒ String



182
183
184
# File 'lib/chris_lib/for_chris_lib.rb', line 182

# Fixed-value helper that always returns the same marker string.
#
# @return [String] the literal 'here'
def test
  marker = 'here'
  marker
end

#weighted_m_3(bins, mu, min = 0, delta = 1) ⇒ Float?

Weighted third central moment.

Raises:



358
359
360
361
362
363
364
365
366
367
368
369
# File 'lib/chris_lib/for_chris_lib.rb', line 358

# Weighted third central moment.
#
# Bin +i+ represents the value +min * delta + i * delta+ and is weighted by
# its count.
#
# @param bins [#sum, #each_with_index] bin counts used as weights
# @param mu [Numeric] mean about which the moment is taken
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Float, nil] third central moment, or +nil+ when the total weight
#   is below 1
# @raise [ForChrisLibError] when +bins+ lacks the required methods or +mu+
#   is not Numeric
def weighted_m_3(bins, mu, min = 0, delta = 1)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  n = bins.sum
  return if n < 1

  weighted_cubes = bins.each_with_index.sum do |w, i|
    v = min * delta + i * delta
    (v - mu)**3 * w
  end
  # Convert before dividing so all-Integer inputs are not truncated.
  weighted_cubes.to_f / n
end

#weighted_m_4(bins, mu, min = 0, delta = 1) ⇒ Float?

Weighted fourth central moment.

Raises:



373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/chris_lib/for_chris_lib.rb', line 373

# Weighted fourth central moment.
#
# Bin +i+ represents the value +min * delta + i * delta+ and is weighted by
# its count.
#
# @param bins [#sum, #each_with_index] bin counts used as weights
# @param mu [Numeric] mean about which the moment is taken
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Float, nil] fourth central moment, or +nil+ when the total weight
#   is below 1
# @raise [ForChrisLibError] when +bins+ lacks the required methods or +mu+
#   is not Numeric
def weighted_m_4(bins, mu, min = 0, delta = 1)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  n = bins.sum
  return if n < 1

  weighted_quartics = bins.each_with_index.sum do |w, i|
    v = min * delta + i * delta
    (v - mu)**4 * w
  end
  # Convert before dividing so all-Integer inputs are not truncated.
  weighted_quartics.to_f / n
end

#weighted_mean(bins, min = 0, delta = 1) ⇒ Float?

Weighted mean based on histogram bins.

Raises:



311
312
313
314
315
316
317
318
319
# File 'lib/chris_lib/for_chris_lib.rb', line 311

# Weighted mean based on histogram bins.
#
# Bin +i+ represents the value +min * delta + i * delta+ and is weighted by
# its count.
#
# @param bins [#sum, #each_with_index] bin counts used as weights
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Float, nil] weighted mean, or +nil+ when the total weight is zero
# @raise [ForChrisLibError] when +bins+ lacks +#sum+ or +#each_with_index+
def weighted_mean(bins, min = 0, delta = 1)
  usable = bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless usable

  total_weight = bins.sum
  return nil if total_weight.zero?

  weighted_total = bins.each_with_index.sum { |w, i| (min * delta + i * delta) * w }
  weighted_total.to_f / total_weight
end

#weighted_sd(bins, mu, min = 0, delta = 1) ⇒ Float?

Weighted sample standard deviation.

Raises:



327
328
329
330
331
332
333
334
335
336
337
# File 'lib/chris_lib/for_chris_lib.rb', line 327

# Weighted sample standard deviation.
#
# Bin +i+ represents the value +min * delta + i * delta+ and is weighted by
# its count; the squared deviations are divided by the total weight minus 1.
#
# @param bins [#sum, #each_with_index] bin counts used as weights
# @param mu [Numeric] mean about which deviations are measured
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Float, nil] standard deviation, or +nil+ when the total weight is
#   below 2
# @raise [ForChrisLibError] when +bins+ lacks the required methods or +mu+
#   is not Numeric
def weighted_sd(bins, mu, min = 0, delta = 1)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  total_weight = bins.sum
  return nil if total_weight < 2

  squared_dev = bins.each_with_index.sum do |w, i|
    v = min * delta + i * delta
    (v - mu)**2 * w
  end
  # Convert before dividing so all-Integer inputs are not truncated.
  sqrt(squared_dev.to_f / (total_weight - 1))
end

#weighted_skewness(bins, mu, min = 0, delta = 1) ⇒ Float?

Weighted skewness using the third central moment.

Raises:



345
346
347
348
349
350
351
352
353
354
# File 'lib/chris_lib/for_chris_lib.rb', line 345

# Weighted skewness using the third central moment.
#
# Divides the result of +weighted_m_3+ by the cube of +weighted_sd+, both
# evaluated with the same arguments.
#
# @param bins [#sum, #each_with_index] bin counts used as weights
# @param mu [Numeric] mean about which the moments are taken
# @param min [Numeric] index offset of the first bin
# @param delta [Numeric] bin width
# @return [Float, nil] skewness, or +nil+ when the total weight is below 2
# @raise [ForChrisLibError] when +bins+ lacks the required methods or +mu+
#   is not Numeric
def weighted_skewness(bins, mu, min = 0, delta = 1)
  usable = bins.respond_to?(:sum) && bins.respond_to?(:each_with_index)
  raise ForChrisLibError, 'bins must respond to #sum and #each_with_index' unless usable
  raise ForChrisLibError, 'mu must be Numeric' unless mu.is_a?(Numeric)

  return nil if bins.sum < 2

  m3 = weighted_m_3(bins, mu, min, delta)
  spread = weighted_sd(bins, mu, min, delta)
  m3 / spread**3
end