Class: Array

Inherits:
Object
  • Object
show all
Includes:
EnumerableStatistics::ArrayExtension
Defined in:
(unknown)

Instance Method Summary collapse

Methods included from EnumerableStatistics::ArrayExtension

#argmax, #argmin, #find_max, #find_min

Instance Method Details

#histogram(nbins = :auto, weights: nil, edges: nil, closed: :left) ⇒ EnumerableStatistics::Histogram

Returns the histogram struct.

Parameters:

  • nbins (Integer)

    The approximate number of bins

  • closed (:left, :right)

    If :left (the default), the bin intervals are left-closed. If :right, the bin intervals are right-closed.

Returns:



2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
# File 'ext/enumerable/statistics/extension/statistics.c', line 2330

/*
 * Array#histogram implementation.
 *
 * Accepts one optional positional argument (the number of bins, or :auto /
 * nil to derive it from the array length via sturges()) and the optional
 * keywords `weights`, `edges` and `closed`.  Returns a cHistogram struct
 * built from the edges, the per-bin weights, the closed side symbol and
 * Qfalse as the last member.
 */
static VALUE
ary_histogram(int argc, VALUE *argv, VALUE ary)
{
  VALUE nbins_arg, opts, counts, closed_sym;
  VALUE weights = Qnil;
  VALUE bin_edges = Qnil;
  long nbins, n_counts, k;
  int is_left_closed = 1;

  rb_scan_args(argc, argv, "01:", &nbins_arg, &opts);

  /* An explicit bin count wins; nil or :auto falls back to sturges(). */
  if (!NIL_P(nbins_arg) && nbins_arg != sym_auto) {
    nbins = NUM2LONG(nbins_arg);
  }
  else {
    nbins = sturges(RARRAY_LEN(ary));
  }

  if (!NIL_P(opts)) {
    enum { kw_weights, kw_edges, kw_closed, kw_count };
    static ID keys[kw_count];
    VALUE vals[kw_count];

    /* Intern the keyword names only on the first call. */
    if (!keys[0]) {
      keys[kw_weights] = rb_intern("weights");
      keys[kw_edges]   = rb_intern("edges");
      keys[kw_closed]  = rb_intern("closed");
    }

    /* All keywords are optional (0 required, kw_count optional). */
    rb_get_kwargs(opts, keys, 0, kw_count, vals);

    weights = check_histogram_weight_array(vals[kw_weights], RARRAY_LEN(ary));
    bin_edges = check_histogram_edges(vals[kw_edges]);
    is_left_closed = check_histogram_left_p(vals[kw_closed]);
  }

  /* No usable edges were supplied: compute them from the data. */
  if (NIL_P(bin_edges)) {
    bin_edges = ary_histogram_calculate_edge(ary, nbins, is_left_closed);
  }

  /* One weight slot per interval between consecutive edges, all starting at 0. */
  n_counts = RARRAY_LEN(bin_edges) - 1;
  counts = rb_ary_new_capa(n_counts);
  for (k = 0; k < n_counts; ++k) {
    rb_ary_store(counts, k, INT2FIX(0));
  }

  histogram_weights_push_values(counts, bin_edges, ary, weights, is_left_closed);

  closed_sym = is_left_closed ? sym_left : sym_right;
  return rb_struct_new(cHistogram, bin_edges, counts, closed_sym, Qfalse);
}

#mean ⇒ Number

Calculate the mean of the values in ary. This method uses the Kahan summation algorithm to compensate for precision loss when ary includes Float values.

Returns:

  • (Number)

    A mean value



900
901
902
903
904
905
906
# File 'ext/enumerable/statistics/extension/statistics.c', line 900

/*
 * Array#mean implementation.
 *
 * Delegates to ary_mean_variance(), asking only for the mean: the variance
 * output pointer is NULL and the ddof argument is passed as 1.
 */
static VALUE
ary_mean(VALUE ary)
{
  VALUE result;

  ary_mean_variance(ary, &result, NULL, 1);

  return result;
}

#mean_stdev(population: false) ⇒ mean, stdev

Calculate a mean and a standard deviation of the values in ary. The first element of the result array is the mean, and the second is the standard deviation.

This method is equivalent to:

def mean_stdev(population: false)
  m, v = mean_variance(population: population)
  [m, Math.sqrt(v)]
end

Returns:



1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
# File 'ext/enumerable/statistics/extension/statistics.c', line 1446

/*
 * Array#mean_stdev implementation.
 *
 * Parses the optional keyword hash, selects ddof = 0 when
 * opt_population_p() reports the population option and ddof = 1 otherwise,
 * then returns a two-element array [mean, sqrt_value(variance)].
 */
static VALUE
ary_mean_stdev(int argc, VALUE* argv, VALUE ary)
{
  VALUE kwargs, avg, var;
  size_t ddof;

  rb_scan_args(argc, argv, "0:", &kwargs);
  ddof = opt_population_p(kwargs) ? 0 : 1;

  ary_mean_variance(ary, &avg, &var, ddof);

  return rb_assoc_new(avg, sqrt_value(var));
}

#mean_variance(population: false) ⇒ mean, variance

Calculate a mean and a variance of the values in ary. The first element of the result array is the mean, and the second is the variance.

When the population: keyword parameter is true, the variance is calculated as a population variance (divided by $n$). The default population: keyword parameter is false; this means the variance is a sample variance (divided by $n-1$).

This method scans the values in ary only once and does not cache them in memory.

Returns:

  • (mean, variance)

    A two-element array consisting of the mean and the variance



876
877
878
879
880
881
882
883
884
885
886
887
888
# File 'ext/enumerable/statistics/extension/statistics.c', line 876

/*
 * Array#mean_variance implementation.
 *
 * Parses the optional keyword hash, selects ddof = 0 when
 * opt_population_p() reports the population option and ddof = 1 otherwise,
 * then returns a two-element array [mean, variance].
 */
static VALUE
ary_mean_variance_m(int argc, VALUE* argv, VALUE ary)
{
  VALUE kwargs, avg, var;
  size_t ddof;

  rb_scan_args(argc, argv, "0:", &kwargs);
  ddof = opt_population_p(kwargs) ? 0 : 1;

  ary_mean_variance(ary, &avg, &var, ddof);

  return rb_assoc_new(avg, var);
}

#median ⇒ Float

Calculate a median of the values in ary.

Returns:

  • (Float)

Returns:

  • (Float)

    A median value



1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
# File 'ext/enumerable/statistics/extension/statistics.c', line 1692

/*
 * Array#median implementation.
 *
 *   - empty array:        returns a Float NaN
 *   - one element:        returns that element as-is
 *   - two elements:       returns the mean of the two, without sorting
 *   - three or more:      works on ary_percentile_make_sorted(ary); returns
 *                         NaN when is_na() holds for the first sorted
 *                         element, the middle element for odd lengths, and
 *                         the mean of the two middle elements for even
 *                         lengths
 *
 * The mean of two values is (lower + upper) / 2.0.  When the sum is an
 * Integer, Float or Rational the division is done in C doubles; otherwise
 * it is dispatched to the sum's own division method.
 */
static VALUE
ary_median(VALUE ary)
{
  const long len = RARRAY_LEN(ary);
  VALUE lower, upper, total;

  if (len == 0) {
    return DBL2NUM(nan(""));
  }
  if (len == 1) {
    return RARRAY_AREF(ary, 0);
  }

  if (len == 2) {
    lower = RARRAY_AREF(ary, 0);
    upper = RARRAY_AREF(ary, 1);
  }
  else {
    VALUE ordered = ary_percentile_make_sorted(ary);

    if (is_na(RARRAY_AREF(ordered, 0))) {
      return DBL2NUM(nan(""));
    }

    upper = RARRAY_AREF(ordered, len / 2);
    if (len % 2 == 1) {
      return upper;
    }
    lower = RARRAY_AREF(ordered, len / 2 - 1);
  }

  total = rb_funcall(lower, idPLUS, 1, upper); /* TODO: optimize */
  if (RB_INTEGER_TYPE_P(total) || RB_FLOAT_TYPE_P(total) || RB_TYPE_P(total, T_RATIONAL)) {
    return DBL2NUM(NUM2DBL(total) / 2.0);
  }

  return rb_funcall(total, idDIV, 1, DBL2NUM(2.0));
}

#percentile(q) ⇒ Float

Calculate specified percentiles of the values in ary.

Returns:

  • (Float)

Parameters:

  • percentile (Number, Array)

    Percentile or array of percentiles to compute, which must be between 0 and 100 inclusive.

Returns:

  • (Float, Array)

    A percentile value(s)



1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
# File 'ext/enumerable/statistics/extension/statistics.c', line 1625

/*
 * Array#percentile implementation.
 *
 * q is either a single percentile or something convertible to an Array of
 * percentiles (via to_ary).
 *
 *   - Raises ArgumentError when ary is empty.
 *   - A non-array q is forwarded to ary_percentile_single() and its result
 *     is returned directly.
 *   - An array q yields an Array with one result per requested percentile.
 *     For a single-element array the work is delegated to
 *     ary_percentile_single(); otherwise ary is sorted once with
 *     ary_percentile_make_sorted() and each percentile is looked up with
 *     ary_percentile_single_sorted().
 *
 * Each percentile in the array is converted to a C double.  Fixnum, Bignum
 * and Float have direct fast paths; every other type (Rational included)
 * goes through NUM2DBL(), which raises TypeError for non-numeric objects.
 */
static VALUE
ary_percentile(VALUE ary, VALUE q)
{
  long n, m, i;
  double d;
  VALUE qs, sorted, res;

  n = RARRAY_LEN(ary);
  if (n == 0) {
    rb_raise(rb_eArgError, "unable to compute percentile(s) for an empty array");
  }

  qs = rb_check_convert_type(q, T_ARRAY, "Array", "to_ary");
  if (NIL_P(qs)) {
    return ary_percentile_single(ary, q);
  }

  m = RARRAY_LEN(qs);
  res = rb_ary_new_capa(m);

  if (m == 1) {
    q = RARRAY_AREF(qs, 0);
    rb_ary_push(res, ary_percentile_single(ary, q));
  }
  else {
    /* Sort once and reuse the sorted copy for every requested percentile. */
    sorted = ary_percentile_make_sorted(ary);

    for (i = 0; i < m; ++i) {
      VALUE x;

      q = RARRAY_AREF(qs, i);
      switch (TYPE(q)) {
        case T_FIXNUM:
          d = (double)FIX2LONG(q);
          break;
        case T_BIGNUM:
          d = rb_big2dbl(q);
          break;
        case T_FLOAT:
          d = RFLOAT_VALUE(q);
          break;

        case T_RATIONAL:
          /* fall through */
        default:
          /*
           * NUM2DBL() already yields a C double.  It must not be stored in
           * a VALUE and passed to RFLOAT_VALUE(), which expects a Float
           * object reference.
           */
          d = NUM2DBL(q);
          break;
      }

      x = ary_percentile_single_sorted(sorted, n, d);
      rb_ary_push(res, x);
    }
  }

  return res;
}

#stdev(population: false) ⇒ Number

Calculate a standard deviation of the values in ary.

This method is equivalent to:

Math.sqrt(ary.variance(population: population))

Returns:

  • (Number)

    A standard deviation value



1474
1475
1476
1477
1478
1479
1480
# File 'ext/enumerable/statistics/extension/statistics.c', line 1474

/*
 * Array#stdev implementation.
 *
 * Forwards the arguments unchanged to ary_variance() and returns
 * sqrt_value() of the resulting variance.
 */
static VALUE
ary_stdev(int argc, VALUE* argv, VALUE ary)
{
  return sqrt_value(ary_variance(argc, argv, ary));
}

#sum ⇒ Number

Calculate the sum of the values in ary. This method uses the Kahan summation algorithm to compensate for precision loss when ary includes Float values.

Note that this library does not redefine the sum method introduced in Ruby 2.4.

Returns:

  • (Number)

    A summation value



647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
# File 'ext/enumerable/statistics/extension/statistics.c', line 647

/*
 * Array#sum implementation.
 *
 * Takes one optional positional argument, the initial value (defaults to
 * Fixnum 0).  When a block is given, each element is replaced by the
 * block's result before being added.  An empty array returns the initial
 * value without yielding.
 *
 * The summation runs in up to three phases, each resuming at the element
 * where the previous phase stopped:
 *
 *   1. Exact phase: Fixnum, Bignum and Rational elements are accumulated
 *      exactly.
 *   2. Float phase: entered when the first non-exact element is a Float;
 *      sums in C doubles with Kahan's compensated summation.
 *   3. Generic phase: entered on any other kind of element; adds with the
 *      `+` method of the running sum.
 */
static VALUE
ary_sum(int argc, VALUE* argv, VALUE ary)
{
  VALUE e, v, r;
  long i, n;
  int block_given;

  /* No initial value given: start from 0. */
  if (rb_scan_args(argc, argv, "01", &v) == 0)
    v = LONG2FIX(0);

  block_given = rb_block_given_p();

  if (RARRAY_LEN(ary) == 0)
    return v;

  /*
   * Exact phase.
   *   n: running sum of Fixnum elements kept in a C long
   *   r: running sum of Rational elements (Qundef until the first one)
   *   v: initial value plus Bignum elements and any flushed n
   */
  n = 0;
  r = Qundef;
  for (i = 0; i < RARRAY_LEN(ary); i++) {
    e = RARRAY_AREF(ary, i);
    if (block_given)
      e = rb_yield(e);
    if (FIXNUM_P(e)) {
      n += FIX2LONG(e); /* should not overflow long type */
      /* Once n no longer fits in a Fixnum, fold it into v and restart. */
      if (!FIXABLE(n)) {
        v = rb_big_plus(LONG2NUM(n), v);
        n = 0;
      }
    }
    else if (RB_TYPE_P(e, T_BIGNUM))
      v = rb_big_plus(e, v);
    else if (RB_TYPE_P(e, T_RATIONAL)) {
      if (r == Qundef)
        r = e;
      else
        r = rb_rational_plus(r, e);
    }
    else
      goto not_exact;
  }

  /* Every element was exact: merge the partial sums and return. */
  if (n != 0)
    v = rb_fix_plus(LONG2FIX(n), v);
  if (r != Qundef)
    v = rb_rational_plus(r, v);
  return v;

not_exact:
  /*
   * Reached with e holding the (already yielded) element at index i that
   * stopped the exact phase.  Merge the pending partial sums into v first.
   */
  if (n != 0)
    v = rb_fix_plus(LONG2FIX(n), v);
  if (r != Qundef)
    v = rb_rational_plus(r, v);

  if (RB_FLOAT_TYPE_P(e)) {
    /* Kahan's compensated summation algorithm */
    double f, c;

    /* f is the running sum, c the running compensation term. */
    f = NUM2DBL(v);
    c = 0.0;
    /*
     * Jump into the loop body so that the current e is consumed without
     * being fetched (and yielded) a second time.
     */
    goto has_float_value;
    for (; i < RARRAY_LEN(ary); i++) {
      double x, y, t;
      e = RARRAY_AREF(ary, i);
      if (block_given)
        e = rb_yield(e);
      if (RB_FLOAT_TYPE_P(e))
        has_float_value:
          x = RFLOAT_VALUE(e);
      else if (FIXNUM_P(e))
        x = FIX2LONG(e);
      else if (RB_TYPE_P(e, T_BIGNUM))
        x = rb_big2dbl(e);
      else if (RB_TYPE_P(e, T_RATIONAL))
        x = rb_num2dbl(e);
      else
        goto not_float;

      y = x - c;
      t = f + y;
      c = (t - f) - y;
      f = t;
    }
    return DBL2NUM(f);

  not_float:
    /*
     * An element that is none of Float/Fixnum/Bignum/Rational: box the
     * running double sum (the compensation term c is not carried over) and
     * continue in the generic phase below.
     */
    v = DBL2NUM(f);
  }

  /*
   * Generic phase.  As above, jump into the loop body so the current e is
   * added without being fetched and yielded again.
   */
  goto has_some_value;
  for (; i < RARRAY_LEN(ary); i++) {
    e = RARRAY_AREF(ary, i);
    if (block_given)
      e = rb_yield(e);
  has_some_value:
    v = rb_funcall(v, idPLUS, 1, e);
  }

  return v;
}

#value_counts(normalize: false, sort: true, ascending: false, dropna: true) ⇒ Hash

Returns a hash that contains the counts of values in ary.

This method treats nil and NaN (objects that respond true to nan?) as the same thing, and stores their combined count as the value for nil.

Returns:

Parameters:

  • normalize (false, true)

    If true, the result contains the relative frequencies of the unique values.

  • sort (true, false)

    Sort by values.

  • ascending (false, true)

    Sort in ascending order.

  • dropna (true, false)

    Don't include counts of NAs.

Returns:

  • (Hash)

    A hash consisting of the counts of the values



2010
2011
2012
2013
2014
# File 'ext/enumerable/statistics/extension/statistics.c', line 2010

/*
 * Array#value_counts implementation.
 *
 * Forwards the arguments to any_value_counts(), supplying
 * ary_value_counts_without_sort as the array-specific counting routine.
 */
static VALUE
ary_value_counts(int argc, VALUE* argv, VALUE ary)
{
  VALUE counts = any_value_counts(argc, argv, ary, ary_value_counts_without_sort);

  return counts;
}

#variance(population: false) ⇒ Number

Calculate the variance of the values in ary. This method scans the values in ary only once and does not cache them in memory.

When the population: keyword parameter is true, the variance is calculated as a population variance (divided by $n$). The default population: keyword parameter is false; this means the variance is a sample variance (divided by $n-1$).

Returns:

  • (Number)

    A variance value



922
923
924
925
926
927
928
929
930
931
932
933
934
# File 'ext/enumerable/statistics/extension/statistics.c', line 922

/*
 * Array#variance implementation.
 *
 * Parses the optional keyword hash, selects ddof = 0 when
 * opt_population_p() reports the population option and ddof = 1 otherwise,
 * then asks ary_mean_variance() for the variance only (the mean output
 * pointer is NULL).
 */
static VALUE
ary_variance(int argc, VALUE* argv, VALUE ary)
{
  VALUE kwargs, result;
  size_t ddof;

  rb_scan_args(argc, argv, "0:", &kwargs);
  ddof = opt_population_p(kwargs) ? 0 : 1;

  ary_mean_variance(ary, NULL, &result, ddof);

  return result;
}