Class: Array

Inherits:
Object
  • Object
show all
Includes:
EnumerableStatistics::ArrayExtension
Defined in:
(unknown)

Instance Method Summary collapse

Methods included from EnumerableStatistics::ArrayExtension

#argmax, #argmin, #find_max, #find_min

Instance Method Details

#histogram(nbins = :auto, weights: nil, edges: nil, closed: :left) ⇒ EnumerableStatistics::Histogram

Returns the histogram struct.

Parameters:

  • The approximate number of bins

  • If :left (the default), the bin intervals are left-closed. If :right, the bin intervals are right-closed.

Returns:

  • The histogram struct.



2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
# File 'ext/enumerable/statistics/extension/statistics.c', line 2472

/*
 * Array#histogram: builds a Histogram struct for the values in ary.
 *
 * Accepts one optional positional argument (forwarded to
 * ary_histogram_calculate_edge when no explicit edges are given) and the
 * optional keywords `weights`, `edges` and `closed`.
 *
 * Raises ArgumentError when both the positional argument and `edges` are
 * supplied.
 */
static VALUE
ary_histogram(int argc, VALUE *argv, VALUE ary)
{
  VALUE nbins, opts, counts;
  VALUE weights = Qnil;
  VALUE bin_edges = Qnil;
  int left_closed = 1;
  long n_bins, bin;

  rb_scan_args(argc, argv, "01:", &nbins, &opts);

  if (!NIL_P(opts)) {
    enum { kw_weights, kw_edges, kw_closed };
    static ID kwarg_keys[3];
    VALUE given[3];

    /* Intern the keyword names once, on the first call that passes keywords. */
    if (!kwarg_keys[0]) {
      kwarg_keys[kw_weights] = rb_intern("weights");
      kwarg_keys[kw_edges]  = rb_intern("edges");
      kwarg_keys[kw_closed] = rb_intern("closed");
    }

    /* All three keywords are optional (0 required, 3 optional). */
    rb_get_kwargs(opts, kwarg_keys, 0, 3, given);

    weights = check_histogram_weight_array(given[kw_weights], RARRAY_LEN(ary));
    bin_edges = check_histogram_edges(given[kw_edges]);
    left_closed = check_histogram_left_p(given[kw_closed]);
  }

  if (!NIL_P(bin_edges)) {
    /* Explicit edges fix the binning, so a bin-count argument is rejected. */
    if (!NIL_P(nbins)) {
      rb_raise(rb_eArgError, "Unable to use both `nbins` and `edges` together");
    }
  }
  else {
    bin_edges = ary_histogram_calculate_edge(ary, nbins, left_closed);
  }

  /* One weight slot per interval between consecutive edges, all starting at 0. */
  n_bins = RARRAY_LEN(bin_edges) - 1;
  counts = rb_ary_new_capa(n_bins);
  bin = 0;
  while (bin < n_bins) {
    rb_ary_store(counts, bin, INT2FIX(0));
    ++bin;
  }

  histogram_weights_push_values(counts, bin_edges, ary, weights, left_closed);

  return rb_struct_new(cHistogram, bin_edges, counts,
                       left_closed ? sym_left : sym_right,
                       Qfalse);
}

#mean(skip_na: false) ⇒ Number

Calculate the mean of the values in ary. This method uses the Kahan summation algorithm to compensate for precision loss when ary includes Float values.

Returns:

  • A mean value



1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
# File 'ext/enumerable/statistics/extension/statistics.c', line 1004

/*
 * Array#mean: parses the optional keyword hash, reads the skip_na flag from
 * it via opt_skip_na, and asks ary_mean_variance for the mean only (the
 * variance out-pointer is NULL).
 */
static VALUE
ary_mean(int argc, VALUE *argv, VALUE ary)
{
  VALUE kwargs;
  VALUE result = Qnil;

  rb_scan_args(argc, argv, ":", &kwargs);
  ary_mean_variance(ary, &result, NULL, 1, opt_skip_na(kwargs));

  return result;
}

#mean_stdev(population: false) ⇒ mean, stdev

Calculate a mean and a standard deviation of the values in ary. The first element of the result array is the mean, and the second is the standard deviation.

This method is equivalent to:

def mean_stdev(population: false)
  m, v = mean_variance(population: population)
  [m, Math.sqrt(v)]
end

Returns:



1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
# File 'ext/enumerable/statistics/extension/statistics.c', line 1579

/*
 * Array#mean_stdev: returns a two-element array [mean, stdev].
 *
 * Options are read from the keyword hash via get_variance_opts. When the
 * population option is set the delta degrees of freedom (ddof) is 0,
 * otherwise 1. The standard deviation is sqrt_value() of the variance
 * reported by ary_mean_variance.
 */
static VALUE
ary_mean_stdev(int argc, VALUE* argv, VALUE ary)
{
  struct variance_opts options;
  /* Out-params start as Qnil, as in ary_mean_variance_m, so they are never
   * read with an indeterminate value. */
  VALUE opts, mean = Qnil, variance = Qnil, stdev;
  size_t ddof = 1;

  rb_scan_args(argc, argv, "0:", &opts);
  get_variance_opts(opts, &options);
  if (options.population)
    ddof = 0;

  ary_mean_variance(ary, &mean, &variance, ddof, options.skip_na);
  stdev = sqrt_value(variance);
  return rb_assoc_new(mean, stdev);
}

#mean_variance(population: false, skip_na: false) ⇒ mean, variance

Calculate a mean and a variance of the values in ary. The first element of the result array is the mean, and the second is the variance.

When the population: keyword parameter is true, the variance is calculated as a population variance (divided by $n$). The default population: keyword parameter is false; this means the variance is a sample variance (divided by $n-1$).

This method scans the values in ary only once, and does not cache the values in memory.

Returns:

  • Two element array consists of mean and variance values



978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
# File 'ext/enumerable/statistics/extension/statistics.c', line 978

/*
 * Array#mean_variance: returns a two-element array [mean, variance].
 *
 * The keyword hash is decoded by get_variance_opts; the population option
 * selects ddof 0, otherwise ddof is 1.
 */
static VALUE
ary_mean_variance_m(int argc, VALUE* argv, VALUE ary)
{
  struct variance_opts vopts;
  VALUE kwargs;
  VALUE avg = Qnil;
  VALUE var = Qnil;
  size_t ddof;

  rb_scan_args(argc, argv, "0:", &kwargs);
  get_variance_opts(kwargs, &vopts);
  ddof = vopts.population ? 0 : 1;

  ary_mean_variance(ary, &avg, &var, ddof, vopts.skip_na);

  return rb_assoc_new(avg, var);
}

#median ⇒ Float

Calculate a median of the values in ary.

Returns:

Returns:

  • A median value



1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
# File 'ext/enumerable/statistics/extension/statistics.c', line 1812

/*
 * Array#median.
 *
 *   - empty array: returns Float NaN.
 *   - one element: returns that element as-is.
 *   - two elements: averages them directly; this path neither sorts nor
 *     performs the NA check.
 *   - otherwise: works on the result of ary_percentile_make_sorted. If the
 *     first sorted element is NA (per is_na), returns NaN. An odd length
 *     returns the middle element; an even length averages the two middle
 *     elements.
 *
 * Averaging adds the two values with idPLUS. An Integer, Float or Rational
 * sum is converted to a C double and halved; any other sum is divided by
 * 2.0 through idDIV.
 */
static VALUE
ary_median(VALUE ary)
{
  const long len = RARRAY_LEN(ary);
  VALUE lower, upper, total;

  if (len == 0) {
    return DBL2NUM(nan(""));
  }
  if (len == 1) {
    return RARRAY_AREF(ary, 0);
  }

  if (len == 2) {
    lower = RARRAY_AREF(ary, 0);
    upper = RARRAY_AREF(ary, 1);
  }
  else {
    VALUE ordered = ary_percentile_make_sorted(ary);

    if (is_na(RARRAY_AREF(ordered, 0))) {
      return DBL2NUM(nan(""));
    }

    upper = RARRAY_AREF(ordered, len / 2);
    if (len % 2 == 1) {
      return upper;
    }
    lower = RARRAY_AREF(ordered, len / 2 - 1);
  }

  total = rb_funcall(lower, idPLUS, 1, upper); /* TODO: optimize */
  if (RB_INTEGER_TYPE_P(total) || RB_FLOAT_TYPE_P(total) || RB_TYPE_P(total, T_RATIONAL)) {
    return DBL2NUM(NUM2DBL(total) / 2.0);
  }

  return rb_funcall(total, idDIV, 1, DBL2NUM(2.0));
}

#percentile(q) ⇒ Float

Calculate specified percentiles of the values in ary.

Returns:

Parameters:

  • q — A percentile, or an array of percentiles, to compute; each must be between 0 and 100 inclusive.

Returns:

  • A percentile value(s)



1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
# File 'ext/enumerable/statistics/extension/statistics.c', line 1745

/*
 * Array#percentile(q).
 *
 * q is either a single percentile or something convertible to an Array via
 * to_ary. A single q is delegated to ary_percentile_single and its result
 * returned directly. An array of percentiles yields an Array with one
 * result per entry, in the same order.
 *
 * Raises ArgumentError when ary is empty.
 */
static VALUE
ary_percentile(VALUE ary, VALUE q)
{
  long n, m, i;
  double d;
  VALUE qs, sorted, res;

  n = RARRAY_LEN(ary);
  if (n == 0) {
    rb_raise(rb_eArgError, "unable to compute percentile(s) for an empty array");
  }

  qs = rb_check_convert_type(q, T_ARRAY, "Array", "to_ary");
  if (NIL_P(qs)) {
    return ary_percentile_single(ary, q);
  }

  m = RARRAY_LEN(qs);
  res = rb_ary_new_capa(m);

  if (m == 1) {
    q = RARRAY_AREF(qs, 0);
    rb_ary_push(res, ary_percentile_single(ary, q));
  }
  else {
    /* Sort once and reuse the sorted copy for every requested percentile. */
    sorted = ary_percentile_make_sorted(ary);

    for (i = 0; i < m; ++i) {
      VALUE x;

      q = RARRAY_AREF(qs, i);
      switch (TYPE(q)) {
        case T_FIXNUM:
          d = (double)FIX2LONG(q);
          break;
        case T_BIGNUM:
          d = rb_big2dbl(q);
          break;
        case T_FLOAT:
          d = RFLOAT_VALUE(q);
          break;

        case T_RATIONAL:
          /* fall through */
        default:
          /* NUM2DBL already yields a C double; it must not be stored in a
           * VALUE and unwrapped again with RFLOAT_VALUE. */
          d = NUM2DBL(q);
          break;
      }

      x = ary_percentile_single_sorted(sorted, n, d);
      rb_ary_push(res, x);
    }
  }

  return res;
}

#stdev(population: false) ⇒ Number

Calculate a standard deviation of the values in ary.

This method is equivalent to:

Math.sqrt(ary.variance(population: population))

Returns:

  • A standard deviation value



1609
1610
1611
1612
1613
1614
1615
# File 'ext/enumerable/statistics/extension/statistics.c', line 1609

/*
 * Array#stdev: forwards all arguments to ary_variance and returns
 * sqrt_value() of the variance it reports.
 */
static VALUE
ary_stdev(int argc, VALUE* argv, VALUE ary)
{
  return sqrt_value(ary_variance(argc, argv, ary));
}

#sum(skip_na: false) ⇒ Number

Calculate the sum of the values in ary. This method utilizes Kahan summation algorithm to compensate the result precision when the ary includes Float values.

Redefines sum (Ruby >= 2.4). Original is aliased as __sum__.

Returns:

  • A summation value



807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
# File 'ext/enumerable/statistics/extension/statistics.c', line 807

/*
 * Array#sum(init = 0, skip_na: false).
 *
 * Takes one optional positional argument, the initial value (Fixnum 0 when
 * omitted), and an optional keyword hash from which opt_skip_na reads the
 * skip_na flag.
 *
 * When HAVE_ENUM_SUM is not defined and skip_na is false, the call is
 * forwarded to the method named by id_builtin_sum. Otherwise the sum is
 * computed by ary_calculate_sum.
 */
static VALUE
ary_sum(int argc, VALUE* argv, VALUE ary)
{
  VALUE v, opts;
  int skip_na;

  if (rb_scan_args(argc, argv, "01:", &v, &opts) == 0) {
    v = LONG2FIX(0);
  }
  skip_na = opt_skip_na(opts);

#ifndef HAVE_ENUM_SUM
  if (!skip_na) {
    /* rb_funcall() takes its arguments as C varargs of VALUE, so a VALUE*
     * has to go through rb_funcallv(). v always holds the initial value at
     * this point, so exactly one positional argument is forwarded; argc is
     * not used because it also counts the keyword hash. */
    return rb_funcallv(ary, id_builtin_sum, 1, &v);
  }
#endif

  return ary_calculate_sum(ary, v, skip_na, NULL);
}

#value_counts(normalize: false, sort: true, ascending: false, dropna: true) ⇒ Hash

Returns a hash that contains the counts of values in ary.

This method treats nil and NaN (that is, any object that responds true to nan?) as the same thing, and stores their combined count as the value for nil.

Returns:

Parameters:

  • If true, the result contains the relative frequencies of the unique values.

  • Sort by values.

  • Sort in ascending order.

  • Don't include counts of NAs.

Returns:

  • A hash consists of the counts of the values



2130
2131
2132
2133
2134
# File 'ext/enumerable/statistics/extension/statistics.c', line 2130

/*
 * Array#value_counts: delegates to any_value_counts, handing it the
 * array-specific routine ary_value_counts_without_sort as its fourth
 * argument. Option parsing is presumably done inside any_value_counts;
 * it is not visible here.
 */
static VALUE
ary_value_counts(int argc, VALUE* argv, VALUE ary)
{
  VALUE counts = any_value_counts(argc, argv, ary, ary_value_counts_without_sort);

  return counts;
}

#variance(population: false, skip_na: false) ⇒ Number

Calculate the variance of the values in ary. This method scans the values in ary only once, and does not cache the values in memory.

When the population: keyword parameter is true, the variance is calculated as a population variance (divided by $n$). The default population: keyword parameter is false; this means the variance is a sample variance (divided by $n-1$).

Returns:

  • A variance value



1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
# File 'ext/enumerable/statistics/extension/statistics.c', line 1031

/*
 * Array#variance: returns the variance reported by ary_mean_variance.
 *
 * Options are read from the keyword hash via get_variance_opts. When the
 * population option is set the delta degrees of freedom (ddof) is 0,
 * otherwise 1. The mean out-pointer is NULL because only the variance is
 * needed.
 */
static VALUE
ary_variance(int argc, VALUE* argv, VALUE ary)
{
  struct variance_opts options;
  /* The out-param starts as Qnil, as in ary_mean_variance_m, so it is never
   * returned with an indeterminate value. */
  VALUE opts, variance = Qnil;
  size_t ddof = 1;

  rb_scan_args(argc, argv, "0:", &opts);
  get_variance_opts(opts, &options);
  if (options.population)
    ddof = 0;

  ary_mean_variance(ary, NULL, &variance, ddof, options.skip_na);
  return variance;
}