Class: Array
- Inherits:
-
Object
- Object
- Array
- Includes:
- EnumerableStatistics::ArrayExtension
- Defined in:
- (unknown)
Instance Method Summary collapse
-
#histogram(nbins = :auto, weights: nil, edges: nil, closed: :left) ⇒ EnumerableStatistics::Histogram
The histogram struct.
-
#mean ⇒ Number
Calculate a mean of the values in
ary. -
#mean_stdev(population: false) ⇒ mean, stdev
Calculate a mean and a standard deviation of the values in
ary. -
#mean_variance(population: false) ⇒ mean, variance
Calculate a mean and a variance of the values in
ary. -
#median ⇒ Float
Calculate a median of the values in
ary. -
#percentile(q) ⇒ Float
Calculate specified percentiles of the values in
ary. -
#stdev(population: false) ⇒ Number
Calculate a standard deviation of the values in
ary. -
#sum ⇒ Number
Calculate the sum of the values in
ary. -
#value_counts(normalize: false, sort: true, ascending: false, dropna: true) ⇒ Hash
Returns a hash that contains the counts of values in
ary. -
#variance(population: false) ⇒ Number
Calculate a variance of the values in
ary.
Methods included from EnumerableStatistics::ArrayExtension
#argmax, #argmin, #find_max, #find_min
Instance Method Details
#histogram(nbins = :auto, weights: nil, edges: nil, closed: :left) ⇒ EnumerableStatistics::Histogram
Returns the histogram struct.
2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 2330
/*
 * Array#histogram(nbins = :auto, weights:, edges:, closed:)
 *
 * Builds a histogram struct (cHistogram) from the values in ary.
 *
 * - nbins: when omitted or :auto, the bin count comes from sturges() applied
 *   to the array length; otherwise it is converted with NUM2LONG.
 * - weights:, edges:, closed: are optional keywords; each is validated by its
 *   check_histogram_* helper.
 * - When no edges are supplied they are computed by
 *   ary_histogram_calculate_edge().
 *
 * The returned struct holds the edges, the per-bin weights, the closed side
 * (:left or :right) and a trailing false member.
 */
static VALUE
ary_histogram(int argc, VALUE *argv, VALUE ary)
{
  VALUE nbins_arg, opts, counts, closed_side;
  VALUE weights = Qnil;
  VALUE bin_edges = Qnil;
  long nbins, n_counts, k;
  int closed_left = 1;

  rb_scan_args(argc, argv, "01:", &nbins_arg, &opts);

  if (!NIL_P(nbins_arg) && nbins_arg != sym_auto) {
    nbins = NUM2LONG(nbins_arg);
  }
  else {
    nbins = sturges(RARRAY_LEN(ary));
  }

  if (!NIL_P(opts)) {
    enum { key_weights, key_edges, key_closed };
    static ID keys[3];
    VALUE vals[3];

    /* Intern the keyword names only once; a zero first slot means "not yet". */
    if (!keys[0]) {
      keys[key_weights] = rb_intern("weights");
      keys[key_edges] = rb_intern("edges");
      keys[key_closed] = rb_intern("closed");
    }

    /* No required keywords, three optional ones. */
    rb_get_kwargs(opts, keys, 0, 3, vals);

    weights = check_histogram_weight_array(vals[key_weights], RARRAY_LEN(ary));
    bin_edges = check_histogram_edges(vals[key_edges]);
    closed_left = check_histogram_left_p(vals[key_closed]);
  }

  if (NIL_P(bin_edges)) {
    bin_edges = ary_histogram_calculate_edge(ary, nbins, closed_left);
  }

  /* One bin per adjacent pair of edges, each starting at zero. */
  n_counts = RARRAY_LEN(bin_edges) - 1;
  counts = rb_ary_new_capa(n_counts);
  k = 0;
  while (k < n_counts) {
    rb_ary_store(counts, k, INT2FIX(0));
    ++k;
  }

  histogram_weights_push_values(counts, bin_edges, ary, weights, closed_left);

  closed_side = closed_left ? sym_left : sym_right;
  return rb_struct_new(cHistogram, bin_edges, counts, closed_side, Qfalse);
}
|
#mean ⇒ Number
Calculate a mean of the values in ary.
This method uses the
Kahan summation algorithm
to compensate for the loss of precision when ary includes Float values.
900 901 902 903 904 905 906 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 900
/*
 * Array#mean
 *
 * Returns the mean of the values in ary. Delegates to ary_mean_variance(),
 * passing NULL for the variance output so only the mean is requested.
 */
static VALUE
ary_mean(VALUE ary)
{
  VALUE result;

  ary_mean_variance(ary, &result, NULL, 1);

  return result;
}
|
#mean_stdev(population: false) ⇒ mean, stdev
Calculate a mean and a standard deviation of the values in ary.
The first element of the result array is the mean,
and the second is the standard deviation.
This method is equivalent to:
def mean_stdev(population: false)
m, v = mean_variance(population: population)
[m, Math.sqrt(v)]
end
1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1446
/*
 * Array#mean_stdev(population: false)
 *
 * Returns a two-element array [mean, stdev]. The standard deviation is
 * sqrt_value() of the variance computed by ary_mean_variance().
 * ddof is 0 when opt_population_p() reports the population option, else 1.
 */
static VALUE
ary_mean_stdev(int argc, VALUE *argv, VALUE ary)
{
  VALUE kwargs, avg, var;
  size_t ddof;

  rb_scan_args(argc, argv, "0:", &kwargs);
  ddof = opt_population_p(kwargs) ? 0 : 1;

  ary_mean_variance(ary, &avg, &var, ddof);

  return rb_assoc_new(avg, sqrt_value(var));
}
|
#mean_variance(population: false) ⇒ mean, variance
Calculate a mean and a variance of the values in ary.
The first element of the result array is the mean, and the second is the variance.
When the population: keyword parameter is true,
the variance is calculated as a population variance (divided by $n$).
The default population: keyword parameter is false;
this means the variance is a sample variance (divided by $n-1$).
This method scans the values in ary only once,
and does not cache the values in memory.
876 877 878 879 880 881 882 883 884 885 886 887 888 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 876
/*
 * Array#mean_variance(population: false)
 *
 * Returns a two-element array [mean, variance] computed by
 * ary_mean_variance(). ddof is 0 when opt_population_p() reports the
 * population option, else 1.
 */
static VALUE
ary_mean_variance_m(int argc, VALUE *argv, VALUE ary)
{
  VALUE kwargs, avg, var;
  size_t ddof;

  rb_scan_args(argc, argv, "0:", &kwargs);
  ddof = opt_population_p(kwargs) ? 0 : 1;

  ary_mean_variance(ary, &avg, &var, ddof);

  return rb_assoc_new(avg, var);
}
|
#median ⇒ Float
Calculate a median of the values in ary.
1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1692
/*
 * Array#median
 *
 * Returns the median of the values in ary.
 *
 * - Empty array: returns a Float NaN.
 * - One element: returns that element as-is.
 * - Two elements: returns the mean of the two, without sorting.
 * - Otherwise the array is sorted via ary_percentile_make_sorted(). If the
 *   first sorted element satisfies is_na(), NaN is returned. For odd length
 *   the middle element is returned as-is; for even length the mean of the
 *   two middle elements is returned.
 *
 * The mean of two values is computed as (a + b) / 2.0. Integer, Float and
 * Rational sums are divided in C doubles; any other sum is divided by
 * calling idDIV on it.
 */
static VALUE
ary_median(VALUE ary)
{
  const long len = RARRAY_LEN(ary);
  VALUE ordered, lower, upper, total;

  if (len == 0) {
    return DBL2NUM(nan(""));
  }
  if (len == 1) {
    return RARRAY_AREF(ary, 0);
  }

  if (len == 2) {
    lower = RARRAY_AREF(ary, 0);
    upper = RARRAY_AREF(ary, 1);
  }
  else {
    VALUE first;

    ordered = ary_percentile_make_sorted(ary);
    first = RARRAY_AREF(ordered, 0);
    if (is_na(first)) {
      return DBL2NUM(nan(""));
    }

    upper = RARRAY_AREF(ordered, len / 2);
    if (len % 2 == 1) {
      return upper;
    }
    lower = RARRAY_AREF(ordered, len / 2 - 1);
  }

  total = rb_funcall(lower, idPLUS, 1, upper); /* TODO: optimize */
  if (RB_INTEGER_TYPE_P(total) || RB_FLOAT_TYPE_P(total) || RB_TYPE_P(total, T_RATIONAL)) {
    return DBL2NUM(NUM2DBL(total) / 2.0);
  }
  return rb_funcall(total, idDIV, 1, DBL2NUM(2.0));
}
|
#percentile(q) ⇒ Float
Calculate specified percentiles of the values in ary.
1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1625
/*
 * Array#percentile(q)
 *
 * Calculates the requested percentile(s) of the values in ary.
 *
 * - q not convertible to an Array (via to_ary): returns the single result
 *   of ary_percentile_single(ary, q).
 * - q convertible to an Array: returns a new Array with one result per
 *   element of q, in the same order.
 *
 * Raises ArgumentError when ary is empty. Each percentile in an Array of two
 * or more is converted with NUM2DBL, which raises for values that cannot be
 * converted to a double.
 */
static VALUE
ary_percentile(VALUE ary, VALUE q)
{
  long n, m, i;
  VALUE qs, sorted, res;

  n = RARRAY_LEN(ary);
  if (n == 0) {
    rb_raise(rb_eArgError, "unable to compute percentile(s) for an empty array");
  }

  qs = rb_check_convert_type(q, T_ARRAY, "Array", "to_ary");
  if (NIL_P(qs)) {
    return ary_percentile_single(ary, q);
  }

  m = RARRAY_LEN(qs);
  res = rb_ary_new_capa(m);

  if (m == 1) {
    q = RARRAY_AREF(qs, 0);
    rb_ary_push(res, ary_percentile_single(ary, q));
  }
  else {
    /* Sort once and reuse the sorted copy for every requested percentile. */
    sorted = ary_percentile_make_sorted(ary);
    for (i = 0; i < m; ++i) {
      double d;
      VALUE x;

      q = RARRAY_AREF(qs, i);
      /*
       * NUM2DBL handles Fixnum, Bignum, Float and Rational (and raises for
       * non-numeric values). The previous code stored the double returned by
       * NUM2DBL into a VALUE and then read it back with RFLOAT_VALUE, which
       * treated a truncated number as an object reference.
       */
      d = NUM2DBL(q);

      x = ary_percentile_single_sorted(sorted, n, d);
      rb_ary_push(res, x);
    }
  }

  return res;
}
|
#stdev(population: false) ⇒ Number
Calculate a standard deviation of the values in ary.
This method is equivalent to:
Math.sqrt(ary.variance(population: population))
1474 1475 1476 1477 1478 1479 1480 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1474
/*
 * Array#stdev(population: false)
 *
 * Returns sqrt_value() of the variance computed by ary_variance(), to which
 * the arguments are forwarded unchanged.
 */
static VALUE
ary_stdev(int argc, VALUE *argv, VALUE ary)
{
  VALUE var = ary_variance(argc, argv, ary);

  return sqrt_value(var);
}
|
#sum ⇒ Number
Calculate the sum of the values in ary.
This method uses the
Kahan summation algorithm
to compensate for the loss of precision when ary includes Float values.
Note that this library does not redefine the sum method introduced in Ruby 2.4.
647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 647
/*
 * Array#sum(init = 0) { |e| ... }
 *
 * Sums the elements of ary, starting from the optional initial value
 * (Fixnum 0 when omitted). When a block is given, each element is replaced
 * by the block's result (rb_yield) before it is added. An empty array
 * returns the initial value unchanged.
 *
 * The work proceeds in up to three phases. Each later phase picks up at the
 * element on which the previous phase stopped, by jumping into the middle of
 * its loop so that element is not fetched or yielded a second time:
 *
 *   1. exact   - Fixnum, Bignum and Rational elements are accumulated
 *                without going through floating point;
 *   2. float   - entered when the first non-exact element is a Float;
 *                values are accumulated in C doubles with Kahan compensation;
 *   3. generic - any remaining elements are added by calling idPLUS on the
 *                running value.
 */
static VALUE
ary_sum(int argc, VALUE* argv, VALUE ary)
{
VALUE e, v, r;
long i, n;
int block_given;
/* v is the running result; it starts as the caller's initial value or 0. */
if (rb_scan_args(argc, argv, "01", &v) == 0)
v = LONG2FIX(0);
block_given = rb_block_given_p();
if (RARRAY_LEN(ary) == 0)
return v;
/* n accumulates Fixnum elements in a C long; r accumulates Rationals
 * (Qundef means no Rational has been seen yet). */
n = 0;
r = Qundef;
/* Phase 1: exact accumulation. */
for (i = 0; i < RARRAY_LEN(ary); i++) {
e = RARRAY_AREF(ary, i);
if (block_given)
e = rb_yield(e);
if (FIXNUM_P(e)) {
n += FIX2LONG(e); /* should not overflow long type */
/* Once n no longer fits in a Fixnum, fold it into v and restart n. */
if (!FIXABLE(n)) {
v = rb_big_plus(LONG2NUM(n), v);
n = 0;
}
}
else if (RB_TYPE_P(e, T_BIGNUM))
v = rb_big_plus(e, v);
else if (RB_TYPE_P(e, T_RATIONAL)) {
if (r == Qundef)
r = e;
else
r = rb_rational_plus(r, e);
}
else
goto not_exact;
}
/* Every element was exact: fold the partial accumulators into v. */
if (n != 0)
v = rb_fix_plus(LONG2FIX(n), v);
if (r != Qundef)
v = rb_rational_plus(r, v);
return v;
not_exact:
/* e (at index i) is not a Fixnum, Bignum or Rational. Fold the exact
 * partial sums into v before switching strategy. */
if (n != 0)
v = rb_fix_plus(LONG2FIX(n), v);
if (r != Qundef)
v = rb_rational_plus(r, v);
if (RB_FLOAT_TYPE_P(e)) {
/* Kahan's compensated summation algorithm */
/* Phase 2: f is the running sum and c the running compensation term. */
double f, c;
f = NUM2DBL(v);
c = 0.0;
/* Enter the loop body directly so the current e is consumed without being
 * fetched or yielded again. */
goto has_float_value;
for (; i < RARRAY_LEN(ary); i++) {
double x, y, t;
e = RARRAY_AREF(ary, i);
if (block_given)
e = rb_yield(e);
if (RB_FLOAT_TYPE_P(e))
has_float_value:
x = RFLOAT_VALUE(e);
else if (FIXNUM_P(e))
x = FIX2LONG(e);
else if (RB_TYPE_P(e, T_BIGNUM))
x = rb_big2dbl(e);
else if (RB_TYPE_P(e, T_RATIONAL))
x = rb_num2dbl(e);
else
goto not_float;
/* Kahan update: add x with the previous error removed, then record the
 * new rounding error in c. */
y = x - c;
t = f + y;
c = (t - f) - y;
f = t;
}
return DBL2NUM(f);
not_float:
/* e is not one of the numeric types handled above: box the double sum and
 * continue with generic addition. */
v = DBL2NUM(f);
}
/* Phase 3: generic addition. Jump into the loop body so the current e is
 * added first without being fetched or yielded again. */
goto has_some_value;
for (; i < RARRAY_LEN(ary); i++) {
e = RARRAY_AREF(ary, i);
if (block_given)
e = rb_yield(e);
has_some_value:
v = rb_funcall(v, idPLUS, 1, e);
}
return v;
}
|
#value_counts(normalize: false, sort: true, ascending: false, dropna: true) ⇒ Hash
Returns a hash that contains the counts of values in ary.
This method treats nil and NaN (any object that responds true to nan?)
as the same thing, and stores their combined count as the value for nil.
2010 2011 2012 2013 2014 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 2010
/*
 * Array#value_counts(normalize: false, sort: true, ascending: false, dropna: true)
 *
 * Thin wrapper: forwards the arguments and ary to any_value_counts(),
 * passing ary_value_counts_without_sort as its fourth argument, and returns
 * whatever that call returns.
 */
static VALUE
ary_value_counts(int argc, VALUE *argv, VALUE ary)
{
  VALUE counts;

  counts = any_value_counts(argc, argv, ary, ary_value_counts_without_sort);

  return counts;
}
|
#variance(population: false) ⇒ Number
Calculate a variance of the values in ary.
This method scans the values in ary only once,
and does not cache the values in memory.
When the population: keyword parameter is true,
the variance is calculated as a population variance (divided by $n$).
The default population: keyword parameter is false;
this means the variance is a sample variance (divided by $n-1$).
922 923 924 925 926 927 928 929 930 931 932 933 934 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 922
/*
 * Array#variance(population: false)
 *
 * Returns the variance computed by ary_mean_variance(), passing NULL for the
 * mean output so only the variance is requested. ddof is 0 when
 * opt_population_p() reports the population option, else 1.
 */
static VALUE
ary_variance(int argc, VALUE *argv, VALUE ary)
{
  VALUE kwargs, result;
  size_t ddof;

  rb_scan_args(argc, argv, "0:", &kwargs);
  ddof = opt_population_p(kwargs) ? 0 : 1;

  ary_mean_variance(ary, NULL, &result, ddof);

  return result;
}
|