Class: Array
- Inherits:
-
Object
- Object
- Array
- Includes:
- EnumerableStatistics::ArrayExtension
- Defined in:
- (unknown)
Instance Method Summary collapse
-
#histogram(nbins = :auto, weights: nil, edges: nil, closed: :left) ⇒ EnumerableStatistics::Histogram
The histogram struct.
-
#mean(skip_na: false) ⇒ Number
Calculate a mean of the values in
ary. -
#mean_stdev(population: false) ⇒ mean, stdev
Calculate a mean and a standard deviation of the values in
ary. -
#mean_variance(population: false, skip_na: false) ⇒ mean, variance
Calculate a mean and a variance of the values in
ary. -
#median ⇒ Float
Calculate a median of the values in
ary. -
#percentile(q) ⇒ Float
Calculate specified percentiles of the values in
ary. -
#stdev(population: false) ⇒ Number
Calculate a standard deviation of the values in
ary. -
#sum(skip_na: false) ⇒ Number
Calculate the sum of the values in
ary. -
#value_counts(normalize: false, sort: true, ascending: false, dropna: true) ⇒ Hash
Returns a hash that contains the counts of values in
ary. -
#variance(population: false, skip_na: false) ⇒ Number
Calculate a variance of the values in
ary.
Methods included from EnumerableStatistics::ArrayExtension
#argmax, #argmin, #find_max, #find_min
Instance Method Details
#histogram(nbins = :auto, weights: nil, edges: nil, closed: :left) ⇒ EnumerableStatistics::Histogram
Returns the histogram struct.
2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 2472
/*
 * Array#histogram implementation.
 *
 * Accepts one optional positional argument and an optional keyword hash.
 * The recognised keywords are `weights`, `edges` and `closed`; each value is
 * passed through its check_histogram_* helper.  When no edges result from the
 * keywords they are computed by ary_histogram_calculate_edge from the
 * positional argument; supplying both the positional argument and `edges`
 * raises ArgumentError.
 *
 * Returns a cHistogram struct built from the edges, the per-bin weights,
 * the closed side (sym_left / sym_right) and Qfalse.
 */
static VALUE
ary_histogram(int argc, VALUE *argv, VALUE ary)
{
  VALUE nbins_arg, opts, counts;
  VALUE weights = Qnil;
  VALUE bin_edges = Qnil;
  int closed_left = 1;
  long bin_count, pos;

  rb_scan_args(argc, argv, "01:", &nbins_arg, &opts);

  if (!NIL_P(opts)) {
    enum { KW_WEIGHTS, KW_EDGES, KW_CLOSED, KW_COUNT };
    static ID keys[KW_COUNT];
    VALUE vals[KW_COUNT];

    /* Intern the keyword names only on the first call. */
    if (!keys[0]) {
      keys[KW_WEIGHTS] = rb_intern("weights");
      keys[KW_EDGES] = rb_intern("edges");
      keys[KW_CLOSED] = rb_intern("closed");
    }

    /* All three keywords are optional (0 required, 3 optional). */
    rb_get_kwargs(opts, keys, 0, 3, vals);

    weights = check_histogram_weight_array(vals[KW_WEIGHTS], RARRAY_LEN(ary));
    bin_edges = check_histogram_edges(vals[KW_EDGES]);
    closed_left = check_histogram_left_p(vals[KW_CLOSED]);
  }

  if (!NIL_P(bin_edges)) {
    if (!NIL_P(nbins_arg)) {
      rb_raise(rb_eArgError, "Unable to use both `nbins` and `edges` together");
    }
  }
  else {
    bin_edges = ary_histogram_calculate_edge(ary, nbins_arg, closed_left);
  }

  /* One bin per pair of adjacent edges, each starting at Integer 0. */
  bin_count = RARRAY_LEN(bin_edges) - 1;
  counts = rb_ary_new_capa(bin_count);
  pos = 0;
  while (pos < bin_count) {
    rb_ary_store(counts, pos, INT2FIX(0));
    ++pos;
  }

  histogram_weights_push_values(counts, bin_edges, ary, weights, closed_left);

  return rb_struct_new(cHistogram, bin_edges, counts,
                       closed_left ? sym_left : sym_right,
                       Qfalse);
}
|
#mean(skip_na: false) ⇒ Number
Calculate a mean of the values in ary.
This method utilizes the
Kahan summation algorithm
to compensate for floating-point precision loss when ary includes Float values.
1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1004
/*
 * Array#mean implementation.
 *
 * Takes only an optional keyword hash; the skip_na flag is derived from it
 * by opt_skip_na.  The mean is obtained from ary_mean_variance, called with
 * no variance output and a ddof of 1.
 *
 * Returns the value stored by ary_mean_variance, or nil if it stores nothing.
 */
static VALUE
ary_mean(int argc, VALUE *argv, VALUE ary)
{
  VALUE kw;
  VALUE result = Qnil;

  rb_scan_args(argc, argv, ":", &kw);
  ary_mean_variance(ary, &result, NULL, 1, opt_skip_na(kw));

  return result;
}
|
#mean_stdev(population: false) ⇒ mean, stdev
Calculate a mean and a standard deviation of the values in ary.
The first element of the result array is the mean,
and the second is the standard deviation.
This method is equivalent to:
def mean_stdev(population: false)
m, v = mean_variance(population: population)
[m, Math.sqrt(v)]
end
1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1579
/*
 * Array#mean_stdev implementation.
 *
 * Reads the variance options from the optional keyword hash, computes the
 * mean and variance in one call to ary_mean_variance (ddof is 0 when the
 * population option is set, 1 otherwise) and returns a two-element array
 * of the mean and sqrt_value(variance).
 *
 * NOTE(review): `avg` and `var` are not initialised here; this presumably
 * relies on ary_mean_variance always storing both outputs -- confirm.
 */
static VALUE
ary_mean_stdev(int argc, VALUE* argv, VALUE ary)
{
  struct variance_opts vopts;
  VALUE kw, avg, var;
  size_t ddof;

  rb_scan_args(argc, argv, "0:", &kw);
  get_variance_opts(kw, &vopts);
  ddof = vopts.population ? 0 : 1;

  ary_mean_variance(ary, &avg, &var, ddof, vopts.skip_na);

  return rb_assoc_new(avg, sqrt_value(var));
}
|
#mean_variance(population: false, skip_na: false) ⇒ mean, variance
Calculate a mean and a variance of the values in ary.
The first element of the result array is the mean, and the second is the variance.
When the population: keyword parameter is true,
the variance is calculated as a population variance (divided by $n$).
The default population: keyword parameter is false;
this means the variance is a sample variance (divided by $n-1$).
This method scans the values in ary only once,
and does not cache the values in memory.
978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 978
/*
 * Array#mean_variance implementation.
 *
 * Reads the variance options from the optional keyword hash and delegates
 * to ary_mean_variance with a ddof of 0 when the population option is set
 * and 1 otherwise.
 *
 * Returns a two-element array of the mean and the variance; either entry is
 * nil if ary_mean_variance does not store a value for it.
 */
static VALUE
ary_mean_variance_m(int argc, VALUE* argv, VALUE ary)
{
  struct variance_opts vopts;
  VALUE kw;
  VALUE avg = Qnil;
  VALUE var = Qnil;
  size_t ddof;

  rb_scan_args(argc, argv, "0:", &kw);
  get_variance_opts(kw, &vopts);
  ddof = vopts.population ? 0 : 1;

  ary_mean_variance(ary, &avg, &var, ddof, vopts.skip_na);

  return rb_assoc_new(avg, var);
}
|
#median ⇒ Float
Calculate a median of the values in ary.
1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1812
/*
 * Array#median implementation.
 *
 * - Empty array: returns a Float NaN.
 * - One element: returns that element unchanged.
 * - Two elements: returns the mean of both, taken in their original order.
 * - Otherwise the array is ordered by ary_percentile_make_sorted.  If the
 *   first ordered element satisfies is_na, NaN is returned.  For an odd
 *   length the middle element is returned as is; for an even length the
 *   mean of the two middle elements is returned.
 *
 * The mean of two values is computed by adding them with `+`.  When the sum
 * is an Integer, Float or Rational it is converted to a C double and halved;
 * for any other type the sum's own division method is called with 2.0.
 */
static VALUE
ary_median(VALUE ary)
{
  const long len = RARRAY_LEN(ary);
  VALUE lower, upper, total;

  if (len == 0) {
    return DBL2NUM(nan(""));
  }
  if (len == 1) {
    return RARRAY_AREF(ary, 0);
  }

  if (len == 2) {
    /* Two elements need no sorting: their mean is order independent. */
    lower = RARRAY_AREF(ary, 0);
    upper = RARRAY_AREF(ary, 1);
  }
  else {
    VALUE ordered = ary_percentile_make_sorted(ary);

    if (is_na(RARRAY_AREF(ordered, 0))) {
      return DBL2NUM(nan(""));
    }

    upper = RARRAY_AREF(ordered, len / 2);
    if (len % 2 == 1) {
      return upper;
    }
    lower = RARRAY_AREF(ordered, len / 2 - 1);
  }

  total = rb_funcall(lower, idPLUS, 1, upper); /* TODO: optimize */
  if (RB_INTEGER_TYPE_P(total) || RB_FLOAT_TYPE_P(total) || RB_TYPE_P(total, T_RATIONAL)) {
    return DBL2NUM(NUM2DBL(total) / 2.0);
  }
  return rb_funcall(total, idDIV, 1, DBL2NUM(2.0));
}
|
#percentile(q) ⇒ Float
Calculate specified percentiles of the values in ary.
1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1745
/*
 * Array#percentile implementation.
 *
 * q is either a single quantile or something convertible to an Array via
 * to_ary.
 *
 * - Raises ArgumentError when ary is empty.
 * - When q is not array-like, returns ary_percentile_single(ary, q).
 * - When q is array-like, returns a new Array with one result per element
 *   of q.  A single-element q is delegated to ary_percentile_single; for
 *   any other length ary is sorted once by ary_percentile_make_sorted and
 *   each quantile is evaluated against the sorted copy.
 *
 * Each quantile is converted to a C double before use.  Fixnum, Bignum and
 * Float take a direct conversion; Rational and every other type go through
 * NUM2DBL.
 */
static VALUE
ary_percentile(VALUE ary, VALUE q)
{
  long n, m, i;
  double d;
  VALUE qs, sorted, res;

  n = RARRAY_LEN(ary);
  if (n == 0) {
    rb_raise(rb_eArgError, "unable to compute percentile(s) for an empty array");
  }

  qs = rb_check_convert_type(q, T_ARRAY, "Array", "to_ary");
  if (NIL_P(qs)) {
    return ary_percentile_single(ary, q);
  }

  m = RARRAY_LEN(qs);
  res = rb_ary_new_capa(m);

  if (m == 1) {
    q = RARRAY_AREF(qs, 0);
    rb_ary_push(res, ary_percentile_single(ary, q));
  }
  else {
    /* Sort once and reuse the sorted copy for every requested quantile. */
    sorted = ary_percentile_make_sorted(ary);
    for (i = 0; i < m; ++i) {
      VALUE x;

      q = RARRAY_AREF(qs, i);
      switch (TYPE(q)) {
        case T_FIXNUM:
          d = (double)FIX2LONG(q);
          break;

        case T_BIGNUM:
          d = rb_big2dbl(q);
          break;

        case T_FLOAT:
          d = RFLOAT_VALUE(q);
          break;

        case T_RATIONAL:
          /* fall through */
        default:
          /*
           * NUM2DBL already yields a C double.  It must not be stored in a
           * VALUE and then read back with RFLOAT_VALUE, which would treat
           * the truncated number as an object handle.
           */
          d = NUM2DBL(q);
          break;
      }

      x = ary_percentile_single_sorted(sorted, n, d);
      rb_ary_push(res, x);
    }
  }

  return res;
}
|
#stdev(population: false) ⇒ Number
Calculate a standard deviation of the values in ary.
This method is equivalent to:
Math.sqrt(ary.variance(population: population))
1609 1610 1611 1612 1613 1614 1615 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1609
/*
 * Array#stdev implementation.
 *
 * Forwards all arguments to ary_variance and returns sqrt_value applied
 * to the resulting variance.
 */
static VALUE
ary_stdev(int argc, VALUE* argv, VALUE ary)
{
  return sqrt_value(ary_variance(argc, argv, ary));
}
|
#sum(skip_na: false) ⇒ Number
Calculate the sum of the values in ary.
This method utilizes the
Kahan summation algorithm
to compensate for floating-point precision loss when ary includes Float values.
Redefines sum (Ruby >= 2.4). Original is aliased as __sum__.
807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 807
/*
 * Array#sum implementation.
 *
 * Accepts an optional initial value and an optional keyword hash.  When no
 * initial value is given, Integer 0 is used.  The skip_na flag is derived
 * from the keyword hash by opt_skip_na.
 *
 * When HAVE_ENUM_SUM is not defined and skip_na is off, the call is
 * delegated to the method identified by id_builtin_sum with the initial
 * value as its only argument.  In every other case the result comes from
 * ary_calculate_sum.
 */
static VALUE
ary_sum(int argc, VALUE* argv, VALUE ary)
{
  VALUE v, opts;
  int skip_na;

  /* rb_scan_args returns the positional count, excluding the keyword hash. */
  if (rb_scan_args(argc, argv, "01:", &v, &opts) == 0) {
    v = LONG2FIX(0);
  }
  skip_na = opt_skip_na(opts);

#ifndef HAVE_ENUM_SUM
  if (!skip_na) {
    /*
     * rb_funcall takes its arguments as individual VALUEs, so the initial
     * value is passed directly.  The count is fixed at 1 because the raw
     * argc may also count the keyword hash.
     */
    return rb_funcall(ary, id_builtin_sum, 1, v);
  }
#endif

  return ary_calculate_sum(ary, v, skip_na, NULL);
}
|
#value_counts(normalize: false, sort: true, ascending: false, dropna: true) ⇒ Hash
Returns a hash that contains the counts of values in ary.
This method treats nil and NaN (any object that responds true to nan?)
as the same thing, and stores their combined count as the value for the nil key.
2130 2131 2132 2133 2134 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 2130
/*
 * Array#value_counts implementation.
 *
 * Delegates to any_value_counts, passing ary_value_counts_without_sort as
 * the array-specific counting routine, and returns its result unchanged.
 */
static VALUE
ary_value_counts(int argc, VALUE* argv, VALUE ary)
{
  VALUE counts;

  counts = any_value_counts(argc, argv, ary, ary_value_counts_without_sort);
  return counts;
}
|
#variance(population: false, skip_na: false) ⇒ Number
Calculate a variance of the values in ary.
This method scans the values in ary only once,
and does not cache the values in memory.
When the population: keyword parameter is true,
the variance is calculated as a population variance (divided by $n$).
The default population: keyword parameter is false;
this means the variance is a sample variance (divided by $n-1$).
1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1031
/*
 * Array#variance implementation.
 *
 * Reads the variance options from the optional keyword hash and asks
 * ary_mean_variance for the variance only (no mean output), with a ddof
 * of 0 when the population option is set and 1 otherwise.
 *
 * NOTE(review): `var` is not initialised here; this presumably relies on
 * ary_mean_variance always storing the variance output -- confirm.
 */
static VALUE
ary_variance(int argc, VALUE* argv, VALUE ary)
{
  struct variance_opts vopts;
  VALUE kw, var;
  size_t ddof;

  rb_scan_args(argc, argv, "0:", &kw);
  get_variance_opts(kw, &vopts);
  ddof = vopts.population ? 0 : 1;

  ary_mean_variance(ary, NULL, &var, ddof, vopts.skip_na);

  return var;
}
|