Class: Array
- Inherits:
-
Object
- Object
- Array
- Includes:
- EnumerableStatistics::ArrayExtension
- Defined in:
- (unknown)
Instance Method Summary collapse
-
#histogram(nbins = :auto, weights: nil, edges: nil, closed: :left) ⇒ EnumerableStatistics::Histogram
The histogram struct.
-
#mean(skip_na: false) ⇒ Number
Calculate a mean of the values in
ary. -
#mean_stdev(population: false) ⇒ mean, stdev
Calculate a mean and a standard deviation of the values in
ary. -
#mean_variance(population: false, skip_na: false) ⇒ mean, variance
Calculate a mean and a variance of the values in
ary. -
#median ⇒ Float
Calculate a median of the values in
ary. -
#percentile(q) ⇒ Float
Calculate specified percentiles of the values in
ary. -
#stdev(population: false) ⇒ Number
Calculate a standard deviation of the values in
ary. -
#sum(skip_na: false) ⇒ Number
Calculate the sum of the values in
ary. -
#value_counts(normalize: false, sort: true, ascending: false, dropna: true) ⇒ Hash
Returns a hash that contains the counts of values in
ary. -
#variance(population: false, skip_na: false) ⇒ Number
Calculate a variance of the values in
ary.
Methods included from EnumerableStatistics::ArrayExtension
#argmax, #argmin, #find_max, #find_min
Instance Method Details
#histogram(nbins = :auto, weights: nil, edges: nil, closed: :left) ⇒ EnumerableStatistics::Histogram
Returns the histogram struct.
2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 2471 static VALUE ary_histogram(int argc, VALUE *argv, VALUE ary) { VALUE arg0, kwargs, bin_weights; long n_bin_weights, i; VALUE weight_array = Qnil; VALUE edges = Qnil; int left_p = 1; rb_scan_args(argc, argv, "01:", &arg0, &kwargs); if (!NIL_P(kwargs)) { enum { kw_weights, kw_edges, kw_closed }; static ID kwarg_keys[3]; VALUE kwarg_vals[3]; if (!kwarg_keys[0]) { kwarg_keys[kw_weights] = rb_intern("weights"); kwarg_keys[kw_edges] = rb_intern("edges"); kwarg_keys[kw_closed] = rb_intern("closed"); } rb_get_kwargs(kwargs, kwarg_keys, 0, 3, kwarg_vals); weight_array = check_histogram_weight_array(kwarg_vals[kw_weights], RARRAY_LEN(ary)); edges = check_histogram_edges(kwarg_vals[kw_edges]); left_p = check_histogram_left_p(kwarg_vals[kw_closed]); } if (NIL_P(edges)) { edges = ary_histogram_calculate_edge(ary, arg0, left_p); } else if (! NIL_P(arg0)) { rb_raise(rb_eArgError, "Unable to use both `nbins` and `edges` together"); } n_bin_weights = RARRAY_LEN(edges) - 1; bin_weights = rb_ary_new_capa(n_bin_weights); for (i = 0; i < n_bin_weights; ++i) { rb_ary_store(bin_weights, i, INT2FIX(0)); } histogram_weights_push_values(bin_weights, edges, ary, weight_array, left_p); return rb_struct_new(cHistogram, edges, bin_weights, left_p ? sym_left : sym_right, Qfalse); } |
#mean(skip_na: false) ⇒ Number
Calculate a mean of the values in ary.
This method uses the
Kahan summation algorithm
to compensate for the loss of precision when ary includes Float values.
1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1003 static VALUE ary_mean(int argc, VALUE *argv, VALUE ary) { VALUE mean = Qnil, opts; int skip_na; rb_scan_args(argc, argv, ":", &opts); skip_na = opt_skip_na(opts); ary_mean_variance(ary, &mean, NULL, 1, skip_na); return mean; } |
#mean_stdev(population: false) ⇒ mean, stdev
Calculate a mean and a standard deviation of the values in ary.
The first element of the result array is the mean,
and the second is the standard deviation.
This method is equivalent to:
def mean_stdev(population: false)
m, v = mean_variance(population: population)
[m, Math.sqrt(v)]
end
1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1578 static VALUE ary_mean_stdev(int argc, VALUE* argv, VALUE ary) { struct variance_opts ; VALUE opts, mean, variance; size_t ddof = 1; rb_scan_args(argc, argv, "0:", &opts); get_variance_opts(opts, &); if (.population) ddof = 0; ary_mean_variance(ary, &mean, &variance, ddof, .skip_na); VALUE stdev = sqrt_value(variance); return rb_assoc_new(mean, stdev); } |
#mean_variance(population: false, skip_na: false) ⇒ mean, variance
Calculate a mean and a variance of the values in ary.
The first element of the result array is the mean, and the second is the variance.
When the population: keyword parameter is true,
the variance is calculated as a population variance (divided by $n$).
The default population: keyword parameter is false;
this means the variance is a sample variance (divided by $n-1$).
This method scans the values in ary only once,
and does not cache the values in memory.
977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 977 static VALUE ary_mean_variance_m(int argc, VALUE* argv, VALUE ary) { struct variance_opts ; VALUE opts, mean = Qnil, variance = Qnil; size_t ddof = 1; rb_scan_args(argc, argv, "0:", &opts); get_variance_opts(opts, &); if (.population) ddof = 0; ary_mean_variance(ary, &mean, &variance, ddof, .skip_na); return rb_assoc_new(mean, variance); } |
#median ⇒ Float
Calculate a median of the values in ary.
1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1811 static VALUE ary_median(VALUE ary) { long n; VALUE sorted, a0, a1; n = RARRAY_LEN(ary); switch (n) { case 0: goto return_nan; case 1: return RARRAY_AREF(ary, 0); case 2: a0 = RARRAY_AREF(ary, 0); a1 = RARRAY_AREF(ary, 1); goto mean_two; default: break; } sorted = ary_percentile_make_sorted(ary); a0 = RARRAY_AREF(sorted, 0); if (is_na(a0)) { return_nan: return DBL2NUM(nan("")); } a1 = RARRAY_AREF(sorted, n / 2); if (n % 2 == 1) { return a1; } else { a0 = RARRAY_AREF(sorted, n / 2 - 1); mean_two: a0 = rb_funcall(a0, idPLUS, 1, a1); /* TODO: optimize */ if (RB_INTEGER_TYPE_P(a0) || RB_FLOAT_TYPE_P(a0) || RB_TYPE_P(a0, T_RATIONAL)) { double d = NUM2DBL(a0); return DBL2NUM(d / 2.0); } return rb_funcall(a0, idDIV, 1, DBL2NUM(2.0)); } } |
#percentile(q) ⇒ Float
Calculate specified percentiles of the values in ary.
1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1744 static VALUE ary_percentile(VALUE ary, VALUE q) { long n, m, i; double d; VALUE qf, qs, sorted, res; n = RARRAY_LEN(ary); if (n == 0) { rb_raise(rb_eArgError, "unable to compute percentile(s) for an empty array"); } qs = rb_check_convert_type(q, T_ARRAY, "Array", "to_ary"); if (NIL_P(qs)) { return ary_percentile_single(ary, q); } m = RARRAY_LEN(qs); res = rb_ary_new_capa(m); if (m == 1) { q = RARRAY_AREF(qs, 0); rb_ary_push(res, ary_percentile_single(ary, q)); } else { sorted = ary_percentile_make_sorted(ary); for (i = 0; i < m; ++i) { VALUE x; q = RARRAY_AREF(qs, i); switch (TYPE(q)) { case T_FIXNUM: d = (double)FIX2LONG(q); break; case T_BIGNUM: d = rb_big2dbl(q); break; case T_RATIONAL: /* fall through */ default: qf = NUM2DBL(q); goto float_percentile; case T_FLOAT: qf = q; float_percentile: d = RFLOAT_VALUE(qf); break; } x = ary_percentile_single_sorted(sorted, n, d); rb_ary_push(res, x); } } return res; } |
#stdev(population: false) ⇒ Number
Calculate a standard deviation of the values in ary.
This method is equivalent to:
Math.sqrt(ary.variance(population: population))
1608 1609 1610 1611 1612 1613 1614 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1608 static VALUE ary_stdev(int argc, VALUE* argv, VALUE ary) { VALUE variance = ary_variance(argc, argv, ary); VALUE stdev = sqrt_value(variance); return stdev; } |
#sum(skip_na: false) ⇒ Number
Calculate the sum of the values in ary.
This method uses the
Kahan summation algorithm
to compensate for the loss of precision when ary includes Float values.
Note that this library does not redefine the sum method introduced in Ruby 2.4.
806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 806 static VALUE ary_sum(int argc, VALUE* argv, VALUE ary) { VALUE v, opts; int skip_na; if (rb_scan_args(argc, argv, "01:", &v, &opts) == 0) { v = LONG2FIX(0); } skip_na = opt_skip_na(opts); #ifndef HAVE_ENUM_SUM if (!skip_na) { return rb_funcall(orig_ary_sum, rb_intern("call"), argc, &v); } #endif return ary_calculate_sum(ary, v, skip_na, NULL); } |
#value_counts(normalize: false, sort: true, ascending: false, dropna: true) ⇒ Hash
Returns a hash that contains the counts of values in ary.
This method treats nil and NaN (that is, any object that responds true to nan?)
as the same thing, and stores their combined count as the value for nil.
2129 2130 2131 2132 2133 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 2129 static VALUE ary_value_counts(int argc, VALUE* argv, VALUE ary) { return any_value_counts(argc, argv, ary, ary_value_counts_without_sort); } |
#variance(population: false, skip_na: false) ⇒ Number
Calculate a variance of the values in ary.
This method scans the values in ary only once,
and does not cache the values in memory.
When the population: keyword parameter is true,
the variance is calculated as a population variance (divided by $n$).
The default population: keyword parameter is false;
this means the variance is a sample variance (divided by $n-1$).
1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 |
# File 'ext/enumerable/statistics/extension/statistics.c', line 1030 static VALUE ary_variance(int argc, VALUE* argv, VALUE ary) { struct variance_opts ; VALUE opts, variance; size_t ddof = 1; rb_scan_args(argc, argv, "0:", &opts); get_variance_opts(opts, &); if (.population) ddof = 0; ary_mean_variance(ary, NULL, &variance, ddof, .skip_na); return variance; } |