Module: Rust::Descriptive
- Defined in:
- lib/rust/stats/descriptive.rb
Class Method Summary collapse
- .mean(data) ⇒ Object
- .median(data) ⇒ Object
- .outliers(data, k = 1.5, **opts) ⇒ Object
- .outliers_according_to(data, data_distribution, k = 1.5, **opts) ⇒ Object
- .quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0]) ⇒ Object
- .standard_deviation(data) ⇒ Object (also: sd, stddev)
- .sum(data) ⇒ Object
- .variance(data) ⇒ Object (also: var)
Class Method Details
.mean(data) ⇒ Object
5 6 7 8 9 |
# File 'lib/rust/stats/descriptive.rb', line 5

# Computes the arithmetic mean of an array of numbers.
#
# @param data [Array<Numeric>] the values to average
# @return [Float] the total divided by the element count; NaN for an empty
#   array, because the float total 0.0 is divided by a size of 0
# @raise [TypeError] if +data+ is not an Array or holds a non-Numeric element
def mean(data)
  numeric_array = data.is_a?(Array) && data.all? { |value| value.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless numeric_array

  total = data.sum.to_f
  total / data.size
end
.median(data) ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/rust/stats/descriptive.rb', line 28

# Computes the median of an array of numbers.
#
# @param data [Array<Numeric>] the values to inspect (not modified; a sorted
#   copy is used)
# @return [Numeric] the middle element of the sorted values when the count is
#   odd, the Float average of the two middle elements when it is even, and
#   Float::NAN when the array is empty
# @raise [TypeError] if +data+ is not an Array or holds a non-Numeric element
def median(data)
  numeric_array = data.is_a?(Array) && data.all? { |value| value.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless numeric_array

  count = data.size
  return Float::NAN if count.zero?

  ordered = data.sort
  middle = count / 2
  return ordered[middle] if count.odd?

  # Even count: average the two elements that straddle the centre.
  (ordered[middle - 1] + ordered[middle]) / 2.0
end
.outliers(data, k = 1.5, **opts) ⇒ Object
80 81 82 |
# File 'lib/rust/stats/descriptive.rb', line 80

# Finds the outliers of +data+, using +data+ itself as the reference
# distribution. All the work is delegated to +outliers_according_to+.
#
# @param data [Array<Numeric>] the values to screen; also passed as the
#   reference distribution
# @param k [Numeric] forwarded unchanged to +outliers_according_to+
# @param opts [Hash] forwarded unchanged to +outliers_according_to+
# @return [Object] whatever +outliers_according_to+ returns
def outliers(data, k = 1.5, **opts)
  reference_distribution = data
  outliers_according_to(data, reference_distribution, k, **opts)
end
.outliers_according_to(data, data_distribution, k = 1.5, **opts) ⇒ Object
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/rust/stats/descriptive.rb', line 84

# Selects the elements of +data+ that lie outside the interquartile fences of
# a reference distribution.
#
# The first and third quartiles (q1, q3) of +data_distribution+ are obtained
# from Rust::Descriptive.quantile. An element is a positive outlier when it is
# greater than q3 + k * (q3 - q1), and a negative outlier when it is smaller
# than q1 - k * (q3 - q1).
#
# @param data [Array<Numeric>] the values to screen
# @param data_distribution [Array<Numeric>] the values the quartiles are
#   computed from
# @param k [Numeric] multiplier applied to the interquartile range
# @param opts [Hash] optional settings
# @option opts [Symbol, String] :side restricts the result to one side:
#   +:positive+, +:pos+, +:p+ or +:++ keeps only the positive outliers;
#   +:negative+, +:neg+, +:n+ or +:-+ keeps only the negative outliers.
#   Any other value, or no value, keeps both sides.
# @return [Array<Numeric>] negative outliers followed by positive outliers
#   (each in the order they appear in +data+), or just one side when requested
def outliers_according_to(data, data_distribution, k = 1.5, **opts)
  quartiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
  q1 = quartiles[0.25]
  q3 = quartiles[0.75]
  iqr = q3 - q1

  positive_outliers = data.select { |d| d > q3 + iqr * k }
  negative_outliers = data.select { |d| d < q1 - iqr * k }

  side = opts[:side]
  return negative_outliers + positive_outliers unless side

  # The short aliases used to be crossed (:neg/:n selected the positive side
  # and :pos/:p the negative side); each alias now matches its own side.
  case side.to_sym
  when :positive, :pos, :p, :+
    positive_outliers
  when :negative, :neg, :n, :-
    negative_outliers
  else
    negative_outliers + positive_outliers
  end
end
.quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0]) ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/rust/stats/descriptive.rb', line 48

# Computes the requested quantiles of an array of numbers.
#
# For each percentile p, the fractional position (n - 1) * p in the sorted
# data is located. When that position falls between two elements that differ,
# the result is their linear interpolation; otherwise the element at the
# floored position is returned as is.
#
# @param data [Array<Numeric>] the values to inspect (not modified; a sorted
#   copy is used)
# @param percentiles [Array<Numeric>] probabilities, each within 0..1
# @return [Hash{Numeric => Numeric, nil}] maps every requested percentile to
#   its quantile; the values are nil when +data+ is empty
# @raise [TypeError] if +data+ or +percentiles+ is not an Array of Numerics
# @raise [RuntimeError] if any percentile lies outside 0..1
def quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
  numeric_list = ->(list) { list.is_a?(Array) && list.all? { |item| item.is_a?(Numeric) } }
  raise TypeError, "Expecting Array of numerics" unless numeric_list.call(data)
  raise TypeError, "Expecting Array of numerics" unless numeric_list.call(percentiles)
  raise "Percentiles outside the range: #{percentiles}" unless percentiles.all? { |p| p.between?(0, 1) }

  ordered = data.sort
  last_index = [ordered.size - 1, 0].max

  percentiles.each_with_object({}) do |p, table|
    # Written as "1 + ... - 1" on purpose: this keeps the floating-point
    # rounding of the position identical to the original formulation.
    position = 1 + last_index * p - 1
    lower = position.floor
    value = ordered[lower]

    if position > lower
      upper_value = ordered[position.ceil]
      # Interpolate only between distinct neighbours; equal neighbours keep
      # the stored element (and its type) untouched.
      if upper_value != value
        weight = position - lower
        value = (1 - weight) * value + weight * upper_value
      end
    end

    table[p] = value
  end
end
.standard_deviation(data) ⇒ Object Also known as: sd, stddev
11 12 13 14 15 |
# File 'lib/rust/stats/descriptive.rb', line 11

# Computes the standard deviation of an array of numbers as the square root
# of the value returned by +variance+.
#
# @param data [Array<Numeric>] the values to inspect
# @return [Float] the square root of +variance(data)+
# @raise [TypeError] if +data+ is not an Array or holds a non-Numeric element
def standard_deviation(data)
  numeric_array = data.is_a?(Array) && data.all? { |value| value.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless numeric_array

  spread = variance(data)
  Math.sqrt(spread)
end
.sum(data) ⇒ Object
42 43 44 45 46 |
# File 'lib/rust/stats/descriptive.rb', line 42

# Adds up the elements of an array of numbers.
#
# @param data [Array<Numeric>] the values to add
# @return [Numeric] the result of +Array#sum+ on +data+ (0 for an empty array)
# @raise [TypeError] if +data+ is not an Array or holds a non-Numeric element
def sum(data)
  numeric_array = data.is_a?(Array) && data.all? { |value| value.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless numeric_array

  data.sum
end
.variance(data) ⇒ Object Also known as: var
19 20 21 22 23 24 25 |
# File 'lib/rust/stats/descriptive.rb', line 19

# Computes the variance of an array of numbers using an n - 1 denominator:
# the sum of squared deviations from +mean(data)+ divided by (size - 1).
#
# @param data [Array<Numeric>] the values to inspect
# @return [Float] the variance, or Float::NAN when +data+ has fewer than two
#   elements
# @raise [TypeError] if +data+ is not an Array or holds a non-Numeric element
def variance(data)
  numeric_array = data.is_a?(Array) && data.all? { |value| value.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless numeric_array

  count = data.size
  return Float::NAN if count < 2

  center = mean(data)
  squared_deviations = data.map { |value| (value - center)**2 }
  squared_deviations.sum.to_f / (count - 1)
end