Module: Rust::Descriptive
- Defined in:
- lib/rust-descriptive.rb
Class Method Summary
- .mean(data) ⇒ Object
- .median(data) ⇒ Object
- .outliers(data, k = 1.5, **opts) ⇒ Object
- .outliers_according_to(data, data_distribution, k = 1.5, **opts) ⇒ Object
- .quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0]) ⇒ Object
- .standard_deviation(data) ⇒ Object (also: sd, stddev)
- .sum(data) ⇒ Object
- .variance(data) ⇒ Object (also: var)
Class Method Details
.mean(data) ⇒ Object
7 8 9 10 11 |
# File 'lib/rust-descriptive.rb', line 7

# Arithmetic mean of a list of numbers.
#
# @param data [Array<Numeric>] the sample
# @return [Float] sum of the elements divided by their count; an empty array
#   yields NaN because the division is 0.0 / 0
# @raise [TypeError] if +data+ is not an Array or holds a non-Numeric element
def mean(data)
  well_formed = data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless well_formed

  total = data.sum.to_f
  total / data.size
end
.median(data) ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/rust-descriptive.rb', line 30

# Median of a list of numbers.
#
# @param data [Array<Numeric>] the sample (it is not modified; a sorted copy is used)
# @return [Numeric] Float::NAN for an empty array; the middle element itself
#   when the size is odd; the Float average of the two middle elements when
#   the size is even
# @raise [TypeError] if +data+ is not an Array or holds a non-Numeric element
def median(data)
  unless data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics"
  end

  return Float::NAN if data.empty?

  ordered = data.sort
  middle = ordered.size / 2
  return ordered[middle] if ordered.size.odd?

  # Even size: average the two elements that straddle the centre.
  (ordered[middle - 1] + ordered[middle]) / 2.0
end
.outliers(data, k = 1.5, **opts) ⇒ Object
82 83 84 |
# File 'lib/rust-descriptive.rb', line 82

# Finds the outliers of +data+ relative to its own distribution.
#
# Delegates to +outliers_according_to+, passing +data+ both as the values to
# test and as the reference distribution.
#
# @param data [Array] values to test, also used as the reference distribution
# @param k [Numeric] multiplier forwarded unchanged
# @param opts [Hash] options forwarded unchanged
# @return [Object] whatever +outliers_according_to+ returns
def outliers(data, k = 1.5, **opts)
  reference = data
  outliers_according_to(data, reference, k, **opts)
end
.outliers_according_to(data, data_distribution, k = 1.5, **opts) ⇒ Object
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/rust-descriptive.rb', line 86 def outliers_according_to(data, data_distribution, k=1.5, **opts) quantiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75]) q1 = quantiles[0.25] q3 = quantiles[0.75] iqr = q3 - q1 positive_outliers = data.select { |d| d > q3 + iqr * k } negative_outliers = data.select { |d| d < q1 - iqr * k } outliers = negative_outliers + positive_outliers if opts[:side] case opts[:side].to_sym when :positive, :neg, :n, :+ outliers = positive_outliers when :negative, :pos, :p, :- outliers = negative_outliers end end return outliers end |
.quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0]) ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/rust-descriptive.rb', line 50

# Computes sample quantiles of +data+ for the requested +percentiles+.
#
# Each percentile p is mapped to the fractional position (n - 1) * p in the
# sorted data (n = data.size). A whole-number position returns that element;
# otherwise the result is the linear interpolation between the two
# neighbouring elements, unless they are equal, in which case the lower
# element is returned as is.
#
# @param data [Array<Numeric>] the sample (not modified; a sorted copy is used)
# @param percentiles [Array<Numeric>] probabilities, each within 0..1
# @return [Hash{Numeric => Numeric}] maps every requested percentile to its
#   quantile. For empty +data+ every value is Float::NAN, in line with what
#   +median+ and +variance+ return when the statistic is undefined.
# @raise [TypeError] if +data+ or +percentiles+ is not an Array of Numerics
# @raise [RuntimeError] if any percentile lies outside 0..1
def quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
  raise TypeError, "Expecting Array of numerics" if !data.is_a?(Array) || !data.all? { |e| e.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" if !percentiles.is_a?(Array) || !percentiles.all? { |e| e.is_a?(Numeric) }
  raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }

  # An empty sample has no order statistics; report NaN instead of nil so
  # callers can keep doing arithmetic on the result without a nil check.
  return percentiles.zip(Array.new(percentiles.size, Float::NAN)).to_h if data.empty?

  sorted = data.sort
  last_index = sorted.size - 1

  values = percentiles.map do |pct|
    # Same arithmetic as the 1-based formula shifted to 0-based indexing;
    # the operation order is kept so floating-point results do not drift.
    position = 1 + last_index * pct - 1
    lower = position.floor
    low_value = sorted[lower]
    high_value = sorted[position.ceil]

    if position > lower && high_value != low_value
      weight = position - lower
      (1 - weight) * low_value + weight * high_value
    else
      low_value
    end
  end

  percentiles.zip(values).to_h
end
.standard_deviation(data) ⇒ Object Also known as: sd, stddev
13 14 15 16 17 |
# File 'lib/rust-descriptive.rb', line 13

# Standard deviation of a list of numbers: the square root of +variance+.
#
# @param data [Array<Numeric>] the sample
# @return [Float] Math.sqrt of whatever +variance+ returns for +data+
# @raise [TypeError] if +data+ is not an Array or holds a non-Numeric element
def standard_deviation(data)
  well_formed = data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless well_formed

  spread = variance(data)
  Math.sqrt(spread)
end
.sum(data) ⇒ Object
44 45 46 47 48 |
# File 'lib/rust-descriptive.rb', line 44

# Sum of a list of numbers, with type checking on the input.
#
# @param data [Array<Numeric>] the values to add up
# @return [Numeric] the result of Array#sum on +data+ (0 for an empty array)
# @raise [TypeError] if +data+ is not an Array or holds a non-Numeric element
def sum(data)
  unless data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics"
  end

  data.sum
end
.variance(data) ⇒ Object Also known as: var
21 22 23 24 25 26 27 |
# File 'lib/rust-descriptive.rb', line 21

# Sample variance of a list of numbers: the sum of squared deviations from
# the mean divided by (size - 1).
#
# @param data [Array<Numeric>] the sample
# @return [Float] the variance, or Float::NAN when +data+ has fewer than two
#   elements
# @raise [TypeError] if +data+ is not an Array or holds a non-Numeric element
def variance(data)
  unless data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics"
  end

  return Float::NAN if data.size < 2

  center = mean(data)
  squared_deviations = data.map { |v| (v - center)**2 }
  squared_deviations.sum.to_f / (data.size - 1)
end