Module: MyMathGem::DataProcessing
- Defined in:
- lib/my_math_gem/data_processing.rb
Class Method Summary
- .clean_data(data) ⇒ Object
Buang nil atau NaN dari data.
- .filter_outliers(data) ⇒ Object
- .kurtosis(data) ⇒ Object
- .mean(data) ⇒ Object
- .median(data) ⇒ Object
- .min_max_normalize(data) ⇒ Object
- .mode(data) ⇒ Object
- .moving_average(data, w = 3) ⇒ Object
- .pearson_correlation(x, y) ⇒ Object
- .percentile(sorted_data, p) ⇒ Object
- .robust_scale(data) ⇒ Object
- .skewness(data) ⇒ Object
- .standard_deviation(data) ⇒ Object
- .standard_error_mean(data) ⇒ Object
- .trimmed_mean(data, trim_ratio = 0.1) ⇒ Object
- .variance(data) ⇒ Object
- .weighted_mean(data, weights) ⇒ Object
- .z_score_normalize(data) ⇒ Object
Class Method Details
.clean_data(data) ⇒ Object
Buang nil atau NaN dari data
4 5 6 7 |
# File 'lib/my_math_gem/data_processing.rb', line 4

# Returns a copy of +data+ with nil entries and NaN values removed.
#
# @param data [Array] values to clean
# @return [Array] new array without nil or NaN elements
# @raise [ArgumentError] if +data+ is not an Array
def self.clean_data(data)
  unless data.is_a?(Array)
    raise ArgumentError, "Data harus array"
  end

  data.reject do |value|
    # Only objects that know about NaN (e.g. Float) are asked for it.
    value.nil? || (value.respond_to?(:nan?) && value.nan?)
  end
end
.filter_outliers(data) ⇒ Object
108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/my_math_gem/data_processing.rb', line 108

# Keeps only the values lying within 1.5 * IQR of the first and third
# quartiles. The original element order is preserved.
#
# @param data [Array] values; nil/NaN entries are dropped by clean_data first
# @return [Array] the values inside [Q1 - 1.5*IQR, Q3 + 1.5*IQR]
# @raise [ArgumentError] if fewer than 4 values remain after cleaning
def self.filter_outliers(data)
  values = clean_data(data)
  if values.size < 4
    raise ArgumentError, "Data harus minimal 4 elemen"
  end

  ordered = values.sort
  first_quartile = percentile(ordered, 25)
  third_quartile = percentile(ordered, 75)
  spread = third_quartile - first_quartile
  lower_bound = first_quartile - 1.5 * spread
  upper_bound = third_quartile + 1.5 * spread

  values.select do |x|
    x >= lower_bound && x <= upper_bound
  end
end
.kurtosis(data) ⇒ Object
131 132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/my_math_gem/data_processing.rb', line 131

# Sample excess kurtosis (bias-corrected):
#
#   n(n+1) / ((n-1)(n-2)(n-3)) * sum(((x - mean) / sd)**4)
#     - 3(n-1)^2 / ((n-2)(n-3))
#
# @param data [Array] values; nil/NaN entries are dropped by clean_data first
# @return [Numeric] excess kurtosis, or 0 when the standard deviation is 0
# @raise [ArgumentError] if fewer than 4 values remain after cleaning
def self.kurtosis(data)
  data = clean_data(data)
  n = data.size
  raise ArgumentError, "Data minimal 4 elemen" if n < 4

  m = mean(data)
  sd = standard_deviation(data)
  return 0 if sd == 0

  sum_quad = data.sum { |x| (x - m)**4 }
  first_term = (n * (n + 1) * sum_quad) / ((n - 1) * (n - 2) * (n - 3) * (sd**4))
  # 3.0 forces Float division: with Integer operands `/` truncates
  # (n = 4 gave 27 / 2 == 13 instead of 13.5).
  correction = (3.0 * ((n - 1)**2)) / ((n - 2) * (n - 3))
  first_term - correction
end
.mean(data) ⇒ Object
9 10 11 12 13 |
# File 'lib/my_math_gem/data_processing.rb', line 9

# Arithmetic mean of the values.
#
# @param data [Array] values; nil/NaN entries are dropped by clean_data first
# @return [Float] sum of the values divided by their count
# @raise [ArgumentError] if no values remain after cleaning
def self.mean(data)
  values = clean_data(data)
  if values.empty?
    raise ArgumentError, "Data harus tidak kosong"
  end

  total = values.sum
  total.to_f / values.size
end
.median(data) ⇒ Object
24 25 26 27 28 29 30 |
# File 'lib/my_math_gem/data_processing.rb', line 24

# Median of the values: the middle element for an odd count, the Float
# average of the two middle elements for an even count.
#
# @param data [Array] values; nil/NaN entries are dropped by clean_data first
# @return [Numeric] the median
# @raise [ArgumentError] if no values remain after cleaning
def self.median(data)
  values = clean_data(data)
  raise ArgumentError, "Data harus tidak kosong" if values.empty?

  ordered = values.sort
  count = ordered.size
  middle = count / 2
  return ordered[middle] if count.odd?

  pair_total = ordered[middle - 1] + ordered[middle]
  pair_total.to_f / 2
end
.min_max_normalize(data) ⇒ Object
68 69 70 71 72 73 74 75 76 |
# File 'lib/my_math_gem/data_processing.rb', line 68

# Rescales the values linearly so the minimum maps to 0.0 and the maximum
# maps to 1.0.
#
# @param data [Array] values; nil/NaN entries are dropped by clean_data first
# @return [Array<Float>] normalized values
# @raise [ArgumentError] if no values remain or all values are equal
def self.min_max_normalize(data)
  values = clean_data(data)
  if values.empty?
    raise ArgumentError, "Data harus tidak kosong"
  end

  lowest = values.min
  span = values.max - lowest
  if span == 0
    raise ArgumentError, "Range data 0, normalisasi gagal"
  end

  values.map do |value|
    (value - lowest).to_f / span
  end
end
.mode(data) ⇒ Object
43 44 45 46 47 48 49 |
# File 'lib/my_math_gem/data_processing.rb', line 43

# Most frequent value(s). Every value tied for the highest frequency is
# returned, in order of first appearance.
#
# @param data [Array] values; nil/NaN entries are dropped by clean_data first
# @return [Array] all values sharing the highest frequency
# @raise [ArgumentError] if no values remain after cleaning
def self.mode(data)
  values = clean_data(data)
  raise ArgumentError, "Data harus tidak kosong" if values.empty?

  counts = Hash.new(0)
  values.each { |value| counts[value] += 1 }
  highest = counts.values.max

  counts.each_with_object([]) do |(value, count), modes|
    modes << value if count == highest
  end
end
.moving_average(data, w = 3) ⇒ Object
99 100 101 102 103 104 105 106 |
# File 'lib/my_math_gem/data_processing.rb', line 99

# Simple moving average over consecutive windows of +w+ values.
#
# @param data [Array] values; nil/NaN entries are dropped by clean_data first
# @param w [Integer] window size, must be > 0
# @return [Array] one Float average per window; the cleaned data itself
#   when w == 1; an empty array when w exceeds the number of values
# @raise [ArgumentError] if +w+ is not a positive Integer
def self.moving_average(data, w = 3)
  values = clean_data(data)
  unless w.is_a?(Integer) && w > 0
    raise ArgumentError, "Window harus integer > 0"
  end
  return values if w == 1

  values.each_cons(w).map do |window|
    window.sum.to_f / w
  end
end
.pearson_correlation(x, y) ⇒ Object
162 163 164 165 166 167 168 169 170 171 172 173 174 |
# File 'lib/my_math_gem/data_processing.rb', line 162

# Pearson correlation coefficient between two series.
#
# When +x+ and +y+ have the same length, an observation is dropped as a
# PAIR if either side is nil or NaN. Cleaning each series independently
# would shift the remaining values and correlate unrelated observations.
# When the raw lengths differ there is no positional pairing to preserve,
# so each series is cleaned on its own, as before.
#
# @param x [Array] first series
# @param y [Array] second series
# @return [Numeric] correlation in [-1, 1], or 0 when either series has no
#   variation
# @raise [ArgumentError] if either argument is not an Array, or the usable
#   series differ in length or have fewer than 2 observations
def self.pearson_correlation(x, y)
  raw_x = x
  raw_y = y
  # clean_data also validates that both arguments are Arrays.
  x = clean_data(raw_x)
  y = clean_data(raw_y)

  if raw_x.size == raw_y.size
    missing = ->(v) { v.nil? || (v.respond_to?(:nan?) && v.nan?) }
    pairs = raw_x.zip(raw_y).reject { |xi, yi| missing.call(xi) || missing.call(yi) }
    x = pairs.map(&:first)
    y = pairs.map(&:last)
  end

  raise ArgumentError, "x dan y harus array sama panjang dan > 1" if x.size <= 1 || x.size != y.size

  mx = mean(x)
  my = mean(y)
  numerator = x.zip(y).sum { |xi, yi| (xi - mx) * (yi - my) }
  denom_x = Math.sqrt(x.sum { |xi| (xi - mx)**2 })
  denom_y = Math.sqrt(y.sum { |yi| (yi - my)**2 })
  denom = denom_x * denom_y
  # A constant series has zero spread; report "no correlation" instead of dividing by 0.
  return 0 if denom == 0

  numerator.to_f / denom
end
.percentile(sorted_data, p) ⇒ Object
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/my_math_gem/data_processing.rb', line 144

# p-th percentile of already-sorted data, using linear interpolation
# between the two closest ranks (rank = p/100 * (n - 1)).
#
# @param sorted_data [Array<Numeric>] values; the caller is expected to pass
#   them sorted ascending (this method does not sort)
# @param p [Numeric] percentile in 0..100
# @return [Numeric] the element at an exact rank, otherwise the interpolated
#   value
# @raise [ArgumentError] if +p+ is outside 0..100 or +sorted_data+ is empty
def self.percentile(sorted_data, p)
  raise ArgumentError, "p harus antara 0 dan 100" unless (0..100).include?(p)
  # Empty input used to return nil (p = 0/100) or fail with NoMethodError on
  # nil; raise the same error the other methods use for empty data.
  raise ArgumentError, "Data harus tidak kosong" if sorted_data.empty?

  return sorted_data.first if p == 0
  return sorted_data.last if p == 100

  rank = (p.to_f / 100) * (sorted_data.size - 1)
  lower_idx = rank.floor
  upper_idx = rank.ceil
  # Exact rank: return the element itself, without interpolation.
  return sorted_data[lower_idx] if lower_idx == upper_idx

  lower_value = sorted_data[lower_idx]
  upper_value = sorted_data[upper_idx]
  fraction = rank - lower_idx
  lower_value + fraction * (upper_value - lower_value)
end
.robust_scale(data) ⇒ Object
87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/my_math_gem/data_processing.rb', line 87

# Scales the values using the median and the interquartile range:
# (x - median) / IQR.
#
# @param data [Array] values; nil/NaN entries are dropped by clean_data first
# @return [Array<Float>] scaled values, in the original order
# @raise [ArgumentError] if fewer than 4 values remain or the IQR is 0
def self.robust_scale(data)
  values = clean_data(data)
  if values.size < 4
    raise ArgumentError, "Data harus minimal 4 elemen"
  end

  center = median(values)
  ordered = values.sort
  lower_quartile = percentile(ordered, 25)
  upper_quartile = percentile(ordered, 75)
  spread = upper_quartile - lower_quartile
  if spread == 0
    raise ArgumentError, "IQR 0, robust scaling gagal"
  end

  values.map do |x|
    (x - center).to_f / spread
  end
end
.skewness(data) ⇒ Object
120 121 122 123 124 125 126 127 128 129 |
# File 'lib/my_math_gem/data_processing.rb', line 120

# Sample skewness: n / ((n-1)(n-2)) * sum((x - mean)**3) / sd**3.
#
# @param data [Array] values; nil/NaN entries are dropped by clean_data first
# @return [Numeric] skewness, or 0 when the standard deviation is 0
# @raise [ArgumentError] if fewer than 3 values remain after cleaning
def self.skewness(data)
  values = clean_data(data)
  n = values.size
  if n < 3
    raise ArgumentError, "Data minimal 3 elemen"
  end

  center = mean(values)
  spread = standard_deviation(values)
  return 0 if spread == 0

  cubed_total = values.sum { |value| (value - center)**3 }
  scale = n.to_f / ((n - 1) * (n - 2))
  scale * (cubed_total / (spread**3))
end
.standard_deviation(data) ⇒ Object
59 60 61 |
# File 'lib/my_math_gem/data_processing.rb', line 59

# Standard deviation: the square root of variance(data).
#
# @param data [Array] values, passed straight to variance
# @return [Float] square root of the variance
def self.standard_deviation(data)
  spread = variance(data)
  Math.sqrt(spread)
end
.standard_error_mean(data) ⇒ Object
63 64 65 66 |
# File 'lib/my_math_gem/data_processing.rb', line 63

# Standard error of the mean: sd / sqrt(n), where n is the number of values
# left after cleaning.
#
# @param data [Array] values; nil/NaN entries are ignored
# @return [Float] standard error of the mean
def self.standard_error_mean(data)
  sd = standard_deviation(data)
  n = clean_data(data).size
  # The previous form took sqrt(sd / n), which is not the standard error.
  sd / Math.sqrt(n)
end
.trimmed_mean(data, trim_ratio = 0.1) ⇒ Object
32 33 34 35 36 37 38 39 40 41 |
# File 'lib/my_math_gem/data_processing.rb', line 32

# Mean of the values after discarding the lowest and highest
# floor(trim_ratio * n) elements.
#
# @param data [Array] values; nil/NaN entries are dropped by clean_data first
# @param trim_ratio [Numeric] fraction to cut from EACH end, in 0..0.5
# @return [Float] mean of the remaining values
# @raise [ArgumentError] if trim_ratio is out of range, fewer than 2 values
#   remain after cleaning, or trimming leaves nothing
def self.trimmed_mean(data, trim_ratio = 0.1)
  data = clean_data(data)
  unless trim_ratio.is_a?(Numeric) && trim_ratio >= 0 && trim_ratio <= 0.5
    raise ArgumentError, "trim_ratio harus antara 0 dan 0.5"
  end
  raise ArgumentError, "Data harus cukup besar untuk trimming" if data.size < 2

  sorted = data.sort
  trim_count = (trim_ratio * sorted.size).floor
  # Slice by start/length: the range form `sorted[0...-0]` is `sorted[0...0]`
  # (empty), which made every call with trim_count == 0 raise.
  keep = sorted.size - 2 * trim_count
  trimmed = keep > 0 ? sorted[trim_count, keep] : []
  raise ArgumentError, "Trimmed data kosong" if trimmed.empty?

  trimmed.sum.to_f / trimmed.size
end
.variance(data) ⇒ Object
51 52 53 54 55 56 57 |
# File 'lib/my_math_gem/data_processing.rb', line 51

# Sample variance: sum of squared deviations from the mean divided by n - 1.
#
# @param data [Array] values; nil/NaN entries are dropped by clean_data first
# @return [Float] sample variance
# @raise [ArgumentError] if fewer than 2 values remain after cleaning
def self.variance(data)
  values = clean_data(data)
  count = values.size
  if count < 2
    raise ArgumentError, "Data harus minimal 2 elemen"
  end

  center = mean(values)
  squared_total = values.sum { |value| (value - center)**2 }
  squared_total.to_f / (count - 1)
end
.weighted_mean(data, weights) ⇒ Object
15 16 17 18 19 20 21 22 |
# File 'lib/my_math_gem/data_processing.rb', line 15

# Weighted mean: sum(value * weight) / sum(weight).
#
# When +data+ and +weights+ have the same length, a nil/NaN value is dropped
# together with its weight. Previously only the value was removed, so such
# input was rejected with a size-mismatch error. When the raw lengths differ
# there is no positional pairing to preserve, so only +data+ is cleaned, as
# before.
#
# @param data [Array] values
# @param weights [Array<Numeric>] one weight per value
# @return [Float] the weighted mean
# @raise [ArgumentError] if the usable data is empty, its length differs
#   from the weights, or the weights sum to zero
def self.weighted_mean(data, weights)
  raw = data
  # clean_data also validates that data is an Array.
  data = clean_data(raw)

  if raw.size == weights.size
    pairs = raw.zip(weights).reject do |v, _w|
      v.nil? || (v.respond_to?(:nan?) && v.nan?)
    end
    data = pairs.map(&:first)
    weights = pairs.map(&:last)
  end

  raise ArgumentError, "Data dan weights harus array sama panjang dan tidak kosong" if data.size == 0 || data.size != weights.size

  total_weight = weights.sum.to_f
  raise ArgumentError, "Total bobot tidak boleh nol" if total_weight == 0

  weighted_sum = data.zip(weights).sum { |v, w| v * w }
  weighted_sum / total_weight
end
.z_score_normalize(data) ⇒ Object
78 79 80 81 82 83 84 85 |
# File 'lib/my_math_gem/data_processing.rb', line 78

# Standardizes the values to z-scores: (x - mean) / standard deviation.
#
# @param data [Array] values; nil/NaN entries are dropped by clean_data first
# @return [Array<Float>] z-scores, in the original order
# @raise [ArgumentError] if fewer than 2 values remain or the standard
#   deviation is 0
def self.z_score_normalize(data)
  values = clean_data(data)
  if values.size < 2
    raise ArgumentError, "Data harus minimal 2 elemen"
  end

  center = mean(values)
  spread = standard_deviation(values)
  if spread == 0
    raise ArgumentError, "Standar deviasi 0, normalisasi gagal"
  end

  divisor = spread.to_f
  values.map { |value| (value - center) / divisor }
end