Module: MyMathGem::DataProcessing

Defined in:
lib/my_math_gem/data_processing.rb

Class Method Summary collapse

Class Method Details

.clean_data(data) ⇒ Object

Buang nil atau NaN dari data

Raises:

  • (ArgumentError)


4
5
6
7
# File 'lib/my_math_gem/data_processing.rb', line 4

# Strips missing values from a data array.
#
# An entry is dropped when it is nil, or when it responds to +nan?+ and
# reports NaN. All other entries are kept in their original order.
#
# @param data [Array] raw values
# @return [Array] a new array without nil/NaN entries
# @raise [ArgumentError] if +data+ is not an Array
def self.clean_data(data)
  raise ArgumentError, "Data harus array" unless data.is_a?(Array)

  data.reject do |value|
    value.nil? || (value.respond_to?(:nan?) && value.nan?)
  end
end

.filter_outliers(data) ⇒ Object

Raises:

  • (ArgumentError)


108
109
110
111
112
113
114
115
116
117
118
# File 'lib/my_math_gem/data_processing.rb', line 108

# Removes values lying more than 1.5 * IQR outside the quartiles.
#
# nil/NaN entries are removed first via clean_data. The quartiles are the
# 25th and 75th percentiles of the sorted, cleaned data. Surviving values are
# returned in their original (unsorted) order.
#
# @param data [Array] raw values
# @return [Array] the cleaned values that fall inside [q1 - 1.5*IQR, q3 + 1.5*IQR]
# @raise [ArgumentError] if fewer than 4 values remain after cleaning
def self.filter_outliers(data)
  values = clean_data(data)
  raise ArgumentError, "Data harus minimal 4 elemen" if values.size < 4

  ordered = values.sort
  first_quartile = percentile(ordered, 25)
  third_quartile = percentile(ordered, 75)
  margin = 1.5 * (third_quartile - first_quartile)
  lowest_allowed = first_quartile - margin
  highest_allowed = third_quartile + margin

  values.reject { |value| value < lowest_allowed || value > highest_allowed }
end

.kurtosis(data) ⇒ Object

Raises:

  • (ArgumentError)


131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/my_math_gem/data_processing.rb', line 131

# Computes the bias-corrected sample excess kurtosis:
#
#   n(n+1) / ((n-1)(n-2)(n-3)) * sum((x - mean)^4) / sd^4  -  3(n-1)^2 / ((n-2)(n-3))
#
# nil/NaN entries are removed first via clean_data. +sd+ is the value returned
# by standard_deviation.
#
# @param data [Array] raw values
# @return [Numeric] the excess kurtosis, or 0 when the standard deviation is 0
# @raise [ArgumentError] if fewer than 4 values remain after cleaning
def self.kurtosis(data)
  data = clean_data(data)
  n = data.size
  raise ArgumentError, "Data minimal 4 elemen" if n < 4

  m = mean(data)
  sd = standard_deviation(data)
  return 0 if sd == 0

  sum_quad = data.sum { |x| (x - m)**4 }
  scaled_fourth_moment = (n * (n + 1) * sum_quad) / ((n - 1) * (n - 2) * (n - 3) * (sd**4))
  # Every operand here is an Integer, so force Float division; plain `/`
  # would truncate (e.g. 27 / 2 => 13 for n = 4) and skew the result.
  bias_correction = (3.0 * ((n - 1)**2)) / ((n - 2) * (n - 3))
  scaled_fourth_moment - bias_correction
end

.mean(data) ⇒ Object

Raises:

  • (ArgumentError)


9
10
11
12
13
# File 'lib/my_math_gem/data_processing.rb', line 9

# Computes the arithmetic mean of the data.
#
# nil/NaN entries are removed first via clean_data.
#
# @param data [Array] raw values
# @return [Float] sum of the cleaned values divided by their count
# @raise [ArgumentError] if no values remain after cleaning
def self.mean(data)
  values = clean_data(data)
  raise ArgumentError, "Data harus tidak kosong" if values.empty?

  total = values.sum
  total.to_f / values.length
end

.median(data) ⇒ Object

Raises:

  • (ArgumentError)


24
25
26
27
28
29
30
# File 'lib/my_math_gem/data_processing.rb', line 24

# Computes the median of the data.
#
# nil/NaN entries are removed first via clean_data. For an odd count the
# middle element of the sorted values is returned as-is; for an even count
# the two middle elements are averaged and a Float is returned.
#
# @param data [Array] raw values
# @return [Numeric] the median value
# @raise [ArgumentError] if no values remain after cleaning
def self.median(data)
  values = clean_data(data)
  raise ArgumentError, "Data harus tidak kosong" if values.empty?

  ordered = values.sort
  count = ordered.length
  half = count / 2
  return ordered[half] if count.odd?

  (ordered[half - 1] + ordered[half]).to_f / 2
end

.min_max_normalize(data) ⇒ Object

Raises:

  • (ArgumentError)


68
69
70
71
72
73
74
75
76
# File 'lib/my_math_gem/data_processing.rb', line 68

# Rescales the data linearly so the minimum maps to 0.0 and the maximum to 1.0.
#
# nil/NaN entries are removed first via clean_data.
#
# @param data [Array] raw values
# @return [Array<Float>] (value - min) / (max - min) for each cleaned value
# @raise [ArgumentError] if no values remain after cleaning, or if max == min
def self.min_max_normalize(data)
  values = clean_data(data)
  raise ArgumentError, "Data harus tidak kosong" if values.empty?

  floor = values.min
  span = values.max - floor
  raise ArgumentError, "Range data 0, normalisasi gagal" if span == 0

  values.map { |value| (value - floor).to_f / span }
end

.mode(data) ⇒ Object

Raises:

  • (ArgumentError)


43
44
45
46
47
48
49
# File 'lib/my_math_gem/data_processing.rb', line 43

# Finds the most frequent value(s) in the data.
#
# nil/NaN entries are removed first via clean_data. Every value that reaches
# the highest frequency is returned, in order of first appearance.
#
# @param data [Array] raw values
# @return [Array] all values tied for the highest frequency
# @raise [ArgumentError] if no values remain after cleaning
def self.mode(data)
  values = clean_data(data)
  raise ArgumentError, "Data harus tidak kosong" if values.empty?

  counts = Hash.new(0)
  values.each { |value| counts[value] += 1 }
  highest = counts.each_value.max

  counts.each_with_object([]) do |(value, count), modes|
    modes << value if count == highest
  end
end

.moving_average(data, w = 3) ⇒ Object

Raises:

  • (ArgumentError)


99
100
101
102
103
104
105
106
# File 'lib/my_math_gem/data_processing.rb', line 99

# Smooths the data with a simple moving average over consecutive windows.
#
# nil/NaN entries are removed first via clean_data. With a window of 1 the
# cleaned data is returned unchanged. If the window is larger than the cleaned
# data, no window exists and the result is an empty array.
#
# @param data [Array] raw values
# @param w [Integer] window length, must be > 0
# @return [Array] one Float average per window of +w+ consecutive values
# @raise [ArgumentError] if +w+ is not a positive Integer
def self.moving_average(data, w = 3)
  values = clean_data(data)
  valid_window = w.is_a?(Integer) && w > 0
  raise ArgumentError, "Window harus integer > 0" unless valid_window
  return values if w == 1

  values.each_cons(w).map { |window| window.sum.to_f / w }
end

.pearson_correlation(x, y) ⇒ Object

Raises:

  • (ArgumentError)


162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/my_math_gem/data_processing.rb', line 162

# Computes the Pearson correlation coefficient between two series.
#
# Missing values (nil/NaN, as defined by clean_data) are handled as follows:
# * When +x+ and +y+ have the same raw length, they are treated as paired
#   observations and a pair is dropped if either side is missing. Cleaning
#   each series on its own would shift the remaining values and correlate
#   observations that were never paired.
# * When the raw lengths differ, each series is cleaned independently and the
#   cleaned lengths must then match.
#
# @param x [Array] first series
# @param y [Array] second series
# @return [Numeric] the correlation, or 0 when either series has no variation
# @raise [ArgumentError] if either argument is not an Array, if fewer than 2
#   observations remain, or if the usable lengths differ
def self.pearson_correlation(x, y)
  xs = clean_data(x)
  ys = clean_data(y)

  has_missing = xs.size != x.size || ys.size != y.size
  if has_missing && x.size == y.size
    # A pair survives only if clean_data keeps both of its members.
    pairs = x.zip(y).select { |pair| clean_data(pair).size == 2 }
    xs = pairs.map(&:first)
    ys = pairs.map(&:last)
  end

  raise ArgumentError, "x dan y harus array sama panjang dan > 1" if xs.size <= 1 || xs.size != ys.size

  mx = mean(xs)
  my = mean(ys)
  numerator = xs.zip(ys).sum { |xi, yi| (xi - mx) * (yi - my) }
  denom_x = Math.sqrt(xs.sum { |xi| (xi - mx)**2 })
  denom_y = Math.sqrt(ys.sum { |yi| (yi - my)**2 })
  denom = denom_x * denom_y
  return 0 if denom == 0

  numerator.to_f / denom
end

.percentile(sorted_data, p) ⇒ Object

Raises:

  • (ArgumentError)


144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/my_math_gem/data_processing.rb', line 144

# Computes the p-th percentile by linear interpolation between the two
# closest ranks, where rank = p/100 * (size - 1).
#
# The input is used as given: it is indexed by position and never sorted
# here, so the caller must pass data that is already in ascending order.
#
# @param sorted_data [Array<Numeric>] values in ascending order
# @param p [Numeric] percentile to compute, within 0..100
# @return [Numeric] the element at the rank, or the interpolated value when
#   the rank falls between two elements
# @raise [ArgumentError] if +p+ is outside 0..100 or +sorted_data+ is empty
def self.percentile(sorted_data, p)
  raise ArgumentError, "p harus antara 0 dan 100" unless (0..100).include?(p)
  # Without this guard an empty array yields nil for p = 0/100 and a
  # NoMethodError (nil arithmetic) for every other p.
  raise ArgumentError, "Data harus tidak kosong" if sorted_data.empty?
  return sorted_data.first if p == 0
  return sorted_data.last if p == 100

  rank = (p.to_f / 100) * (sorted_data.size - 1)
  lower_idx = rank.floor
  upper_idx = rank.ceil
  lower_value = sorted_data[lower_idx]
  return lower_value if lower_idx == upper_idx

  fraction = rank - lower_idx
  lower_value + fraction * (sorted_data[upper_idx] - lower_value)
end

.robust_scale(data) ⇒ Object

Raises:

  • (ArgumentError)


87
88
89
90
91
92
93
94
95
96
97
# File 'lib/my_math_gem/data_processing.rb', line 87

# Scales the data using the median and the interquartile range (IQR):
# each cleaned value becomes (value - median) / (q3 - q1).
#
# nil/NaN entries are removed first via clean_data. q1 and q3 are the 25th and
# 75th percentiles of the sorted, cleaned data.
#
# @param data [Array] raw values
# @return [Array<Float>] the scaled values, in the original order
# @raise [ArgumentError] if fewer than 4 values remain after cleaning, or if
#   the IQR is 0
def self.robust_scale(data)
  values = clean_data(data)
  raise ArgumentError, "Data harus minimal 4 elemen" if values.size < 4

  center = median(values)
  ordered = values.sort
  first_quartile = percentile(ordered, 25)
  spread = percentile(ordered, 75) - first_quartile
  raise ArgumentError, "IQR 0, robust scaling gagal" if spread == 0

  values.map { |value| (value - center).to_f / spread }
end

.skewness(data) ⇒ Object

Raises:

  • (ArgumentError)


120
121
122
123
124
125
126
127
128
129
# File 'lib/my_math_gem/data_processing.rb', line 120

# Computes the adjusted sample skewness:
#
#   n / ((n-1)(n-2)) * sum((x - mean)^3) / sd^3
#
# nil/NaN entries are removed first via clean_data. +sd+ is the value returned
# by standard_deviation.
#
# @param data [Array] raw values
# @return [Numeric] the skewness, or 0 when the standard deviation is 0
# @raise [ArgumentError] if fewer than 3 values remain after cleaning
def self.skewness(data)
  values = clean_data(data)
  count = values.size
  raise ArgumentError, "Data minimal 3 elemen" if count < 3

  center = mean(values)
  spread = standard_deviation(values)
  return 0 if spread == 0

  cubed_total = values.sum { |value| (value - center)**3 }
  scale = count.to_f / ((count - 1) * (count - 2))
  standardized = cubed_total / (spread**3)
  scale * standardized
end

.standard_deviation(data) ⇒ Object



59
60
61
# File 'lib/my_math_gem/data_processing.rb', line 59

# Computes the standard deviation as the square root of variance(data).
#
# Input validation and cleaning are delegated to variance, so any error it
# raises propagates unchanged.
#
# @param data [Array] raw values
# @return [Float] square root of the variance
def self.standard_deviation(data)
  spread = variance(data)
  Math.sqrt(spread)
end

.standard_error_mean(data) ⇒ Object



63
64
65
66
# File 'lib/my_math_gem/data_processing.rb', line 63

# Computes the standard error of the mean: sd / sqrt(n), where +sd+ is the
# value returned by standard_deviation and +n+ is the number of values left
# after clean_data removes nil/NaN entries.
#
# Errors raised by standard_deviation propagate unchanged.
#
# @param data [Array] raw values
# @return [Float] the standard error of the mean
def self.standard_error_mean(data)
  sd = standard_deviation(data)
  n = clean_data(data).size
  # The square root applies to n only; sqrt(sd / n) is a different quantity.
  sd / Math.sqrt(n)
end

.trimmed_mean(data, trim_ratio = 0.1) ⇒ Object

Raises:

  • (ArgumentError)


32
33
34
35
36
37
38
39
40
41
# File 'lib/my_math_gem/data_processing.rb', line 32

# Computes the mean after discarding the same number of values from each end
# of the sorted data.
#
# nil/NaN entries are removed first via clean_data. The number of values cut
# from each end is floor(trim_ratio * size); when that is 0 nothing is cut
# and the result is the plain mean.
#
# @param data [Array] raw values
# @param trim_ratio [Numeric] fraction to cut from each end, within 0..0.5
# @return [Float] mean of the values that remain after trimming
# @raise [ArgumentError] if +trim_ratio+ is not a Numeric in 0..0.5, if fewer
#   than 2 values remain after cleaning, or if trimming removes every value
def self.trimmed_mean(data, trim_ratio = 0.1)
  data = clean_data(data)
  valid_ratio = trim_ratio.is_a?(Numeric) && trim_ratio >= 0 && trim_ratio <= 0.5
  raise ArgumentError, "trim_ratio harus antara 0 dan 0.5" unless valid_ratio
  raise ArgumentError, "Data harus cukup besar untuk trimming" if data.size < 2

  sorted = data.sort
  trim_count = (trim_ratio * sorted.size).floor
  kept = sorted.size - 2 * trim_count
  raise ArgumentError, "Trimmed data kosong" if kept <= 0

  # Slice by start and length: a range ending in -trim_count becomes
  # `0...-0` (an empty slice) whenever trim_count is 0.
  trimmed = sorted[trim_count, kept]
  trimmed.sum.to_f / kept
end

.variance(data) ⇒ Object

Raises:

  • (ArgumentError)


51
52
53
54
55
56
57
# File 'lib/my_math_gem/data_processing.rb', line 51

# Computes the sample variance: the sum of squared deviations from the mean,
# divided by (count - 1).
#
# nil/NaN entries are removed first via clean_data.
#
# @param data [Array] raw values
# @return [Float] the sample variance
# @raise [ArgumentError] if fewer than 2 values remain after cleaning
def self.variance(data)
  values = clean_data(data)
  count = values.size
  raise ArgumentError, "Data harus minimal 2 elemen" if count < 2

  center = mean(values)
  squared_deviations = values.sum { |value| (value - center)**2 }
  squared_deviations.to_f / (count - 1)
end

.weighted_mean(data, weights) ⇒ Object

Raises:

  • (ArgumentError)


15
16
17
18
19
20
21
22
# File 'lib/my_math_gem/data_processing.rb', line 15

# Computes the weighted mean: sum(value * weight) / sum(weight).
#
# nil/NaN entries are removed from +data+ via clean_data. +weights+ may be
# given in either of two layouts:
# * one weight per raw data entry: the weights belonging to removed entries
#   are dropped together with them, so the pairing is preserved;
# * one weight per cleaned data entry: used as-is.
#
# @param data [Array] raw values
# @param weights [Array<Numeric>] weights (anything responding to +to_a+)
# @return [Float] the weighted mean
# @raise [ArgumentError] if +weights+ cannot be converted with +to_a+, if no
#   data remains after cleaning, if the number of weights matches neither
#   layout, or if the weights sum to zero
def self.weighted_mean(data, weights)
  cleaned = clean_data(data)
  size_error = "Data dan weights harus array sama panjang dan tidak kosong"
  # Reject nil-like or scalar weights with the documented error instead of a
  # NoMethodError from calling size/sum on them.
  raise ArgumentError, size_error unless weights.respond_to?(:to_a)

  weights = weights.to_a
  if cleaned.size != data.size && weights.size == data.size
    # Weights line up with the raw data: keep only those whose value survives
    # clean_data.
    kept = data.zip(weights).select { |value, _weight| clean_data([value]).size == 1 }
    weights = kept.map(&:last)
  end
  raise ArgumentError, size_error if cleaned.empty? || cleaned.size != weights.size

  total_weight = weights.sum.to_f
  raise ArgumentError, "Total bobot tidak boleh nol" if total_weight == 0

  weighted_sum = cleaned.zip(weights).sum { |v, w| v * w }
  weighted_sum / total_weight
end

.z_score_normalize(data) ⇒ Object

Raises:

  • (ArgumentError)


78
79
80
81
82
83
84
85
# File 'lib/my_math_gem/data_processing.rb', line 78

# Standardizes the data to z-scores: (value - mean) / standard deviation.
#
# nil/NaN entries are removed first via clean_data.
#
# @param data [Array] raw values
# @return [Array<Float>] one z-score per cleaned value, in the original order
# @raise [ArgumentError] if fewer than 2 values remain after cleaning, or if
#   the standard deviation is 0
def self.z_score_normalize(data)
  values = clean_data(data)
  raise ArgumentError, "Data harus minimal 2 elemen" if values.size < 2

  center = mean(values)
  spread = standard_deviation(values)
  raise ArgumentError, "Standar deviasi 0, normalisasi gagal" if spread == 0

  divisor = spread.to_f
  values.map { |value| (value - center) / divisor }
end