86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
# File 'lib/easy_ml/data/preprocessor/simple_imputer.rb', line 86
# Applies the configured imputation strategy (@strategy) to a series.
#
# Strategy behavior, as implemented below:
# * :mean, :median, :ffill, :most_frequent, :constant - nulls are replaced
#   with the value stored at @statistics[@strategy][:value].
# * :clip - values are clipped to options["min"] / options["max"]
#   (falling back to 0 and 1_000_000_000_000); a series whose entries are
#   all null is returned untouched.
# * :categorical - entries found in the allowed list are kept, every other
#   entry is passed through transform_categorical; an all-null series is
#   filled with transform_categorical(nil).
# * :today - nulls are replaced with transform_today(nil).
# * :custom - entries for which should_transform_custom? is truthy are
#   replaced by transform_custom(entry); an all-null series is filled with
#   transform_custom(nil).
#
# @param x [Object] series to impute; must respond to fill_null, clip,
#   apply, cast, null_count and len (the error message names Polars::Series)
# @return [Object] the imputed series, cast to the dtype stored at
#   @statistics[@strategy][:original_dtype] when one is present
# @raise [ArgumentError] when @strategy is not one of the strategies above
def transform_polars(x)
  # Evaluated on demand so only the branches that need the check pay for it.
  entirely_null = -> { x.null_count == x.len }

  imputed =
    case @strategy
    when :today
      x.fill_null(transform_today(nil))
    when :clip
      lower = options["min"] || 0
      upper = options["max"] || 1_000_000_000_000
      entirely_null.call ? x : x.clip(lower, upper)
    when :custom
      if entirely_null.call
        x.fill_null(transform_custom(nil))
      else
        x.apply do |entry|
          next entry unless should_transform_custom?(entry)

          transform_custom(entry)
        end
      end
    when :categorical
      # Keep the categories whose recorded statistic reaches the configured
      # minimum (presumably an occurrence count - confirm against the fitter).
      known_labels = @statistics.dig(:categorical, :value).each_with_object([]) do |(category, stat), labels|
        labels << category.to_s if stat >= options[:categorical_min]
      end

      if entirely_null.call
        x.fill_null(transform_categorical(nil))
      else
        x.apply do |entry|
          next entry if known_labels.include?(entry)

          transform_categorical(entry)
        end
      end
    when :mean, :median, :ffill, :most_frequent, :constant
      x.fill_null(@statistics[@strategy][:value])
    else
      raise ArgumentError, "Unsupported strategy for Polars::Series: #{@strategy}"
    end

  # Restore the dtype recorded for this strategy, if any.
  target_dtype = @statistics.dig(@strategy, :original_dtype)
  return imputed unless target_dtype

  imputed.cast(target_dtype)
end
|