10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
# File 'lib/eps/text_encoder.rb', line 10
# Builds the vocabulary from +arr+ and returns the fitted result.
#
# Delegates to +count_and_fit+, which yields a pair: a hash of counts and a
# second value that is handed back to the caller unchanged. The hash is then
# pruned in place according to whichever of these options are set (each one
# is skipped when its value is nil/false):
#
# * +:min_length+      - keep keys whose +length+ is at least this value
# * +:min_occurrences+ - keep entries whose value is at least this value
# * +:max_occurrences+ - drop entries whose value exceeds this value
# * +:max_features+    - keep only the entries with the largest values,
#                        up to this many
#
# The keys that survive are stored in +@vocabulary+.
#
# NOTE(review): presumably the hash maps each token to the number of times it
# occurs in +arr+ -- confirm against +count_and_fit+.
#
# @param arr the input passed straight through to +count_and_fit+
# @return the second element returned by +count_and_fit+
def fit(arr)
  tallies, fitted = count_and_fit(arr)

  shortest = options[:min_length]
  tallies.keep_if { |token, _| token.length >= shortest } if shortest

  fewest = options[:min_occurrences]
  tallies.keep_if { |_, seen| seen >= fewest } if fewest

  most = options[:max_occurrences]
  tallies.delete_if { |_, seen| seen > most } if most

  limit = options[:max_features]
  if limit
    # Negating the value orders the pairs from largest to smallest.
    ranked = tallies.sort_by { |pair| -pair.last }
    tallies = ranked.slice(0...limit).to_h
  end

  @vocabulary = tallies.keys
  fitted
end
|