Method: Eps::TextEncoder#fit

Defined in:
lib/eps/text_encoder.rb

#fit(arr) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/eps/text_encoder.rb', line 10

# Learns the vocabulary from +arr+ and stores it in @vocabulary.
#
# The per-token counts come from #count_and_fit and are then narrowed by the
# optional settings read from #options, applied in this order:
#
# * +:min_length+      - keep keys whose +length+ is at least this value
# * +:min_occurrences+ - keep entries whose count is at least this value
# * +:max_occurrences+ - drop entries whose count exceeds this value
# * +:max_features+    - keep only the highest-count entries, up to this many
#
# Options that are nil/false (or absent) are skipped.
#
# @param arr [Object] input forwarded untouched to #count_and_fit
#   (presumably an array of strings - confirm against the caller)
# @return [Object] the second element returned by #count_and_fit
def fit(arr)
  counts, fitted = count_and_fit(arr)

  min_length = options[:min_length]
  counts.select! { |token, _| token.length >= min_length } if min_length

  min_occurrences = options[:min_occurrences]
  counts.select! { |_, count| count >= min_occurrences } if min_occurrences

  max_occurrences = options[:max_occurrences]
  counts.reject! { |_, count| count > max_occurrences } if max_occurrences

  max_features = options[:max_features]
  if max_features
    # sort_by is not guaranteed to be stable, so the original position is used
    # as a secondary key: entries with equal counts keep their first-seen
    # order and the selected vocabulary is reproducible.
    ranked = counts.each_with_index.sort_by { |(_, count), index| [-count, index] }
    counts = ranked[0...max_features].map(&:first).to_h
  end

  @vocabulary = counts.keys

  fitted
end