Class: Eps::TextEncoder

Inherits:
Object
  • Object
show all
Defined in:
lib/eps/text_encoder.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(**options) ⇒ TextEncoder

Returns a new instance of TextEncoder.



5
6
7
8
# File 'lib/eps/text_encoder.rb', line 5

# Builds an encoder from keyword options.
#
# The full keyword hash is kept in @options. The :vocabulary entry, when
# present and truthy, seeds @vocabulary; otherwise the vocabulary starts
# out as an empty array.
#
# @param options [Hash] keyword options; only :vocabulary is read here
def initialize(**options)
  @options = options
  seed = options[:vocabulary]
  @vocabulary = seed || []
end

Instance Attribute Details

#options ⇒ Object (readonly)

Returns the value of attribute options.



3
4
5
# File 'lib/eps/text_encoder.rb', line 3

# Reader for the keyword options captured by the constructor.
#
# @return [Hash] the value of @options (the constructor stores its
#   **options hash there unchanged)
def options
  @options
end

#vocabulary ⇒ Object (readonly)

Returns the value of attribute vocabulary.



3
4
5
# File 'lib/eps/text_encoder.rb', line 3

# Reader for the current vocabulary.
#
# The constructor seeds @vocabulary from options[:vocabulary] (or an empty
# array), and #fit replaces it with the keys that survive its filtering.
#
# @return [Object] the value of @vocabulary
#   NOTE(review): presumably an Array of tokens — confirm against callers
#   that pass :vocabulary explicitly.
def vocabulary
  @vocabulary
end

Instance Method Details

#fit(arr) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/eps/text_encoder.rb', line 10

# Learns the vocabulary from +arr+ and returns the encoded result.
#
# count_and_fit supplies a pair: a counts collection and the encoded
# output. The counts are narrowed by whichever of these options are set,
# applied in this order:
#
# * :min_length      - keep entries whose key length is >= this value
# * :min_occurrences - keep entries whose count is >= this value
# * :max_occurrences - drop entries whose count is > this value
# * :max_features    - keep only the first N entries after ordering by
#                      descending count
#
# The keys that remain become @vocabulary.
#
# NOTE(review): counts is presumably a Hash of token => occurrence count;
# count_and_fit is not visible here — confirm.
#
# @param arr [Object] input handed straight to count_and_fit
# @return [Object] the second value returned by count_and_fit
def fit(arr)
  tallies, encoded = count_and_fit(arr)

  if (shortest = options[:min_length])
    tallies.keep_if { |token, _| token.length >= shortest }
  end

  if (floor = options[:min_occurrences])
    tallies.keep_if { |_, seen| seen >= floor }
  end

  if (ceiling = options[:max_occurrences])
    tallies.delete_if { |_, seen| seen > ceiling }
  end

  if (limit = options[:max_features])
    # Rank by descending count, then truncate to the requested size.
    ranked = tallies.sort_by { |_, seen| -seen }
    tallies = ranked[0...limit].to_h
  end

  @vocabulary = tallies.keys
  encoded
end

#transform(arr) ⇒ Object



38
39
40
41
# File 'lib/eps/text_encoder.rb', line 38

# Encodes +arr+ without updating the vocabulary in this method.
#
# Delegates to count_and_fit and returns only the second value of the pair
# it produces. Unlike #fit, nothing here assigns @vocabulary.
#
# @param arr [Object] input handed straight to count_and_fit
# @return [Object] the second value returned by count_and_fit
def transform(arr)
  # The counts are not needed here; the underscore prefix marks them as
  # intentionally unused so `ruby -w` does not warn about the assignment.
  _counts, encoded = count_and_fit(arr)
  encoded
end