Module: HashingTrickMl::ActsAsVectorized

Extended by:
ActiveSupport::Concern
Defined in:
lib/hashing_trick_ml/acts_as_vectorized.rb

Defined Under Namespace

Modules: ClassMethods

Instance Method Summary

Instance Method Details

#build_boolean_vector(subset, full_set) ⇒ Object



22
23
24
25
26
27
# File 'lib/hashing_trick_ml/acts_as_vectorized.rb', line 22

# Encodes which entries of +full_set+ occur in +subset+ as a 0/1 array.
#
# The result has one slot per entry of +full_set+ (sized via +full_set.size+).
# For each member of +subset+, the slot at +full_set.index(member)+ is set to
# 1; members that +full_set+ does not contain are ignored.
#
# @param subset [Enumerable] values to mark
# @param full_set [#index, #size] reference collection that fixes slot positions
# @return [Array<Integer>] array of 0/1 flags with +full_set.size+ entries
def build_boolean_vector(subset, full_set)
  flags = Array.new(full_set.size, 0)
  subset.each do |member|
    position = full_set.index(member)
    # +index+ yields nil when the member is absent from full_set.
    flags[position] = 1 unless position.nil?
  end
  flags
end

#build_exponential_vector(values, dimensions:) ⇒ Object



33
34
35
36
37
38
39
40
41
# File 'lib/hashing_trick_ml/acts_as_vectorized.rb', line 33

# Counts values into logarithmically sized buckets (log base 1.5).
#
# Blank entries are dropped first. Each remaining value is converted with
# +to_f+ and placed in bucket +floor(log_1.5(value))+, clamped to the range
# +0..dimensions - 1+. Values that are not strictly positive after conversion
# (zero, negatives, NaN) are counted in bucket 0, the same bucket that values
# below 1 already fall into.
#
# @param values [Enumerable] raw values; entries responding to +blank?+ with
#   true are skipped
# @param dimensions [Integer] number of buckets in the resulting vector
# @return [Array<Integer>] per-bucket counts, +dimensions+ entries long
def build_exponential_vector(values, dimensions:)
  last_bucket = dimensions - 1

  values.reject(&:blank?).each_with_object([0] * dimensions) do |value, result|
    number = value.to_f
    # Math.log raises Math::DomainError for negative input and returns NaN for
    # NaN (which cannot be floored), so only take the logarithm of positives.
    index = number.positive? ? Math.log(number, 1.5) : 0
    index = 0 if index.negative?
    index = last_bucket if index > last_bucket

    result[index.floor] += 1
  end
end

#build_fuzzy_vector(statement) ⇒ Object



29
30
31
# File 'lib/hashing_trick_ml/acts_as_vectorized.rb', line 29

# Converts a truthiness check into a numeric feature.
#
# @param statement [Object] any value; only +nil+ and +false+ are falsy
# @return [Integer] 1 when +statement+ is truthy, otherwise 0
def build_fuzzy_vector(statement)
  return 1 if statement

  0
end

#build_maybe_nil_vector(value) ⇒ Object



43
44
45
# File 'lib/hashing_trick_ml/acts_as_vectorized.rb', line 43

# Pairs a possibly-missing value with a flag recording whether it was present.
#
# A value for which +present?+ is false is replaced by 0 and flagged 0; any
# other value is passed through unchanged and flagged 1.
#
# @param value [Object, nil] the raw feature value
# @return [Array] two-element array +[value_or_zero, presence_flag]+
def build_maybe_nil_vector(value)
  if value.present?
    [value, 1]
  else
    [0, 0]
  end
end

#build_word_vector(data, dimensions: self.class.default_dimensions, separator: ' ') ⇒ Object



15
16
17
18
19
20
# File 'lib/hashing_trick_ml/acts_as_vectorized.rb', line 15

# Builds a fixed-size word-count vector using the hashing trick.
#
# +data+ is passed through +normalize_words+ (defined outside this method;
# presumably returns a String — confirm), then split on +separator+. Each
# word is downcased and hashed with SHA-2; the last four digest bytes are read
# as a big-endian unsigned 32-bit integer, and that number modulo +dimensions+
# selects the slot to increment. Different words can share a slot.
#
# @param data [Object] raw input handed to +normalize_words+
# @param dimensions [Integer] vector length; defaults to
#   +self.class.default_dimensions+
# @param separator [String] delimiter used to split the normalized text
# @return [Array<Integer>] per-slot word counts, +dimensions+ entries long
def build_word_vector(data, dimensions: self.class.default_dimensions, separator: ' ')
  tokens = normalize_words(data).split(separator)
  counts = Array.new(dimensions, 0)

  tokens.each do |token|
    digest = Digest::SHA2.digest(token.downcase)
    # The trailing 4 bytes of the binary digest, as one unsigned 32-bit value.
    tail = digest[-4, 4].unpack('N').first
    counts[tail % dimensions] += 1
  end

  counts
end