Module: Geminize::VectorUtils

Defined in:
lib/geminize/vector_utils.rb

Overview

Utility module for vector operations used with embeddings

Class Method Summary collapse

Class Method Details

.average_vectors(vectors) ⇒ Array<Float>

Average multiple vectors

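Raises:

Geminize::ValidationError — if vectors is empty, or if any vector's length differs from the first vector's length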

102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/geminize/vector_utils.rb', line 102

# Computes the element-wise mean of a collection of equal-length vectors.
#
# @param vectors [Array<Array<Numeric>>] vectors to average
# @return [Array<Float>] one mean value per dimension
# @raise [Geminize::ValidationError] if +vectors+ is empty, or if any
#   vector's length differs from the length of the first vector
def average_vectors(vectors)
  if vectors.empty?
    raise Geminize::ValidationError.new(
      "Cannot average an empty array of vectors",
      "INVALID_ARGUMENT"
    )
  end

  # The first vector defines the dimensionality every other one must match.
  expected = vectors.first.length
  vectors.each_with_index do |vector, position|
    next if vector.length == expected

    raise Geminize::ValidationError.new(
      "All vectors must have the same dimensions (expected #{expected}, got #{vector.length} at index #{position})",
      "INVALID_ARGUMENT"
    )
  end

  # Accumulate per-dimension totals starting from 0.0 so the result is Float
  # even when the inputs are Integers.
  count = vectors.length
  totals = vectors.each_with_object(Array.new(expected, 0.0)) do |vector, acc|
    vector.each_with_index { |component, axis| acc[axis] += component }
  end

  totals.map { |total| total / count }
end

.cosine_similarity(vec1, vec2) ⇒ Float

Calculate the cosine similarity between two vectors

Raises:

Geminize::ValidationError — if vec1 and vec2 do not have the same length

12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/geminize/vector_utils.rb', line 12

# Computes the cosine similarity of two equal-length vectors: their dot
# product divided by the product of their Euclidean magnitudes.
#
# @param vec1 [Array<Numeric>] first vector
# @param vec2 [Array<Numeric>] second vector
# @return [Float] the cosine similarity, or 0.0 when either vector has
#   zero magnitude
# @raise [Geminize::ValidationError] if the vectors differ in length
def cosine_similarity(vec1, vec2)
  if vec1.length != vec2.length
    raise Geminize::ValidationError.new(
      "Vectors must have the same dimensions (#{vec1.length} vs #{vec2.length})",
      "INVALID_ARGUMENT"
    )
  end

  pairs = vec1.zip(vec2)
  dot = pairs.reduce(0.0) { |acc, (a, b)| acc + a * b }
  norm1 = Math.sqrt(pairs.reduce(0.0) { |acc, (a, _)| acc + a * a })
  norm2 = Math.sqrt(pairs.reduce(0.0) { |acc, (_, b)| acc + b * b })

  # A zero-length vector has no direction; report 0.0 instead of dividing by zero.
  return 0.0 if norm1.zero? || norm2.zero?

  dot / (norm1 * norm2)
end

.dot_product(vec1, vec2) ⇒ Float

Calculate the dot product of two vectors

Raises:

Geminize::ValidationError — if vec1 and vec2 do not have the same length

66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/geminize/vector_utils.rb', line 66

# Computes the dot product (sum of pairwise products) of two equal-length
# vectors.
#
# @param vec1 [Array<Numeric>] first vector
# @param vec2 [Array<Numeric>] second vector
# @return [Float] the dot product; 0.0 for two empty vectors
# @raise [Geminize::ValidationError] if the vectors differ in length
def dot_product(vec1, vec2)
  if vec1.length != vec2.length
    raise Geminize::ValidationError.new(
      "Vectors must have the same dimensions (#{vec1.length} vs #{vec2.length})",
      "INVALID_ARGUMENT"
    )
  end

  # Seeding the fold with 0.0 keeps the result a Float for Integer inputs.
  vec1.zip(vec2).reduce(0.0) { |acc, (a, b)| acc + a * b }
end

.euclidean_distance(vec1, vec2) ⇒ Float

Calculate the Euclidean distance between two vectors

Raises:

Geminize::ValidationError — if vec1 and vec2 do not have the same length

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/geminize/vector_utils.rb', line 44

# Computes the Euclidean (straight-line) distance between two equal-length
# vectors: the square root of the sum of squared component differences.
#
# @param vec1 [Array<Numeric>] first vector
# @param vec2 [Array<Numeric>] second vector
# @return [Float] the distance; 0.0 for two empty vectors
# @raise [Geminize::ValidationError] if the vectors differ in length
def euclidean_distance(vec1, vec2)
  if vec1.length != vec2.length
    raise Geminize::ValidationError.new(
      "Vectors must have the same dimensions (#{vec1.length} vs #{vec2.length})",
      "INVALID_ARGUMENT"
    )
  end

  squared_total = vec1.zip(vec2).reduce(0.0) do |acc, (a, b)|
    delta = a - b
    acc + delta * delta
  end

  Math.sqrt(squared_total)
end

.most_similar(target, vectors, top_k = nil, metric = :cosine) ⇒ Array<Hash>

Find the most similar vectors to a target vector



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/geminize/vector_utils.rb', line 138

# Ranks +vectors+ by their similarity to +target+, most similar first.
#
# The metric is resolved once, before any vector is scored, so an
# unsupported metric is reported even when +vectors+ is empty.
#
# @param target [Array<Numeric>] the vector to compare against
# @param vectors [Array<Array<Numeric>>] candidate vectors
# @param top_k [Integer, nil] maximum number of results to return; all
#   results are returned when nil
# @param metric [Symbol] +:cosine+ or +:euclidean+
# @return [Array<Hash>] hashes with +:index+ (position of the candidate in
#   +vectors+) and +:similarity+ (Float score, higher is more similar),
#   sorted by descending similarity
# @raise [Geminize::ValidationError] if +metric+ is not supported; errors
#   raised by the underlying similarity functions propagate unchanged
def most_similar(target, vectors, top_k = nil, metric = :cosine)
  scorer = case metric
  when :cosine
    ->(vec) { cosine_similarity(target, vec) }
  when :euclidean
    # Convert distance to a similarity in (0, 1]: higher is more similar.
    ->(vec) { 1.0 / (1.0 + euclidean_distance(target, vec)) }
  else
    raise Geminize::ValidationError.new(
      "Unknown metric: #{metric}. Supported metrics: :cosine, :euclidean",
      "INVALID_ARGUMENT"
    )
  end

  scored = vectors.each_with_index.map do |vec, i|
    {index: i, similarity: scorer.call(vec)}
  end

  # Negate the score so the ascending sort yields descending similarity.
  ranked = scored.sort_by { |entry| -entry[:similarity] }
  top_k ? ranked.take(top_k) : ranked
end

.normalize(vec) ⇒ Array<Float>

Normalize a vector to unit length



85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/geminize/vector_utils.rb', line 85

# Scales a vector to unit length by dividing each component by the vector's
# Euclidean magnitude.
#
# @param vec [Array<Numeric>] the vector to normalize
# @return [Array<Float>] a new array of scaled components; all components
#   are 0.0 when the input has zero magnitude
def normalize(vec)
  norm = Math.sqrt(vec.reduce(0.0) { |acc, component| acc + component * component })

  if norm.zero?
    # A zero vector has no direction to preserve; return zeros rather than divide by zero.
    vec.map { 0.0 }
  else
    vec.map { |component| component / norm }
  end
end