Class: Geminize::Models::EmbeddingResponse

Inherits:
Object
  • Object
show all
Defined in:
lib/geminize/models/embedding_response.rb

Overview

Represents a response from the Gemini API for an embedding request

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data) ⇒ EmbeddingResponse

Initialize a new embedding response

Parameters:

  • data (Hash)

    The raw API response data

Raises:



16
17
18
19
20
# File 'lib/geminize/models/embedding_response.rb', line 16

# Wrap a raw API payload in an EmbeddingResponse.
#
# The payload is stored first because the two follow-up steps take no
# arguments and therefore have to work from instance state.
#
# @param data [Hash] the raw API response data
def initialize(data)
  @data = data
  # validate! and parse_response are defined elsewhere in the class.
  # NOTE(review): presumably validate! raises on a malformed payload and
  # parse_response fills in @usage -- confirm against their definitions.
  validate!
  parse_response
end

Instance Attribute Details

#data ⇒ Hash (readonly)

Returns the raw response data from the API.

Returns:

  • (Hash)

    The raw response data from the API



8
9
10
# File 'lib/geminize/models/embedding_response.rb', line 8

# Reader for the payload stored in @data.
#
# @return [Hash] the raw response data from the API
def data
  @data
end

#usage ⇒ Hash? (readonly)

Returns token counts for the request and response.

Returns:

  • (Hash, nil)

    Token counts for the request and response



11
12
13
# File 'lib/geminize/models/embedding_response.rb', line 11

# Reader for the usage information held in @usage.
#
# @return [Hash, nil] token counts for the request and response, or nil
#   when @usage has not been set
def usage
  @usage
end

Class Method Details

.cosine_similarity(vec1, vec2) ⇒ Float

Calculate the cosine similarity between two embedding vectors

Parameters:

  • vec1 (Array<Float>)

    First vector

  • vec2 (Array<Float>)

    Second vector

Returns:

  • (Float)

    Cosine similarity (-1 to 1)

Raises:



106
107
108
# File 'lib/geminize/models/embedding_response.rb', line 106

# Class-level convenience wrapper that forwards both vectors to
# Geminize::VectorUtils.cosine_similarity and returns its result unchanged.
#
# @param vec1 [Array<Float>] first vector
# @param vec2 [Array<Float>] second vector
# @return [Float] cosine similarity (-1 to 1)
def self.cosine_similarity(vec1, vec2)
  Geminize::VectorUtils.cosine_similarity(vec1, vec2)
end

.from_hash(response_data) ⇒ EmbeddingResponse

Create an EmbeddingResponse object from a raw API response

Parameters:

  • response_data (Hash)

    The raw API response

Returns:



377
378
379
# File 'lib/geminize/models/embedding_response.rb', line 377

# Factory that builds an instance from a raw API response by passing the
# hash straight to the constructor.
#
# @param response_data [Hash] the raw API response
# @return [EmbeddingResponse] the newly constructed response object
def self.from_hash(response_data)
  new(response_data)
end

.load(path, format = nil) ⇒ Geminize::Models::EmbeddingResponse

Load embeddings from a file

Parameters:

  • path (String)

    Path to the file

  • format (Symbol, nil) (defaults to: nil)

    Format of the file (:json, :csv, :binary). If nil, the format is inferred from the file extension.

Returns:

Raises:



633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
# File 'lib/geminize/models/embedding_response.rb', line 633

# Load embeddings from a file.
#
# JSON files may either use the layout
# `{"embeddings" => [[...], ...], "metadata" => {...}}` or already be in the
# raw API response shape. CSV files hold one vector per line, optionally
# preceded by a header row.
#
# @param path [String] path to the file
# @param format [Symbol, nil] :json, :csv or :binary; inferred from the file
#   extension when nil
# @return [Geminize::Models::EmbeddingResponse] the loaded response
# @raise [Geminize::ValidationError] when the format is unknown, unsupported
#   or cannot be inferred ("INVALID_ARGUMENT"), when the JSON is malformed
#   ("INVALID_ARGUMENT"), or when reading the file fails ("IO_ERROR")
def self.load(path, format = nil)
  # Infer format from file extension if not specified
  if format.nil?
    ext = File.extname(path).downcase.delete(".")
    format = case ext
    when "json" then :json
    when "csv" then :csv
    when "bin" then :binary
    else
      raise Geminize::ValidationError.new(
        "Could not infer format from file extension: #{ext}",
        "INVALID_ARGUMENT"
      )
    end
  end

  begin
    content = File.read(path)

    case format
    when :json
      data = JSON.parse(content)

      if data["embeddings"]
        # Convert to API response format
        response_data = {
          "embeddings" => data["embeddings"].map { |vec| {"values" => vec} }
        }

        # Add usage metadata if available
        if data["metadata"] && data["metadata"]["total_tokens"]
          response_data["usageMetadata"] = {
            "promptTokenCount" => data["metadata"]["prompt_tokens"] || 0,
            "totalTokenCount" => data["metadata"]["total_tokens"] || 0
          }
        end

        from_hash(response_data)
      else
        # Assume it's already in the API response format
        from_hash(data)
      end
    when :csv
      # A cell is numeric only if it is a complete decimal literal, with an
      # optional exponent. Merely testing for letters would misread values
      # such as "1.0e-05" (how Float#to_s prints small numbers) as a header.
      numeric_cell = /\A[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\z/

      # Accept both LF and CRLF line endings and ignore blank lines.
      rows = content.split(/\r?\n/).map(&:strip).reject(&:empty?).map do |line|
        line.split(",").map(&:strip)
      end

      # The first row is a header when it holds any non-empty, non-numeric cell.
      has_header = !rows.empty? &&
        rows.first.any? { |cell| !cell.empty? && !cell.match?(numeric_cell) }
      rows.shift if has_header

      # Parse vectors
      vectors = rows.map { |cells| cells.map(&:to_f) }

      # Create a response hash
      response_data = {
        "embeddings" => vectors.map { |vec| {"values" => vec} }
      }

      from_hash(response_data)
    when :binary
      raise Geminize::ValidationError.new(
        "Binary format not yet implemented",
        "INVALID_ARGUMENT"
      )
    else
      raise Geminize::ValidationError.new(
        "Unknown format: #{format}. Supported formats: :json, :csv",
        "INVALID_ARGUMENT"
      )
    end
  rescue JSON::ParserError => e
    raise Geminize::ValidationError.new(
      "Failed to parse JSON: #{e.message}",
      "INVALID_ARGUMENT"
    )
  rescue Geminize::ValidationError
    # Already a domain error carrying the right code; letting it fall into
    # the generic handler below would relabel it as an IO_ERROR.
    raise
  rescue => e
    raise Geminize::ValidationError.new(
      "Failed to load embeddings: #{e.message}",
      "IO_ERROR"
    )
  end
end

Instance Method Details

#average_embedding ⇒ Array<Float>

Average the embeddings in this response

Returns:

  • (Array<Float>)

    Average embedding vector

Raises:



219
220
221
222
223
224
225
226
# File 'lib/geminize/models/embedding_response.rb', line 219

# Average all embeddings in this response into a single vector.
#
# @return [Array<Float>] whatever VectorUtils.average_vectors returns for
#   the full list of embeddings
# @raise [Geminize::ValidationError] when the response holds no embeddings
def average_embedding
  vectors = embeddings
  raise Geminize::ValidationError.new("No embeddings found to average", "INVALID_ARGUMENT") if vectors.empty?

  VectorUtils.average_vectors(vectors)
end

#batch? ⇒ Boolean

Check if the response is a batch (multiple embeddings)

Returns:

  • (Boolean)

    True if the response contains multiple embeddings



31
32
33
34
# File 'lib/geminize/models/embedding_response.rb', line 31

# Tell whether the payload is in batch form, i.e. it has an "embeddings"
# key whose value is an Array.
#
# @return [Boolean] true when "embeddings" is present and is an Array
def batch?
  return false unless @data.key?("embeddings")

  @data["embeddings"].is_a?(Array)
end

#batch_size ⇒ Integer

Get the number of embeddings in the batch

Returns:

  • (Integer)

    The number of embeddings (1 for single embedding, N for batch)



69
70
71
72
73
74
75
76
77
# File 'lib/geminize/models/embedding_response.rb', line 69

# Count the embeddings carried by this response.
#
# @return [Integer] 1 for a single-embedding payload, the length of the
#   "embeddings" array for a batch payload, and 0 otherwise
def batch_size
  return 1 if single?
  return @data["embeddings"].size if batch?

  0
end

#cluster(k, max_iterations = 100, metric = :cosine) ⇒ Hash

Note:

This is a basic implementation of k-means clustering for demonstration purposes

Perform simple clustering of embeddings

Parameters:

  • k (Integer)

    Number of clusters

  • max_iterations (Integer) (defaults to: 100)

    Maximum number of iterations for clustering

  • metric (Symbol) (defaults to: :cosine)

    Distance metric to use (:cosine or :euclidean)

Returns:

  • (Hash)

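    Hash with :clusters (one array of embedding indices per cluster), :centroids (cluster centers), :iterations (number of iterations performed) and :metric (the metric that was used)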

Raises:



724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
# File 'lib/geminize/models/embedding_response.rb', line 724

# Group the embeddings into k clusters with a basic k-means loop.
#
# Every vector is passed through VectorUtils.normalize first. Each pass then
# assigns every vector to the centroid with the highest similarity score and
# recomputes each centroid as the normalized average of its members. The
# loop stops once a pass changes no assignment or max_iterations is reached.
#
# @param k [Integer] number of clusters, between 1 and the number of embeddings
# @param max_iterations [Integer] upper bound on assignment/update passes
# @param metric [Symbol] :cosine or :euclidean
# @return [Hash] :clusters (array of k arrays of embedding indices),
#   :centroids, :iterations (passes performed) and :metric
# @raise [Geminize::ValidationError] when there are no embeddings, k is out
#   of range, or an unsupported metric is encountered during assignment
def cluster(k, max_iterations = 100, metric = :cosine)
  vecs = embeddings

  if vecs.empty?
    raise Geminize::ValidationError.new(
      "Cannot cluster empty embeddings",
      "INVALID_ARGUMENT"
    )
  end

  if k <= 0 || k > vecs.length
    raise Geminize::ValidationError.new(
      "Number of clusters must be between 1 and #{vecs.length}, got: #{k}",
      "INVALID_ARGUMENT"
    )
  end

  # Normalize vectors for better clustering (especially important for cosine similarity)
  normalized_vecs = vecs.map { |v| VectorUtils.normalize(v) }

  # Initialize centroids using k-means++ algorithm
  # (kmeans_plus_plus_init is a helper defined elsewhere; its exact seeding
  # behavior is not visible here)
  centroids = kmeans_plus_plus_init(normalized_vecs, k, metric)

  # Initialize cluster assignments
  # (-1 marks "not yet assigned", so the first pass always registers changes)
  cluster_assignments = Array.new(normalized_vecs.length, -1)

  # Main K-means loop
  iterations = 0
  changes = true

  while changes && iterations < max_iterations
    changes = false

    # Assign points to clusters
    normalized_vecs.each_with_index do |vec, idx|
      # Despite its name, best_distance holds the best *similarity* seen so
      # far: both metrics are expressed as "higher is better" below.
      best_distance = -Float::INFINITY
      best_cluster = -1

      centroids.each_with_index do |centroid, cluster_idx|
        # Calculate similarity (higher is better)
        # The metric is only validated here, so an unsupported value is
        # reported on the first comparison rather than up front.
        similarity = case metric
        when :cosine
          VectorUtils.cosine_similarity(vec, centroid)
        when :euclidean
          # Convert to similarity (higher is more similar)
          1.0 / (1.0 + VectorUtils.euclidean_distance(vec, centroid))
        else
          raise Geminize::ValidationError.new(
            "Unknown metric: #{metric}. Supported metrics: :cosine, :euclidean",
            "INVALID_ARGUMENT"
          )
        end

        # Strict comparison: on ties the lowest-numbered centroid wins.
        if similarity > best_distance
          best_distance = similarity
          best_cluster = cluster_idx
        end
      end

      # Update cluster assignment if it changed
      if cluster_assignments[idx] != best_cluster
        cluster_assignments[idx] = best_cluster
        changes = true
      end
    end

    # Update centroids
    new_centroids = Array.new(k) { [] }

    # Collect points for each cluster
    normalized_vecs.each_with_index do |vec, idx|
      cluster_idx = cluster_assignments[idx]
      new_centroids[cluster_idx] << vec if cluster_idx >= 0
    end

    # Calculate new centroids (average of points in each cluster)
    new_centroids.each_with_index do |cluster_points, idx|
      if cluster_points.empty?
        # If a cluster is empty, reinitialize with a point farthest from other centroids
        # (find_farthest_point is defined elsewhere.)
        # NOTE(review): re-seeding does not set `changes`, so if this happens
        # on a pass with no reassignment the loop ends with the cluster
        # still empty -- confirm this is intended.
        farthest_idx = find_farthest_point(normalized_vecs, centroids, cluster_assignments)
        centroids[idx] = normalized_vecs[farthest_idx].dup
      else
        # Otherwise take the average and normalize
        avg = VectorUtils.average_vectors(cluster_points)
        centroids[idx] = VectorUtils.normalize(avg)
      end
    end

    iterations += 1
  end

  # Organize results by cluster
  clusters = Array.new(k) { [] }
  cluster_assignments.each_with_index do |cluster_idx, idx|
    clusters[cluster_idx] << idx if cluster_idx >= 0
  end

  {
    clusters: clusters,
    centroids: centroids,
    iterations: iterations,
    metric: metric
  }
end

#combine(other) ⇒ Geminize::Models::EmbeddingResponse

Combine with another EmbeddingResponse

Parameters:

Returns:

Raises:



525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
# File 'lib/geminize/models/embedding_response.rb', line 525

# Merge this response with another one into a new batch response.
#
# The embeddings of self come first, followed by those of other. Usage
# counters from whichever side has usage data are added together; a missing
# counter counts as 0.
#
# @param other [Geminize::Models::EmbeddingResponse] the response to append
# @return [Geminize::Models::EmbeddingResponse] a new response built via
#   from_hash; neither input is modified
# @raise [Geminize::ValidationError] when other is not an EmbeddingResponse
#   or the two responses report different dimensions
def combine(other)
  unless other.is_a?(Geminize::Models::EmbeddingResponse)
    raise Geminize::ValidationError.new(
      "Can only combine with another EmbeddingResponse",
      "INVALID_ARGUMENT"
    )
  end

  # Check dimension compatibility
  unless dimensions == other.dimensions
    raise Geminize::ValidationError.new(
      "Cannot combine embeddings with different dimensions (#{dimensions} vs #{other.dimensions})",
      "INVALID_ARGUMENT"
    )
  end

  # Wrap every vector from both sides in the API's {"values" => [...]} shape.
  wrapped_vectors = (embeddings + other.embeddings).map { |vector| {"values" => vector} }

  # Sum the token counters across both usage hashes, skipping absent ones.
  prompt_count = 0
  total_count = 0
  [@usage, other.usage].each do |usage_info|
    next unless usage_info

    prompt_count += usage_info["promptTokenCount"] || 0
    total_count += usage_info["totalTokenCount"] || 0
  end

  self.class.from_hash(
    {
      "embeddings" => wrapped_vectors,
      "usageMetadata" => {
        "promptTokenCount" => prompt_count,
        "totalTokenCount" => total_count
      }
    }
  )
end

#dimensions ⇒ Integer?

Get the dimensionality of the embeddings

Returns:

  • (Integer, nil)

    The number of dimensions or nil if no embeddings



353
354
355
356
# File 'lib/geminize/models/embedding_response.rb', line 353

# Get the dimensionality of the embeddings, taken from the first vector.
#
# @return [Integer, nil] length of the first embedding, or nil when the
#   response holds no embeddings
def dimensions
  # `embedding` goes through embedding_at(0), which raises IndexError when
  # the batch is empty; guard first so the documented nil is returned.
  return nil if batch_size.zero?

  embedding&.length
end

#each_embedding {|embedding, index| ... } ⇒ Enumerator, self

Iterates through each embedding with its index

Yields:

  • (embedding, index)

    Block to execute for each embedding

Yield Parameters:

  • embedding (Array<Float>)

    The embedding vector

  • index (Integer)

    The index of the embedding

Returns:

  • (Enumerator, self)

    Returns an enumerator if no block given, or self if block given



442
443
444
445
446
447
448
449
450
451
# File 'lib/geminize/models/embedding_response.rb', line 442

# Iterate over every embedding together with its position.
#
# @yieldparam embedding [Array<Float>] the embedding vector
# @yieldparam index [Integer] zero-based position of the embedding
# @return [Enumerator, self] an enumerator when called without a block,
#   otherwise self so calls can be chained
def each_embedding
  return to_enum(:each_embedding) unless block_given?

  embeddings.each.with_index { |vector, position| yield vector, position }
  self
end

#embedding ⇒ Array<Float>

Alias for embedding_at(0)

Returns:

  • (Array<Float>)

    The first embedding



97
98
99
# File 'lib/geminize/models/embedding_response.rb', line 97

# Shortcut for the first embedding; simply delegates to embedding_at(0),
# so it behaves exactly as that call does (including any error it raises).
#
# @return [Array<Float>] the first embedding
def embedding
  embedding_at(0)
end

#embedding_at(index) ⇒ Array<Float>

Get a specific embedding from the batch by index

Parameters:

  • index (Integer)

    The index of the embedding to retrieve

Returns:

  • (Array<Float>)

    The embedding values at the specified index

Raises:

  • (IndexError)

    If the index is out of bounds



83
84
85
86
87
88
89
90
91
92
93
# File 'lib/geminize/models/embedding_response.rb', line 83

# Fetch one embedding by its zero-based position.
#
# Negative indices are not supported; the valid range is 0...batch_size.
#
# @param index [Integer] position of the embedding to retrieve
# @return [Array<Float>, nil] the values at that position; nil only when the
#   payload is neither single nor batch yet the bounds check passed
# @raise [IndexError] if the index is out of bounds
def embedding_at(index)
  raise IndexError, "Index #{index} out of bounds for batch size #{batch_size}" if index < 0 || index >= batch_size

  return values if single? && index == 0

  @data["embeddings"][index]["values"] if batch?
end

#embedding_size ⇒ Integer

Get the size of each embedding (vector dimension)

Returns:

  • (Integer)

    The number of dimensions in each embedding



57
58
59
60
61
62
63
64
65
# File 'lib/geminize/models/embedding_response.rb', line 57

# Get the vector dimension of the embeddings.
#
# @return [Integer] length of the single embedding, or of the first batch
#   entry; 0 when the batch is empty or the payload is in neither form
def embedding_size
  return values.size if single?
  return 0 unless batch?

  entries = @data["embeddings"]
  entries.empty? ? 0 : entries.first["values"].size
end

#embeddings ⇒ Array<Array<Float>>

Get all embeddings as an array of arrays

Returns:

  • (Array<Array<Float>>)

    Array of embedding vectors



45
46
47
48
49
50
51
52
53
# File 'lib/geminize/models/embedding_response.rb', line 45

# Return every embedding as a plain array of vectors, regardless of whether
# the payload is in single or batch form.
#
# @return [Array<Array<Float>>] one vector per embedding; a one-element
#   array for a single payload and [] when the payload is in neither form
def embeddings
  return [values] if single?
  return [] unless batch?

  @data["embeddings"].map { |entry| entry["values"] }
end

#euclidean_distance(index1, index2) ⇒ Float

Calculate Euclidean distance between two embeddings

Parameters:

  • index1 (Integer)

    First embedding index

  • index2 (Integer)

    Second embedding index

Returns:

  • (Float)

    Euclidean distance

Raises:



233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'lib/geminize/models/embedding_response.rb', line 233

# Calculate the Euclidean distance between two embeddings of this response.
#
# Both vectors are looked up before either is checked, so an error raised
# by embedding_at for either index takes precedence over the nil checks.
#
# @param index1 [Integer] first embedding index
# @param index2 [Integer] second embedding index
# @return [Float] result of VectorUtils.euclidean_distance for the pair
# @raise [Geminize::ValidationError] when a lookup yields nil
def euclidean_distance(index1, index2)
  lookups = [index1, index2].map { |position| [position, embedding_at(position)] }

  lookups.each do |position, vector|
    next unless vector.nil?

    raise Geminize::ValidationError.new("Invalid embedding index: #{position}", "INVALID_ARGUMENT")
  end

  VectorUtils.euclidean_distance(*lookups.map(&:last))
end

#filter {|embedding, index| ... } ⇒ Array<Array<Float>>

Note:

This method doesn't modify the original response object

Filter embeddings based on a condition

Yields:

  • (embedding, index)

    Block that returns true if the embedding should be included

Yield Parameters:

  • embedding (Array<Float>)

    The embedding vector

  • index (Integer)

    The index of the embedding

Returns:

  • (Array<Array<Float>>)

    Filtered embeddings



486
487
488
489
490
491
492
493
494
# File 'lib/geminize/models/embedding_response.rb', line 486

# Select the embeddings for which the block returns a truthy value.
# The response object itself is left untouched.
#
# @yieldparam embedding [Array<Float>] the embedding vector
# @yieldparam index [Integer] the index of the embedding
# @return [Array<Array<Float>>, Enumerator] the selected embeddings, or an
#   enumerator when no block is given
def filter
  return to_enum(:filter) unless block_given?

  [].tap do |kept|
    each_embedding do |vector, position|
      kept << vector if yield(vector, position)
    end
  end
end

#map_embeddings {|embedding, index| ... } ⇒ Array<Array<Float>>

Apply a transformation to all embeddings

Yields:

  • (embedding, index)

    Block that transforms a single embedding

Yield Parameters:

  • embedding (Array<Float>)

    The embedding vector

  • index (Integer)

    The index of the embedding

Yield Returns:

  • (Array<Float>)

    The transformed embedding

Returns:

  • (Array<Array<Float>>)

    Transformed embeddings

Raises:



892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
# File 'lib/geminize/models/embedding_response.rb', line 892

# Apply a transformation to every embedding and collect the results.
#
# @yieldparam embedding [Array<Float>] the embedding vector
# @yieldparam index [Integer] the index of the embedding
# @yieldreturn [Array<Float>] the transformed embedding
# @return [Array<Array<Float>>, Enumerator] the transformed embeddings, or
#   an enumerator when no block is given
# @raise [Geminize::ValidationError] as soon as the block returns something
#   that is not an Array
def map_embeddings
  return to_enum(:map_embeddings) unless block_given?

  embeddings.each_with_index.map do |vector, position|
    mapped = yield(vector, position)
    next mapped if mapped.is_a?(Array)

    raise Geminize::ValidationError.new(
      "Transformation must return an array, got: #{mapped.class}",
      "INVALID_ARGUMENT"
    )
  end
end

#metadata ⇒ Hash

Get metadata about the embeddings

Returns:

  • (Hash)

    Metadata about the embeddings including counts and token usage



420
421
422
423
424
425
426
427
428
429
# File 'lib/geminize/models/embedding_response.rb', line 420

# Summarize the response: embedding count, dimensionality, token usage and
# payload form.
#
# @return [Hash] with keys :count, :dimensions, :total_tokens,
#   :prompt_tokens, :is_batch and :is_single, each taken from the
#   like-named query method on this object
def metadata
  {
    count: batch_size,
    dimensions: dimensions,
    total_tokens: total_tokens,
    prompt_tokens: prompt_tokens,
    is_batch: batch?,
    is_single: single?
  }
end

#most_similar(index, top_k = nil, metric = :cosine) ⇒ Array<Hash>

Find the most similar embeddings to a given index

Parameters:

  • index (Integer)

    Index of the embedding to compare against

  • top_k (Integer, nil) (defaults to: nil)

    Number of similar embeddings to return

  • metric (Symbol) (defaults to: :cosine)

    Distance metric to use (:cosine or :euclidean)

Returns:

  • (Array<Hash>)

    Array of {index:, similarity:} hashes sorted by similarity

Raises:



188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/geminize/models/embedding_response.rb', line 188

# Rank the other embeddings in this response by similarity to the one at
# the given index.
#
# The target is excluded from the candidates. VectorUtils.most_similar
# reports positions within the candidate list, so each result's :index is
# rewritten in place to the position in the full embeddings list.
#
# @param index [Integer] index of the embedding to compare against
# @param top_k [Integer, nil] number of results to keep; all when nil
# @param metric [Symbol] metric forwarded to VectorUtils.most_similar
# @return [Array<Hash>] the result hashes, in the order returned by
#   VectorUtils.most_similar
# @raise [Geminize::ValidationError] when the lookup for index yields nil
def most_similar(index, top_k = nil, metric = :cosine)
  target = embedding_at(index)
  raise Geminize::ValidationError.new("Invalid embedding index: #{index}", "INVALID_ARGUMENT") if target.nil?

  # Positions of every embedding except the target one
  vectors = embeddings
  other_indexes = (0...vectors.length).reject { |position| position == index }
  # compact also drops nil vectors from the candidate list
  other_vectors = other_indexes.map { |position| vectors[position] }.compact

  # Always ask for the full ranking; trimming to top_k happens below
  ranked = VectorUtils.most_similar(target, other_vectors, nil, metric)

  # Translate candidate-list positions back to original indexes
  ranked.each { |entry| entry[:index] = other_indexes[entry[:index]] }

  top_k ? ranked.take(top_k) : ranked
end

#normalized_embeddings ⇒ Array<Array<Float>>

Normalize embeddings to unit length

Returns:

  • (Array<Array<Float>>)

    Normalized embeddings



212
213
214
# File 'lib/geminize/models/embedding_response.rb', line 212

# Run every embedding through VectorUtils.normalize and collect the results
# in the original order.
#
# @return [Array<Array<Float>>] the normalized embeddings
def normalized_embeddings
  embeddings.each_with_object([]) do |vector, unit_vectors|
    unit_vectors << VectorUtils.normalize(vector)
  end
end

#prepare_visualization_data(method = :pca, dimensions = 2) ⇒ Array<Hash>

Note:

This method provides the data structure for visualization but requires external libraries like 'iruby' and 'numo' to perform the actual dimensionality reduction. Users should transform this data according to their visualization framework.

Prepare data for visualization with dimensionality reduction

Parameters:

  • method (Symbol) (defaults to: :pca)

    Dimensionality reduction method (:pca or :tsne)

  • dimensions (Integer) (defaults to: 2)

    Number of dimensions to reduce to (1-3)

Returns:

  • (Array<Hash>)

    Array of points with reduced coordinates



318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
# File 'lib/geminize/models/embedding_response.rb', line 318

# Build the point structure a visualization would consume.
#
# No dimensionality reduction is performed here: every point gets
# placeholder coordinates of 0.0 and carries its original vector so callers
# can run the reduction with a library of their choice.
#
# @param method [Symbol] reduction method, :pca or :tsne (validated only)
# @param dimensions [Integer] number of target dimensions, 1 to 3
# @return [Array<Hash>] one hash per embedding with :index, :coordinates
#   and :original_vector; [] when there are no embeddings
# @raise [Geminize::ValidationError] for an unknown method or a dimension
#   count outside 1..3
def prepare_visualization_data(method = :pca, dimensions = 2)
  supported_methods = %i[pca tsne]
  unless supported_methods.include?(method)
    raise Geminize::ValidationError.new(
      "Unknown dimensionality reduction method: #{method}. Supported methods: :pca, :tsne",
      "INVALID_ARGUMENT"
    )
  end

  unless (1..3).cover?(dimensions)
    raise Geminize::ValidationError.new(
      "Dimensions must be between 1 and 3, got: #{dimensions}",
      "INVALID_ARGUMENT"
    )
  end

  point_count = embeddings.length
  return [] if point_count.zero?

  Array.new(point_count) do |position|
    {
      index: position,
      # Placeholder: real coordinates come from the caller's reduction step
      coordinates: Array.new(dimensions) { 0.0 },
      # Kept alongside so the caller has the input for that step
      original_vector: embedding_at(position)
    }
  end
end

#prompt_tokens ⇒ Integer?

Get the prompt token count

Returns:

  • (Integer, nil)

    Prompt token count or nil if not available



368
369
370
371
372
# File 'lib/geminize/models/embedding_response.rb', line 368

# Get the prompt token count from the usage data.
#
# @return [Integer, nil] the "promptTokenCount" entry of @usage, or nil when
#   there is no usage data (or no such entry)
def prompt_tokens
  @usage ? @usage["promptTokenCount"] : nil
end

#raw_response ⇒ Hash

Raw response data from the API

Returns:

  • (Hash)

    The complete raw API response



433
434
435
# File 'lib/geminize/models/embedding_response.rb', line 433

# Expose the complete payload held in @data (the same object, not a copy).
#
# @return [Hash] the complete raw API response
def raw_response
  @data
end

#resize(new_dim, method = :truncate, pad_value = 0.0) ⇒ Array<Array<Float>>

Resize embeddings to a different dimension

Parameters:

  • new_dim (Integer)

    New dimension size

  • method (Symbol) (defaults to: :truncate)

    Method to use for resizing (:truncate, :pad)

  • pad_value (Float) (defaults to: 0.0)

    Value to use for padding when using :pad method

Returns:

  • (Array<Array<Float>>)

    Resized embeddings

Raises:



835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
# File 'lib/geminize/models/embedding_response.rb', line 835

# Resize every embedding to new_dim entries.
#
# Both supported methods behave identically: when new_dim exceeds the
# current dimension the vectors are extended with pad_value, otherwise they
# are cut down to their first new_dim entries. The current dimension is
# taken from `dimensions` and applied to all vectors.
#
# @param new_dim [Integer] new dimension size, must be positive
# @param method [Symbol] :truncate or :pad
# @param pad_value [Float] value appended when vectors have to grow
# @return [Array<Array<Float>>] new, resized vectors
# @raise [Geminize::ValidationError] when there are no embeddings, new_dim
#   is not positive, or the method is unknown
def resize(new_dim, method = :truncate, pad_value = 0.0)
  vectors = embeddings

  if vectors.empty?
    raise Geminize::ValidationError.new(
      "Cannot resize empty embeddings",
      "INVALID_ARGUMENT"
    )
  end

  unless new_dim > 0
    raise Geminize::ValidationError.new(
      "New dimension must be positive, got: #{new_dim}",
      "INVALID_ARGUMENT"
    )
  end

  unless %i[truncate pad].include?(method)
    raise Geminize::ValidationError.new(
      "Unknown resize method: #{method}. Supported methods: :truncate, :pad",
      "INVALID_ARGUMENT"
    )
  end

  current_dim = dimensions

  if new_dim > current_dim
    # Grow: append the same run of pad values to every vector
    filler = Array.new(new_dim - current_dim, pad_value)
    vectors.map { |vector| vector + filler }
  else
    # Shrink (or keep): retain the leading new_dim entries
    vectors.map { |vector| vector.take(new_dim) }
  end
end

#save(path, format = :json, options = {}) ⇒ Boolean

Save embeddings to a file

Parameters:

  • path (String)

    Path to save the file

  • format (Symbol) (defaults to: :json)

    Format to save in (:json, :csv, :binary)

  • options (Hash) (defaults to: {})

    Additional options for saving

Options Hash (options):

  • :pretty (Boolean)

    Format JSON with indentation (for :json format)

  • :include_header (Boolean)

    Include header with dimension indices (for :csv format)

  • :include_metadata (Boolean)

    Include metadata in the saved file

Returns:

  • (Boolean)

    True if successful

Raises:



585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
# File 'lib/geminize/models/embedding_response.rb', line 585

# Save embeddings to a file.
#
# The content is serialized before the file is opened, so an unsupported
# format neither creates nor truncates the target file.
#
# @param path [String] path to save the file
# @param format [Symbol] :json or :csv (:binary is not implemented)
# @param options [Hash] additional options
# @option options [Boolean] :pretty format JSON with indentation (:json)
# @option options [Boolean] :include_header write a header row (:csv)
# @option options [Boolean] :include_metadata add a "metadata" entry built
#   from #metadata (:json)
# @return [Boolean] true if successful
# @raise [Geminize::ValidationError] with code "INVALID_ARGUMENT" for an
#   unknown or unimplemented format, and with code "IO_ERROR" when
#   serializing or writing fails
def save(path, format = :json, options = {})
  # Default options
  options = {
    pretty: false,
    include_header: true,
    include_metadata: true
  }.merge(options)

  begin
    content = case format
    when :json
      data = {"embeddings" => embeddings}
      data["metadata"] = metadata if options[:include_metadata]

      options[:pretty] ? JSON.pretty_generate(data) : JSON.generate(data)
    when :csv
      to_csv(options[:include_header])
    when :binary
      raise Geminize::ValidationError.new(
        "Binary format not yet implemented",
        "INVALID_ARGUMENT"
      )
    else
      raise Geminize::ValidationError.new(
        "Unknown format: #{format}. Supported formats: :json, :csv",
        "INVALID_ARGUMENT"
      )
    end

    File.open(path, "w") { |file| file.write(content) }

    true
  rescue Geminize::ValidationError
    # Format errors already carry the right code; don't relabel as IO_ERROR.
    raise
  rescue => e
    raise Geminize::ValidationError.new(
      "Failed to save embeddings: #{e.message}",
      "IO_ERROR"
    )
  end
end

#similarity(index1, index2) ⇒ Float

Calculate the cosine similarity between two embedding indexes in this response

Parameters:

  • index1 (Integer)

    First embedding index

  • index2 (Integer)

    Second embedding index

Returns:

  • (Float)

    Cosine similarity (-1 to 1)

Raises:



115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/geminize/models/embedding_response.rb', line 115

# Calculate the cosine similarity between two embeddings of this response.
#
# Both vectors are looked up before either is checked, so an error raised
# by embedding_at for either index takes precedence over the nil checks.
#
# @param index1 [Integer] first embedding index
# @param index2 [Integer] second embedding index
# @return [Float] result of VectorUtils.cosine_similarity for the pair
# @raise [Geminize::ValidationError] when a lookup yields nil
def similarity(index1, index2)
  lookups = [index1, index2].map { |position| [position, embedding_at(position)] }

  lookups.each do |position, vector|
    next unless vector.nil?

    raise Geminize::ValidationError.new("Invalid embedding index: #{position}", "INVALID_ARGUMENT")
  end

  VectorUtils.cosine_similarity(*lookups.map(&:last))
end

#similarity_matrix(metric = :cosine) ⇒ Array<Array<Float>>

Compute similarity matrix for all embeddings in this response

Parameters:

  • metric (Symbol) (defaults to: :cosine)

    Distance metric to use (:cosine or :euclidean)

Returns:

  • (Array<Array<Float>>)

    Matrix of similarity scores



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/geminize/models/embedding_response.rb', line 148

# Compute the pairwise similarity matrix for all embeddings.
#
# The diagonal is fixed at 1.0. Each pair above the diagonal is scored once
# and mirrored below it. For :euclidean the distance d is turned into a
# similarity as 1 / (1 + d), so higher always means more similar.
#
# @param metric [Symbol] :cosine or :euclidean
# @return [Array<Array<Float>>] square matrix of scores; [] when there are
#   no embeddings
# @raise [Geminize::ValidationError] for an unknown metric (only detected
#   when there is at least one pair to score)
def similarity_matrix(metric = :cosine)
  vectors = embeddings
  return [] if vectors.empty?

  size = vectors.length
  # Start from the identity: self-similarity on the diagonal, 0.0 elsewhere
  matrix = Array.new(size) { |row| Array.new(size) { |col| (row == col) ? 1.0 : 0.0 } }

  size.times do |row|
    (row + 1).upto(size - 1) do |col|
      score = case metric
      when :cosine
        VectorUtils.cosine_similarity(vectors[row], vectors[col])
      when :euclidean
        1.0 / (1.0 + VectorUtils.euclidean_distance(vectors[row], vectors[col]))
      else
        raise Geminize::ValidationError.new(
          "Unknown metric: #{metric}. Supported metrics: :cosine, :euclidean",
          "INVALID_ARGUMENT"
        )
      end

      # The matrix is symmetric, so fill both halves at once
      matrix[row][col] = score
      matrix[col][row] = score
    end
  end

  matrix
end

#similarity_with_vector(index, other_vector) ⇒ Float

Calculate the cosine similarity between an embedding in this response and another vector

Parameters:

  • index (Integer)

    Embedding index in this response

  • other_vector (Array<Float>)

    External vector to compare with

Returns:

  • (Float)

    Cosine similarity (-1 to 1)

Raises:



135
136
137
138
139
140
141
142
143
# File 'lib/geminize/models/embedding_response.rb', line 135

# Calculate the cosine similarity between one embedding of this response
# and an externally supplied vector.
#
# @param index [Integer] embedding index in this response
# @param other_vector [Array<Float>] external vector to compare with
# @return [Float] result of VectorUtils.cosine_similarity
# @raise [Geminize::ValidationError] when the lookup for index yields nil
def similarity_with_vector(index, other_vector)
  own_vector = embedding_at(index)
  raise Geminize::ValidationError.new("Invalid embedding index: #{index}", "INVALID_ARGUMENT") if own_vector.nil?

  VectorUtils.cosine_similarity(own_vector, other_vector)
end

#single? ⇒ Boolean

Check if the response is a single embedding (not a batch)

Returns:

  • (Boolean)

    True if the response contains a single embedding



38
39
40
41
# File 'lib/geminize/models/embedding_response.rb', line 38

# Tell whether the payload is in single-embedding form, i.e. it has an
# "embedding" key holding a Hash that in turn has a "values" key.
#
# @return [Boolean] true when the payload contains a single embedding
def single?
  return false unless @data.key?("embedding")

  entry = @data["embedding"]
  entry.is_a?(Hash) && entry.key?("values")
end

#slice(start, finish = nil) ⇒ Array<Array<Float>>

Get a subset of embeddings by indices

Parameters:

  • start (Integer)

    Start index (inclusive)

  • finish (Integer, nil) (defaults to: nil)

    End index (inclusive), or nil to select until the end

Returns:

  • (Array<Array<Float>>)

    Subset of embeddings

Raises:

  • (IndexError)

    If the range is invalid



501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
# File 'lib/geminize/models/embedding_response.rb', line 501

# Get a contiguous subset of the embeddings.
#
# Negative indices count from the end, and a nil finish means "through the
# last embedding". Error messages report the indices after that adjustment.
#
# @param start [Integer] start index (inclusive)
# @param finish [Integer, nil] end index (inclusive), or nil for the end
# @return [Array<Array<Float>>] the selected embeddings
# @raise [IndexError] if the resolved range is invalid
def slice(start, finish = nil)
  vectors = embeddings
  total = vectors.length

  # Resolve negative and omitted bounds to absolute positions
  first = (start < 0) ? total + start : start
  last = if finish.nil?
    total - 1
  elsif finish < 0
    total + finish
  else
    finish
  end

  # Validate range
  unless first >= 0 && first < total
    raise IndexError, "Start index #{first} out of bounds for embeddings size #{total}"
  end

  unless last >= first && last < total
    raise IndexError, "End index #{last} out of bounds for embeddings size #{total}"
  end

  vectors[first..last]
end

#to_a ⇒ Array<Array<Float>>

Converts embeddings to a simple array

Returns:

  • (Array<Array<Float>>)

    Array of embedding vectors



455
456
457
# File 'lib/geminize/models/embedding_response.rb', line 455

# Array conversion; returns exactly what #embeddings returns.
#
# @return [Array<Array<Float>>] array of embedding vectors
def to_a
  embeddings
end

#to_csv(include_header = true) ⇒ String

Export embeddings to a CSV string

Parameters:

  • include_header (Boolean) (defaults to: true)

    Whether to include a header row with dimension indices

Returns:

  • (String)

    CSV representation of the embeddings



268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# File 'lib/geminize/models/embedding_response.rb', line 268

# Export the embeddings as CSV text, one vector per line.
#
# Lines are joined with "\n" and there is no trailing newline. The optional
# header names the columns dim_0, dim_1, ... up to the value of `dimensions`.
#
# @param include_header [Boolean] whether to emit the header row
# @return [String] CSV representation; "" when there are no embeddings
def to_csv(include_header = true)
  vectors = embeddings
  return "" if vectors.empty?

  width = dimensions || 0
  rows = vectors.map { |vector| vector.join(",") }
  rows.unshift(Array.new(width) { |column| "dim_#{column}" }.join(",")) if include_header

  rows.join("\n")
end

#to_hash_with_keys(keys) ⇒ Hash

Transform embeddings to a hash with specified keys

Parameters:

  • keys (Array<String>, nil)

    Keys to associate with each vector (must match number of embeddings)

Returns:

  • (Hash)

    Hash mapping keys to embedding vectors

Raises:



292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
# File 'lib/geminize/models/embedding_response.rb', line 292

# Build a hash that maps caller-supplied keys to the embedding vectors.
#
# @param keys [Array<String>, nil] one key per embedding, in order; when nil
#   the stringified positions "0", "1", ... are used instead
# @return [Hash] mapping from key to embedding vector
# @raise [Geminize::ValidationError] when the number of keys differs from
#   the number of embeddings
def to_hash_with_keys(keys)
  vectors = embeddings

  # No keys supplied: fall back to stringified positions
  if keys.nil?
    positional = {}
    vectors.each_with_index { |vector, position| positional[position.to_s] = vector }
    return positional
  end

  if keys.length != vectors.length
    raise Geminize::ValidationError.new(
      "Number of keys (#{keys.length}) doesn't match number of embeddings (#{vectors.length})",
      "INVALID_ARGUMENT"
    )
  end

  keys.zip(vectors).each_with_object({}) { |(key, vector), table| table[key] = vector }
end

#to_json(pretty = false) ⇒ String

Export embeddings to a JSON string

Parameters:

  • pretty (Boolean) (defaults to: false)

    Whether to format the JSON with indentation

Returns:

  • (String)

    JSON representation of the embeddings



251
252
253
254
255
256
257
258
259
260
261
262
263
# File 'lib/geminize/models/embedding_response.rb', line 251

# Export the embeddings as a JSON document with the keys "embeddings",
# "dimensions" and "count".
#
# @param pretty [Boolean] whether to format the JSON with indentation
# @return [String] JSON representation of the embeddings
def to_json(pretty = false)
  payload = {
    embeddings: embeddings,
    dimensions: dimensions,
    count: embeddings.length
  }

  generator = pretty ? :pretty_generate : :generate
  JSON.public_send(generator, payload)
end

#to_numpy_format ⇒ Hash

Note:

This method provides a structure that can be easily converted to a numpy array in Python or used with Ruby libraries that support numpy-compatible formats

Export embeddings to a Numpy-compatible format

Returns:

  • (Hash)

    A hash with ndarray compatible data structure



386
387
388
389
390
391
392
# File 'lib/geminize/models/embedding_response.rb', line 386

# Describe the embeddings in an ndarray-like layout: the nested data, its
# [rows, columns] shape and a fixed dtype label of "float32".
#
# @return [Hash] with keys :data, :shape and :dtype; the column count is 0
#   when `dimensions` is nil
def to_numpy_format
  row_count = batch_size
  column_count = dimensions || 0

  {data: embeddings, shape: [row_count, column_count], dtype: "float32"}
end

#top_dimensions(k) ⇒ Array<Array<Float>>

Extract top K most significant dimensions from the embeddings

Parameters:

  • k (Integer)

    Number of dimensions to extract

Returns:

  • (Array<Array<Float>>)

    Embeddings with only the top K dimensions

Raises:



398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
# File 'lib/geminize/models/embedding_response.rb', line 398

# Reduce every embedding to k dimensions.
#
# This is a simplified approach: it keeps the first k entries of each
# vector and does not rank dimensions by variance or importance.
#
# @param k [Integer] number of dimensions to keep
# @return [Array<Array<Float>>] embeddings cut down to their first k entries
# @raise [Geminize::ValidationError] when there are no embeddings or k
#   exceeds the available dimensions
def top_dimensions(k)
  available = dimensions

  raise Geminize::ValidationError.new("No embeddings found", "INVALID_ARGUMENT") if available.nil? || available == 0

  unless k <= available
    raise Geminize::ValidationError.new(
      "Cannot extract #{k} dimensions from embeddings with only #{available} dimensions",
      "INVALID_ARGUMENT"
    )
  end

  embeddings.map { |vector| vector.take(k) }
end

#total_tokens ⇒ Integer?

Get the total token count

Returns:

  • (Integer, nil)

    Total token count or nil if not available



360
361
362
363
364
# File 'lib/geminize/models/embedding_response.rb', line 360

# Get the total token count from the usage data.
#
# "totalTokenCount" is used as reported. Adding "promptTokenCount" on top
# of it would count the prompt twice, and would make the value grow on
# every save/load round trip because the saved total is read back as
# "totalTokenCount".
#
# @return [Integer, nil] the reported total; the prompt count when no total
#   is reported; 0 when neither is present; nil when there is no usage data
def total_tokens
  return nil unless @usage

  @usage["totalTokenCount"] || @usage["promptTokenCount"] || 0
end

#values ⇒ Array<Float>

Get the embedding values as a flat array

Returns:

  • (Array<Float>)

    The embedding values



24
25
26
27
# File 'lib/geminize/models/embedding_response.rb', line 24

# Get the values of a single-embedding payload as a flat array.
#
# @return [Array<Float>, nil] the "values" entry of the "embedding" hash,
#   or nil when the payload is not in single-embedding form
def values
  single? ? @data["embedding"]["values"] : nil
end

#with_labels(labels) ⇒ Hash

Associates labels/texts with embeddings

Parameters:

  • labels (Array<String>)

    Labels to associate with embeddings

Returns:

  • (Hash)

    Hash mapping labels to embeddings

Raises:



463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
# File 'lib/geminize/models/embedding_response.rb', line 463

# Associate labels/texts with the embeddings, pairing them up by position.
#
# @param labels [Array<String>] one label per embedding, in order
# @return [Hash] mapping from label to embedding vector
# @raise [Geminize::ValidationError] when labels is not an Array or its
#   length differs from the number of embeddings
def with_labels(labels)
  raise Geminize::ValidationError.new("Labels must be an array", "INVALID_ARGUMENT") unless labels.is_a?(Array)

  vectors = embeddings
  if labels.length != vectors.length
    raise Geminize::ValidationError.new(
      "Number of labels (#{labels.length}) doesn't match number of embeddings (#{vectors.length})",
      "INVALID_ARGUMENT"
    )
  end

  labels.zip(vectors).each_with_object({}) { |(label, vector), labelled| labelled[label] = vector }
end