Class: DSPy::Memory::LocalEmbeddingEngine

Inherits:
EmbeddingEngine show all
Extended by:
T::Sig
Defined in:
lib/dspy/memory/local_embedding_engine.rb

Overview

Local embedding engine using ankane/informers for privacy-preserving embeddings

Constant Summary collapse

DEFAULT_MODEL =

Default model, used when no model name is given; one of the models supported by informers

'Xenova/all-MiniLM-L6-v2'
SUPPORTED_MODELS =
[
  'Xenova/all-MiniLM-L6-v2',
  'Xenova/all-MiniLM-L12-v2',
  'Xenova/multi-qa-MiniLM-L6-cos-v1',
  'Xenova/paraphrase-MiniLM-L6-v2'
].freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from EmbeddingEngine

#cosine_similarity, #normalize_vector

Constructor Details

#initialize(model_name = DEFAULT_MODEL) ⇒ LocalEmbeddingEngine

Returns a new instance of LocalEmbeddingEngine.



28
29
30
31
32
33
34
35
# File 'lib/dspy/memory/local_embedding_engine.rb', line 28

# Stores the requested model name, resets the engine's internal state, and
# then calls load_model!.
#
# @param model_name [String] model identifier; defaults to DEFAULT_MODEL.
#   NOTE(review): presumably expected to be one of SUPPORTED_MODELS; any
#   validation would live in load_model!, which is not visible here.
# @return [LocalEmbeddingEngine] a new instance
def initialize(model_name = DEFAULT_MODEL)
  @model_name = model_name
  # Placeholder state, declared through Sorbet's T.let so each instance
  # variable carries a type. NOTE(review): presumably populated by
  # load_model! / load_model_info! -- confirm in those methods.
  @model = T.let(nil, T.nilable(T.untyped))
  @embedding_dim = T.let(nil, T.nilable(Integer))
  @ready = T.let(false, T::Boolean)
  
  # Must run last: it is invoked only after the state above has been reset.
  load_model!
end

Instance Attribute Details

#model_name ⇒ Object (readonly)

Returns the value of attribute model_name.



25
26
27
# File 'lib/dspy/memory/local_embedding_engine.rb', line 25

# Reader for the model identifier.
#
# @return [Object] the value of @model_name, which #initialize sets from its
#   model_name argument (DEFAULT_MODEL when none is passed)
def model_name
  @model_name
end

Class Method Details

.model_supported?(model_name) ⇒ Boolean

Returns:

  • (Boolean)


103
104
105
# File 'lib/dspy/memory/local_embedding_engine.rb', line 103

# Reports whether the given name appears in SUPPORTED_MODELS.
#
# @param model_name [String] model identifier to look up
# @return [Boolean] true when SUPPORTED_MODELS includes model_name
def self.model_supported?(model_name)
  SUPPORTED_MODELS.include?(model_name)
end

.supported_models ⇒ Object



109
110
111
# File 'lib/dspy/memory/local_embedding_engine.rb', line 109

# Exposes the SUPPORTED_MODELS constant.
#
# @return [Array<String>] the frozen constant itself, not a copy
def self.supported_models
  SUPPORTED_MODELS
end

Instance Method Details

#embed(text) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/dspy/memory/local_embedding_engine.rb', line 38

# Produces one normalized embedding vector for a single text.
#
# The text is passed through preprocess_text, handed to the loaded model, and
# the first element of the model output is converted to an array before being
# passed to normalize_vector.
#
# @param text [String] text to embed
# @return [Object] the result of normalize_vector for the extracted vector
def embed(text)
  ensure_ready!

  prepared = preprocess_text(text)
  first_row = @model.call(prepared).first

  normalize_vector(first_row.to_a)
end

#embed_batch(texts) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/dspy/memory/local_embedding_engine.rb', line 53

# Embeds several texts with a single model invocation.
#
# Each text is passed through preprocess_text, the cleaned batch is handed to
# the loaded model, and every element of the model output is flattened and
# passed to normalize_vector.
#
# @param texts [Array<String>] texts to embed
# @return [Array] one normalized vector per model result; an empty array when
#   texts is empty
def embed_batch(texts)
  ensure_ready!

  # Nothing to embed: skip the model call rather than hand it an empty batch.
  return [] if texts.empty?

  cleaned_texts = texts.map { |text| preprocess_text(text) }
  results = @model.call(cleaned_texts)

  # Array#to_a returns the receiver, so a single expression covers both plain
  # arrays and array-convertible results; flatten collapses nested rows.
  results.map { |result| normalize_vector(result.to_a.flatten) }
end

#embedding_dimension ⇒ Object



76
77
78
# File 'lib/dspy/memory/local_embedding_engine.rb', line 76

# Returns the embedding dimension.
#
# Uses the value held in @embedding_dim when one is set; otherwise returns
# whatever load_model_info! returns.
# NOTE(review): presumably load_model_info! also stores the dimension in
# @embedding_dim -- confirm in that method.
#
# @return [Object] the stored dimension, or the result of load_model_info!
def embedding_dimension
  return @embedding_dim if @embedding_dim

  load_model_info!
end

#ready? ⇒ Boolean

Returns:

  • (Boolean)


86
87
88
# File 'lib/dspy/memory/local_embedding_engine.rb', line 86

# Reader for the readiness flag.
#
# @return [Boolean] the value of @ready, which #initialize sets to false
#   before calling load_model!.
#   NOTE(review): presumably flipped to true once the model has loaded --
#   confirm in load_model!.
def ready?
  @ready
end

#stats ⇒ Object



91
92
93
94
95
96
97
98
99
# File 'lib/dspy/memory/local_embedding_engine.rb', line 91

# Builds a summary hash describing this engine.
#
# @return [Hash{Symbol => Object}] with keys, in order: :model_name,
#   :embedding_dimension, :ready, :supported_models (the SUPPORTED_MODELS
#   constant itself) and :backend (always 'informers')
def stats
  # Resolve the computed values first, in the same order the hash lists them.
  dimension = embedding_dimension
  readiness = ready?

  {
    model_name: @model_name,
    embedding_dimension: dimension,
    ready: readiness,
    supported_models: SUPPORTED_MODELS,
    backend: 'informers'
  }
end