Class: DSPy::Memory::LocalEmbeddingEngine

Inherits:
EmbeddingEngine show all
Extended by:
T::Sig
Defined in:
lib/dspy/memory/local_embedding_engine.rb

Overview

Local embedding engine using ankane/informers for privacy-preserving embeddings

Constant Summary collapse

DEFAULT_MODEL =

Default model (one of the models supported by informers), used when no model name is passed to the constructor

'Xenova/all-MiniLM-L6-v2'
# Model identifiers checked by .model_supported? and returned by .supported_models.
SUPPORTED_MODELS = %w[
  Xenova/all-MiniLM-L6-v2
  Xenova/all-MiniLM-L12-v2
  Xenova/multi-qa-MiniLM-L6-cos-v1
  Xenova/paraphrase-MiniLM-L6-v2
].freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from EmbeddingEngine

#cosine_similarity, #normalize_vector

Constructor Details

#initialize(model_name = DEFAULT_MODEL) ⇒ LocalEmbeddingEngine

Returns a new instance of LocalEmbeddingEngine.



28
29
30
31
32
33
34
35
# File 'lib/dspy/memory/local_embedding_engine.rb', line 28

# Sets up the engine's state for +model_name+ and then calls load_model!.
#
# @param model_name [String] model identifier; defaults to DEFAULT_MODEL
def initialize(model_name = DEFAULT_MODEL)
  # Begin "not ready", with no model handle and no known embedding dimension.
  @ready = T.let(false, T::Boolean)
  @embedding_dim = T.let(nil, T.nilable(Integer))
  @model = T.let(nil, T.nilable(T.untyped))
  @model_name = model_name

  load_model!
end

Instance Attribute Details

#model_name ⇒ Object (readonly)

Returns the value of attribute model_name.



25
26
27
# File 'lib/dspy/memory/local_embedding_engine.rb', line 25

# Reader for the model name stored on this instance.
#
# @return [Object] the value of @model_name
def model_name = @model_name

Class Method Details

.model_supported?(model_name) ⇒ Boolean

Returns:

  • (Boolean)


106
107
108
# File 'lib/dspy/memory/local_embedding_engine.rb', line 106

# Tells whether +model_name+ appears in SUPPORTED_MODELS.
#
# @param model_name [String] model identifier to look up
# @return [Boolean] true when the identifier is listed
def self.model_supported?(model_name) = SUPPORTED_MODELS.include?(model_name)

.supported_models ⇒ Object



112
113
114
# File 'lib/dspy/memory/local_embedding_engine.rb', line 112

# Exposes the SUPPORTED_MODELS constant itself (not a copy).
#
# @return [Array<String>] the list of supported model identifiers
def self.supported_models = SUPPORTED_MODELS

Instance Method Details

#embed(text) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/dspy/memory/local_embedding_engine.rb', line 38

# Embeds a single text inside an 'embedding.generate' span.
#
# Steps inside the span: ensure_ready!, preprocess_text, call the model, take the
# first element of the model output as an array, and run it through
# normalize_vector. When a span object is yielded, the vector's length and
# magnitude are recorded on it.
#
# @param text [String] input to embed (presumably a String: it is sliced and
#   measured with #length — confirm against callers)
# @return [Object] whatever DSPy::Context.with_span returns; presumably the block's
#   value, i.e. the normalized vector — confirm against with_span
def embed(text)
  DSPy::Context.with_span(
    operation: 'embedding.generate',
    **DSPy::ObservationType::Embedding.langfuse_attributes,
    'embedding.model' => @model_name,
    'embedding.input' => text[0..200], # only a leading slice goes on the span
    'embedding.input_length' => text.length
  ) do |span|
    ensure_ready!

    raw_output = @model.call(preprocess_text(text))
    unit_vector = normalize_vector(raw_output.first.to_a)

    # No span object means nothing to annotate; hand the vector straight back.
    next unit_vector unless span

    span.set_attribute('embedding.dimension', unit_vector.length)
    squared_sum = unit_vector.sum { |component| component * component }
    span.set_attribute('embedding.magnitude', Math.sqrt(squared_sum))

    unit_vector
  end
end

#embed_batch(texts) ⇒ Object



69
70
71
72
73
74
75
76
# File 'lib/dspy/memory/local_embedding_engine.rb', line 69

# Embeds every entry of +texts+ by calling #embed on each one in order.
#
# ensure_ready! runs up front, so it is invoked even when +texts+ is empty.
#
# @param texts [Enumerable] inputs to embed, one #embed call per element
# @return [Array] the #embed results, in input order
def embed_batch(texts)
  ensure_ready!

  # Sequential on purpose: informers doesn't support true batch processing.
  texts.map { |entry| embed(entry) }
end

#embedding_dimension ⇒ Object



79
80
81
# File 'lib/dspy/memory/local_embedding_engine.rb', line 79

# Returns the stored embedding dimension, falling back to load_model_info! when
# none has been recorded yet.
#
# @return [Object] @embedding_dim when set, otherwise the result of load_model_info!
def embedding_dimension
  return @embedding_dim if @embedding_dim

  load_model_info!
end

#ready? ⇒ Boolean

Returns:

  • (Boolean)


89
90
91
# File 'lib/dspy/memory/local_embedding_engine.rb', line 89

# Reports the engine's readiness flag.
#
# @return [Boolean] the current value of @ready
def ready? = @ready

#stats ⇒ Object



94
95
96
97
98
99
100
101
102
# File 'lib/dspy/memory/local_embedding_engine.rb', line 94

# Collects a summary of this engine: model name, embedding dimension, readiness,
# the supported-model list, and the backend label.
#
# Reading the dimension goes through #embedding_dimension, so it may invoke
# whatever that method does when no dimension is stored yet.
#
# @return [Hash{Symbol => Object}] summary keyed by :model_name,
#   :embedding_dimension, :ready, :supported_models and :backend
def stats
  snapshot = { model_name: @model_name }
  snapshot[:embedding_dimension] = embedding_dimension
  snapshot[:ready] = ready?
  snapshot[:supported_models] = SUPPORTED_MODELS
  snapshot[:backend] = 'informers'
  snapshot
end