Class: DSPy::Memory::LocalEmbeddingEngine
Overview
Local embedding engine using ankane/informers for privacy-preserving embeddings
Constant Summary
collapse
- DEFAULT_MODEL =
Default model, used when no model name is passed to the constructor (one of the models supported by informers)
'Xenova/all-MiniLM-L6-v2'
- SUPPORTED_MODELS =
[
'Xenova/all-MiniLM-L6-v2',
'Xenova/all-MiniLM-L12-v2',
'Xenova/multi-qa-MiniLM-L6-cos-v1',
'Xenova/paraphrase-MiniLM-L6-v2'
].freeze
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
#cosine_similarity, #normalize_vector
Constructor Details
#initialize(model_name = DEFAULT_MODEL) ⇒ LocalEmbeddingEngine
Returns a new instance of LocalEmbeddingEngine.
28
29
30
31
32
33
34
35
|
# File 'lib/dspy/memory/local_embedding_engine.rb', line 28
# Creates an engine for +model_name+ and immediately invokes load_model!.
#
# @param model_name [String] model identifier; DEFAULT_MODEL when omitted
def initialize(model_name = DEFAULT_MODEL)
  @model_name = model_name

  # Typed placeholders declared via T.let. This method only seeds them;
  # presumably load_model! populates them -- confirm against its definition.
  @ready = T.let(false, T::Boolean)
  @model = T.let(nil, T.nilable(T.untyped))
  @embedding_dim = T.let(nil, T.nilable(Integer))

  load_model!
end
|
Instance Attribute Details
#model_name ⇒ Object
Returns the value of attribute model_name.
25
26
27
|
# File 'lib/dspy/memory/local_embedding_engine.rb', line 25
# Reader for the @model_name instance variable (assigned in the constructor).
#
# @return [Object] the current value of @model_name
def model_name
  @model_name
end
|
Class Method Details
.model_supported?(model_name) ⇒ Boolean
106
107
108
|
# File 'lib/dspy/memory/local_embedding_engine.rb', line 106
# Tells whether +model_name+ is one of the entries in SUPPORTED_MODELS.
#
# @param model_name [Object] candidate model identifier
# @return [Boolean] true when an entry equals +model_name+, false otherwise
def self.model_supported?(model_name)
  SUPPORTED_MODELS.any? { |candidate| candidate == model_name }
end
|
.supported_models ⇒ Object
112
113
114
|
# File 'lib/dspy/memory/local_embedding_engine.rb', line 112
# Exposes the list of supported model identifiers.
#
# @return [Array<String>] the SUPPORTED_MODELS constant itself (not a copy)
def self.supported_models
  SUPPORTED_MODELS
end
|
Instance Method Details
#embed(text) ⇒ Object
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
# File 'lib/dspy/memory/local_embedding_engine.rb', line 38
# Generates a normalized embedding for a single text inside an
# 'embedding.generate' span opened through DSPy::Context.with_span.
#
# The span is tagged with the model name, the slice text[0..200] of the input,
# and the input's length. When the block receives a truthy span, the vector's
# dimension and magnitude are recorded on it as well.
#
# @param text [String] input to embed (must respond to #[] and #length)
# @return [Object] whatever with_span returns; the block itself evaluates to
#   the normalized vector (presumably with_span passes that through -- confirm)
def embed(text)
  span_attributes = {
    operation: 'embedding.generate',
    **DSPy::ObservationType::Embedding.langfuse_attributes,
    'embedding.model' => @model_name,
    'embedding.input' => text[0..200],
    'embedding.input_length' => text.length
  }

  DSPy::Context.with_span(**span_attributes) do |span|
    ensure_ready!

    model_output = @model.call(preprocess_text(text))
    # Only the first element of the model output is used as the embedding.
    vector = normalize_vector(model_output.first.to_a)

    if span
      span.set_attribute('embedding.dimension', vector.length)
      span.set_attribute('embedding.magnitude', Math.sqrt(vector.sum { |component| component * component }))
    end

    vector
  end
end
|
#embed_batch(texts) ⇒ Object
69
70
71
72
73
74
75
76
|
# File 'lib/dspy/memory/local_embedding_engine.rb', line 69
# Embeds several texts by calling #embed on each one in turn, after a single
# up-front ensure_ready! call.
#
# @param texts [#map] collection of texts to embed
# @return [Array] one #embed result per input, in input order
def embed_batch(texts)
  ensure_ready!
  texts.map { |entry| embed(entry) }
end
|
#embedding_dimension ⇒ Object
79
80
81
|
# File 'lib/dspy/memory/local_embedding_engine.rb', line 79
# Reports the embedding dimension.
#
# @return [Object] @embedding_dim when it is set (truthy); otherwise the return
#   value of load_model_info!
def embedding_dimension
  return @embedding_dim if @embedding_dim

  load_model_info!
end
|
#ready? ⇒ Boolean
89
90
91
|
# File 'lib/dspy/memory/local_embedding_engine.rb', line 89
# Readiness flag for this engine.
#
# @return [Boolean] the current value of @ready (initialized to false in the
#   constructor)
def ready?
  @ready
end
|
#stats ⇒ Object
94
95
96
97
98
99
100
101
102
|
# File 'lib/dspy/memory/local_embedding_engine.rb', line 94
# Summarizes the engine's configuration and state.
#
# @return [Hash{Symbol => Object}] with keys :model_name, :embedding_dimension,
#   :ready, :supported_models and :backend (always 'informers'), in that order
def stats
  summary = { model_name: @model_name }
  summary[:embedding_dimension] = embedding_dimension
  summary[:ready] = ready?
  summary[:supported_models] = SUPPORTED_MODELS
  summary[:backend] = 'informers'
  summary
end
|