Module: RubyLLM::SemanticCache

Defined in:
lib/ruby_llm/semantic_cache.rb,
lib/ruby_llm/semantic_cache/entry.rb,
lib/ruby_llm/semantic_cache/scoped.rb,
lib/ruby_llm/semantic_cache/version.rb,
lib/ruby_llm/semantic_cache/embedding.rb,
lib/ruby_llm/semantic_cache/middleware.rb,
lib/ruby_llm/semantic_cache/serializer.rb,
lib/ruby_llm/semantic_cache/configuration.rb,
lib/ruby_llm/semantic_cache/cache_stores/base.rb,
lib/ruby_llm/semantic_cache/cache_stores/redis.rb,
lib/ruby_llm/semantic_cache/vector_stores/base.rb,
lib/ruby_llm/semantic_cache/cache_stores/memory.rb,
lib/ruby_llm/semantic_cache/vector_stores/redis.rb,
lib/ruby_llm/semantic_cache/vector_stores/memory.rb

Defined Under Namespace

Modules: CacheStores, Serializer, VectorStores Classes: Configuration, ConfigurationError, Embedding, Entry, Error, Middleware, NotFoundError, Scoped, ScopedMiddleware

Constant Summary collapse

VERSION =
"0.1.0"

Class Method Summary collapse

Class Method Details

.cache_storeObject

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



233
234
235
# File 'lib/ruby_llm/semantic_cache.rb', line 233

# Lazily construct and memoize the cache store backend.
#
# @api private
# @return [Object] the configured cache store instance
def cache_store
  @cache_store || (@cache_store = build_cache_store)
end

.clear!Object

Clear all cached entries



145
146
147
148
149
# File 'lib/ruby_llm/semantic_cache.rb', line 145

# Wipe every cached entry from both the vector and cache stores,
# then zero out the hit/miss statistics.
#
# @return [void]
def clear!
  [vector_store, cache_store].each(&:clear!)
  reset_stats!
end

.configConfiguration

Get the current configuration

Returns:



30
31
32
# File 'lib/ruby_llm/semantic_cache.rb', line 30

# Lazily build and memoize the configuration object.
#
# @return [Configuration] the current configuration
def config
  @config || (@config = Configuration.new)
end

.configure {|Configuration| ... } ⇒ Object

Configure the cache

Yields:



23
24
25
26
# File 'lib/ruby_llm/semantic_cache.rb', line 23

# Yield the configuration for mutation, then rebuild the stores so
# the new settings take effect.
#
# @yieldparam config [Configuration] the configuration to mutate
# @return [void]
def configure
  config.tap { |c| yield(c) }
  # Stores are memoized, so they must be dropped after a config change.
  reset!
end

.delete(query, threshold: nil) ⇒ Boolean

Delete a cached entry by query

Parameters:

  • query (String)

    the query to delete

  • threshold (Float) (defaults to: nil)

    similarity threshold for matching

Returns:

  • (Boolean)

    true if an entry was deleted



131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/ruby_llm/semantic_cache.rb', line 131

# Remove the single closest cached entry for +query+, provided it
# meets the similarity threshold.
#
# @param query [String] query whose nearest entry should be removed
# @param threshold [Float, nil] similarity cutoff (config default when nil)
# @return [Boolean] true when an entry was removed
def delete(query, threshold: nil)
  cutoff = threshold || config.similarity_threshold
  vector = embedding_generator.generate(query)
  best = vector_store.search(vector, limit: 1).first

  return false if best.nil? || best[:similarity] < cutoff

  entry_id = best[:id]
  vector_store.delete(entry_id)
  cache_store.delete(entry_id)
  true
end

.embedding_generatorObject

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

Access internal components (for middleware)



223
224
225
# File 'lib/ruby_llm/semantic_cache.rb', line 223

# Lazily construct and memoize the embedding generator.
#
# @api private
# @return [Embedding] generator built from the current configuration
def embedding_generator
  @embedding_generator || (@embedding_generator = Embedding.new(config))
end

.exists?(query, threshold: nil) ⇒ Boolean

Check if a similar query exists in the cache

Parameters:

  • query (String)

    the query to check

  • threshold (Float) (defaults to: nil)

    similarity threshold

Returns:

  • (Boolean)


120
121
122
123
124
125
# File 'lib/ruby_llm/semantic_cache.rb', line 120

# Check whether a sufficiently similar query is already cached.
#
# @param query [String] the query to check
# @param threshold [Float, nil] similarity cutoff (config default when nil)
# @return [Boolean]
def exists?(query, threshold: nil)
  cutoff = threshold || config.similarity_threshold
  best = vector_store.search(embedding_generator.generate(query), limit: 1).first
  !best.nil? && best[:similarity] >= cutoff
end

.fetch(query, threshold: nil, ttl: nil, &block) ⇒ Object

Fetch a cached response or execute the block and cache the result

Parameters:

  • query (String)

    the query to cache

  • threshold (Float) (defaults to: nil)

    similarity threshold (overrides config)

  • ttl (Integer) (defaults to: nil)

    time-to-live in seconds (overrides config)

Returns:

  • the cached or computed response

Raises:

  • (ArgumentError)


39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/ruby_llm/semantic_cache.rb', line 39

# Fetch a cached response for +query+, or execute the block and
# cache its result.
#
# @param query [String] the query to cache
# @param threshold [Float, nil] similarity threshold (config default when nil)
# @param ttl [Integer, nil] time-to-live in seconds (config default when nil)
# @return [Object] the cached or freshly computed response
# @raise [ArgumentError] when no block is given
def fetch(query, threshold: nil, ttl: nil, &block)
  raise ArgumentError, "Block required" unless block_given?

  threshold ||= config.similarity_threshold
  ttl ||= config.ttl_seconds

  # Generate embedding for the query
  embedding = embedding_generator.generate(query)

  # Search for similar cached queries
  matches = vector_store.search(embedding, limit: 1)

  if matches.any? && matches.first[:similarity] >= threshold
    id = matches.first[:id]
    entry_data = cache_store.get(id)

    if entry_data
      # Only count a hit once we know the payload is still present;
      # previously a stale vector recorded a hit AND a miss for one lookup.
      record_hit!
      return Serializer.deserialize(entry_data[:response])
    end

    # The cache entry expired but its vector lingered; drop the
    # orphaned vector so it cannot shadow closer matches later.
    vector_store.delete(id)
  end

  # Cache miss - execute block
  record_miss!
  response = block.call

  # Store in cache
  store(query: query, response: response, embedding: embedding, ttl: ttl)

  response
end

.invalidate(query, threshold: nil, limit: 100) ⇒ Integer

Invalidate all cache entries similar to the given query

Parameters:

  • query (String)

    the query to match against

  • threshold (Float) (defaults to: nil)

    similarity threshold (defaults to config)

  • limit (Integer) (defaults to: 100)

    maximum entries to invalidate

Returns:

  • (Integer)

    number of entries invalidated



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/ruby_llm/semantic_cache.rb', line 156

# Invalidate every cached entry similar to +query+.
#
# @param query [String] the query to match against
# @param threshold [Float, nil] similarity cutoff (config default when nil)
# @param limit [Integer] maximum entries to invalidate
# @return [Integer] number of entries invalidated
def invalidate(query, threshold: nil, limit: 100)
  cutoff = threshold || config.similarity_threshold
  vector = embedding_generator.generate(query)
  victims = vector_store.search(vector, limit: limit)
                        .select { |match| match[:similarity] >= cutoff }

  victims.each do |match|
    vector_store.delete(match[:id])
    cache_store.delete(match[:id])
  end

  victims.size
end

.record_hit!Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



238
239
240
241
242
# File 'lib/ruby_llm/semantic_cache.rb', line 238

# Increment the persisted hit counter.
#
# @api private
# @return [void]
def record_hit!
  load_stats!
  @hits += 1
  persist_stats!
end

.record_miss!Object

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



245
246
247
248
249
# File 'lib/ruby_llm/semantic_cache.rb', line 245

# Increment the persisted miss counter.
#
# @api private
# @return [void]
def record_miss!
  load_stats!
  @misses += 1
  persist_stats!
end

.reset!Object

Reset the cache stores (clears stores but preserves configuration)



186
187
188
189
190
191
192
193
# File 'lib/ruby_llm/semantic_cache.rb', line 186

# Reset the cache stores while preserving the configuration.
# Memoized collaborators are dropped and rebuilt lazily on next use.
#
# @return [void]
def reset!
  @embedding_generator = @vector_store = @cache_store = nil
  @stats_loaded = false
  @hits = @misses = 0
end

.reset_all!Object

Fully reset including configuration (useful for testing)



196
197
198
199
# File 'lib/ruby_llm/semantic_cache.rb', line 196

# Fully reset the module, including the configuration (useful for
# testing). Delegates store/stat teardown to #reset!.
#
# @return [void]
def reset_all!
  @config = nil
  reset!
end

.search(query, limit: 5) ⇒ Array<Hash>

Search for similar cached queries

Parameters:

  • query (String)

    the query to search for

  • limit (Integer) (defaults to: 5)

    maximum number of results

Returns:

  • (Array<Hash>)

    matching entries with similarity scores



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/ruby_llm/semantic_cache.rb', line 99

# Search the cache for entries similar to +query+.
#
# Vector matches whose backing cache entry has expired are skipped.
#
# @param query [String] the query to search for
# @param limit [Integer] maximum number of results
# @return [Array<Hash>] matching entries with similarity scores
def search(query, limit: 5)
  vector = embedding_generator.generate(query)

  vector_store.search(vector, limit: limit).each_with_object([]) do |match, results|
    data = cache_store.get(match[:id])
    next unless data

    results << {
      query: data[:query],
      response: Serializer.deserialize(data[:response]),
      similarity: match[:similarity],
      metadata: data[:metadata]
    }
  end
end

.statsHash

Get cache statistics

Returns:

  • (Hash)

    cache statistics



175
176
177
178
179
180
181
182
183
# File 'lib/ruby_llm/semantic_cache.rb', line 175

# Report cache statistics.
#
# @return [Hash] hits, misses, hit rate and current entry count
def stats
  load_stats!

  { hits: @hits,
    misses: @misses,
    hit_rate: hit_rate,
    entries: cache_store.size }
end

.store(query:, response:, embedding: nil, metadata: {}, ttl: nil) ⇒ Entry

Store a response in the cache

Parameters:

  • query (String)

    the query

  • response

    the response to cache

  • embedding (Array<Float>) (defaults to: nil)

    pre-computed embedding (optional)

  • metadata (Hash) (defaults to: {})

    additional metadata

  • ttl (Integer) (defaults to: nil)

    time-to-live in seconds

Returns:

  • (Entry)

    the created entry



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/ruby_llm/semantic_cache.rb', line 78

# Store a response in the cache.
#
# @param query [String] the query
# @param response [Object] the response to cache
# @param embedding [Array<Float>, nil] pre-computed embedding (optional)
# @param metadata [Hash] additional metadata
# @param ttl [Integer, nil] time-to-live in seconds (config default when nil)
# @return [Entry] the created entry
def store(query:, response:, embedding: nil, metadata: {}, ttl: nil)
  embedding ||= embedding_generator.generate(query)
  ttl ||= config.ttl_seconds

  entry = Entry.new(
    query: query,
    response: Serializer.serialize(response),
    embedding: embedding,
    # Explicit value instead of Ruby 3.1 hash-value-omission shorthand:
    # clearer and compatible with earlier Ruby versions.
    metadata: metadata
  )

  vector_store.add(entry.id, embedding)
  cache_store.set(entry.id, entry.to_h, ttl: ttl)

  entry
end

.vector_storeObject

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.



228
229
230
# File 'lib/ruby_llm/semantic_cache.rb', line 228

# Lazily construct and memoize the vector store backend.
#
# @api private
# @return [Object] the configured vector store instance
def vector_store
  @vector_store || (@vector_store = build_vector_store)
end

.wrap(chat, threshold: nil, ttl: nil, on_cache_hit: nil, max_messages: nil) ⇒ Middleware

Wrap a RubyLLM::Chat instance with caching middleware

Parameters:

  • chat (RubyLLM::Chat)

    the chat instance to wrap

  • threshold (Float, nil) (defaults to: nil)

    similarity threshold override

  • ttl (Integer, nil) (defaults to: nil)

    TTL override in seconds

  • on_cache_hit (Proc, nil) (defaults to: nil)

    callback for cache hits, receives (chat, user_message, cached_response)

  • max_messages (Integer, :unlimited, false, nil) (defaults to: nil)

    max conversation messages before skipping cache

    • Integer: skip cache after N messages (default: 1, only first message cached)

    • :unlimited or false: cache all messages regardless of conversation length

    • nil: use config default

Returns:



211
212
213
214
215
216
217
218
219
# File 'lib/ruby_llm/semantic_cache.rb', line 211

# Wrap a RubyLLM::Chat instance with caching middleware.
#
# @param chat [RubyLLM::Chat] the chat instance to wrap
# @param threshold [Float, nil] similarity threshold override
# @param ttl [Integer, nil] TTL override in seconds
# @param on_cache_hit [Proc, nil] callback for cache hits, receives
#   (chat, user_message, cached_response)
# @param max_messages [Integer, :unlimited, false, nil] max conversation
#   messages before skipping cache; nil uses the config default
# @return [Middleware] the wrapped chat
def wrap(chat, threshold: nil, ttl: nil, on_cache_hit: nil, max_messages: nil)
  options = {
    threshold: threshold,
    ttl: ttl,
    on_cache_hit: on_cache_hit,
    max_messages: max_messages
  }

  Middleware.new(chat, **options)
end