Class: Ollama::Documents::Cache::SQLiteCache

Inherits:
Object
  • Object
show all
Includes:
Enumerable, Common
Defined in:
lib/ollama/documents/cache/sqlite_cache.rb

Instance Attribute Summary collapse

Attributes included from Common

#prefix

Instance Method Summary collapse

Methods included from Common

#collections, #pre, #unpre

Methods included from Utils::Math

#cosine_similarity, #norm

Constructor Details

#initialize(prefix:, embedding_length: 1_024, filename: ':memory:', debug: false) ⇒ SQLiteCache



9
10
11
12
13
14
15
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 9

# Creates the cache and prepares its database.
#
# @param prefix [Object] forwarded unchanged to the superclass initializer
# @param embedding_length [Integer] length of the embeddings vector
# @param filename [String] filename for the database (defaults to ':memory:')
# @param debug [Boolean] stored in @debug
def initialize(prefix:, embedding_length: 1_024, filename: ':memory:', debug: false)
  super(prefix:)
  @embedding_length, @filename, @debug = embedding_length, filename, debug
  setup_database(filename)
end

Instance Attribute Details

#embedding_length ⇒ Object (readonly)

length of the embeddings vector



19
20
21
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 19

# Reader for the length of the embeddings vector.
#
# @return [Object] the current value of @embedding_length
def embedding_length = @embedding_length

#filename ⇒ Object (readonly)

Filename of the database; ':memory:' selects an in-memory database.



17
18
19
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 17

# Reader for the database filename.
#
# @return [Object] the current value of @filename
def filename = @filename

Instance Method Details

#[](key) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 21

# Looks up the record stored under +key+.
#
# The key is passed through +pre+ before it is bound to the query, which
# joins +records+ with +embeddings+ on the embedding rowid.
#
# @param key [Object] the key to look up
# @return [Object, nil] the result of +convert_value_to_record+ for the first
#   matching row, or nil when the query yields no row
def [](key)
  sql = %{
      SELECT records.key, records.text, records.norm, records.source,
        records.tags, embeddings.embedding
      FROM records
      INNER JOIN embeddings ON records.embedding_id = embeddings.rowid
      WHERE records.key = ?
    }
  row = execute(sql, pre(key))&.first
  return unless row
  stored_key, text, norm, source, tags_json, packed = *row
  # The embedding column holds floats packed with the "f*" format.
  embedding = packed.unpack("f*")
  # The tags column holds JSON text; it is parsed into a Tags collection.
  tags      = Ollama::Utils::Tags.new(JSON(tags_json.to_s).to_a, source:)
  convert_value_to_record(key: stored_key, text:, norm:, source:, tags:, embedding:)
end

#[]=(key, value) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 38

# Stores +value+ under +key+.
#
# The value is first passed through +convert_value_to_record+. Its embedding
# is packed with the "f*" format and inserted into +embeddings+; the new
# rowid is then referenced by the row inserted into +records+. Both inserts
# run inside one transaction.
#
# If any statement after BEGIN raises, the transaction is rolled back before
# the error is re-raised, so a failed insert does not leave a transaction
# open on the connection.
#
# @param key [Object] the key, passed through +pre+ before it is stored
# @param value [Object] anything +convert_value_to_record+ accepts
# @return [Object] the result of executing COMMIT
def []=(key, value)
  value     = convert_value_to_record(value)
  embedding = value.embedding.pack("f*")
  execute(%{BEGIN})
  begin
    execute(%{INSERT INTO embeddings(embedding) VALUES(?)}, [ embedding ])
    embedding_id, = execute(%{ SELECT last_insert_rowid() }).flatten
    execute(%{
    INSERT INTO records(key,text,embedding_id,norm,source,tags)
    VALUES(?,?,?,?,?,?)
  }, [ pre(key), value.text, embedding_id, value.norm, value.source, JSON(value.tags) ])
    execute(%{COMMIT})
  rescue => error
    begin
      execute(%{ROLLBACK})
    rescue
      # The original error is the one worth reporting; a failing ROLLBACK
      # must not replace it.
    end
    raise error
  end
end

#clear ⇒ Object



94
95
96
97
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 94

# Deletes every record whose key matches the LIKE pattern "<prefix>%".
#
# @return [self]
def clear
  pattern = "#{@prefix}%"
  execute(%{DELETE FROM records WHERE key LIKE ?}, [ pattern ])
  self
end

#clear_for_tags(tags = nil) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 82

# Deletes the records that +find_records_for_tags+ returns for +tags+, or
# calls +clear+ when no tags are given.
#
# When tags are given but no record matches them, nothing is deleted
# (previously this case raised NoMethodError on nil).
#
# @param tags [Object, nil] anything accepted by Ollama::Utils::Tags.new
# @return [self]
def clear_for_tags(tags = nil)
  tags = Ollama::Utils::Tags.new(tags).to_a
  if tags.present?
    # The first column of every returned row is the record key.
    keys = find_records_for_tags(tags).map(&:first)
    unless keys.empty?
      # Keys are inlined as single-quoted literals escaped via +quote+.
      key_list = '(%s)' % keys.map { "'%s'" % quote(_1) }.join(?,)
      execute(%{DELETE FROM records WHERE key IN #{key_list}})
    end
  else
    clear
  end
  self
end

#convert_to_vector(vector) ⇒ Object



119
120
121
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 119

# Returns +vector+ unchanged; this cache applies no conversion here.
#
# @param vector [Object]
# @return [Object] the very same object that was passed in
def convert_to_vector(vector) = vector

#delete(key) ⇒ Object



58
59
60
61
62
63
64
65
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 58

# Deletes the record stored under +key+.
#
# The DELETE statement is executed whether or not the key exists.
#
# @param key [Object] the key, passed through +pre+ before it is bound
# @return [Object, nil] the prefixed key if +key?+ reported it as present
#   before deletion, otherwise nil
def delete(key)
  deleted_key = (pre(key) if key?(key))
  execute(%{ DELETE FROM records WHERE records.key = ? }, pre(key))
  deleted_key
end

#each(prefix: "#@prefix%", &block) ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 99

# Iterates over the records whose key matches the LIKE pattern +prefix+.
#
# For every row the block receives the key as stored (it is not passed
# through +unpre+) and the result of +convert_value_to_record+.
#
# Without a block an Enumerator over the same pairs is returned instead of
# failing on the missing block.
#
# @param prefix [String] LIKE pattern for the key column; defaults to
#   "<prefix>%" built from @prefix
# @yieldparam key [Object] the stored key
# @yieldparam value [Object] the converted record
# @return [Enumerator, Object] an Enumerator when no block is given,
#   otherwise the rows returned by +execute+
def each(prefix: "#@prefix%", &block)
  return enum_for(__method__, prefix:) unless block
  execute(%{
    SELECT records.key, records.text, records.norm, records.source,
      records.tags, embeddings.embedding
    FROM records
    INNER JOIN embeddings ON records.embedding_id = embeddings.rowid
    WHERE records.key LIKE ?
  }, [ prefix ]).each do |key, text, norm, source, tags, embedding|
    # The embedding column holds floats packed with the "f*" format.
    embedding = embedding.unpack("f*")
    # The tags column holds JSON text; it is parsed into a Tags collection.
    tags      = Ollama::Utils::Tags.new(JSON(tags.to_s).to_a, source:)
    value     = convert_value_to_record(key:, text:, norm:, source:, tags:, embedding:)
    block.(key, value)
  end
end

#find_records(needle, tags: nil, max_records: nil) ⇒ Object



145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 145

# Queries the embeddings table with +needle+ and returns the matching
# records, restricted to the rows +find_records_for_tags+ returns for +tags+.
#
# The needle is packed with the "f*" format and bound to the
# "embedding MATCH ?" condition; the record limit is bound to "k = ?".
# NOTE(review): this presumably is a nearest-neighbour search implemented by
# the embeddings table — confirm against setup_database.
#
# @param needle [Array] embedding whose size must equal @embedding_length
# @param tags [Object, nil] forwarded to +find_records_for_tags+
# @param max_records [Integer, nil] upper bound on the number of results; the
#   effective limit is the smallest of this value, +size+ and 4_096
# @return [Array] one +convert_value_to_record+ result per row, with keys
#   passed through +unpre+
# @raise [ArgumentError] if the needle size differs from @embedding_length
def find_records(needle, tags: nil, max_records: nil)
  if needle.size != @embedding_length
    raise ArgumentError, "needle embedding length != %s" % @embedding_length
  end
  packed_needle = needle.pack("f*")
  limit         = [ max_records, size, 4_096 ].compact.min
  candidates    = find_records_for_tags(tags)
  # The last column of every candidate row is its embedding_id.
  rowids_where  = '(%s)' % candidates.transpose.last&.join(?,)
  rows = execute(%{
    SELECT records.key, records.text, records.norm, records.source,
      records.tags, embeddings.embedding
    FROM records
    INNER JOIN embeddings ON records.embedding_id = embeddings.rowid
    WHERE embeddings.rowid IN #{rowids_where}
      AND embeddings.embedding MATCH ? AND embeddings.k = ?
  }, [ packed_needle, limit ])
  rows.map do |stored_key, text, norm, source, tags_json, packed|
    key       = unpre(stored_key)
    embedding = packed.unpack("f*")
    tags      = Ollama::Utils::Tags.new(JSON(tags_json.to_s).to_a, source:)
    convert_value_to_record(key:, text:, norm:, source:, tags:, embedding:)
  end
end

#find_records_for_tags(tags) ⇒ Object



123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 123

# Returns the [key, tags, embedding_id] rows of all records under this
# cache's prefix, optionally restricted to records carrying at least one of
# +tags+.
#
# Tag filtering happens in two steps: a LIKE condition per tag narrows the
# rows in SQL, then the stored JSON tags are parsed and intersected with the
# requested tags in Ruby, which is the exact check.
#
# The tag patterns are passed as bind parameters rather than spliced into
# the statement, so the SQL neither relies on double-quoted string literals
# nor on escaping of the tag text.
#
# @param tags [Object, nil] anything accepted by Ollama::Utils::Tags.new;
#   blank means "no tag filter"
# @return [Array<Array>] rows of key, tags JSON text and embedding_id
def find_records_for_tags(tags)
  tags_filter = Ollama::Utils::Tags.new(tags).to_a if tags.present?
  tags_where  = ''
  bind_vars   = [ "#@prefix%" ]
  if tags_filter && !tags_filter.empty?
    tags_where = ' AND (%s)' % tags_filter.map { 'tags LIKE ?' }.join(' OR ')
    # Concatenation (not interpolation) keeps the raw string content even if
    # the tag class customizes #to_s.
    bind_vars.concat(tags_filter.map { '%' + _1 + '%' })
  end
  records = execute(%{
    SELECT key, tags, embedding_id
    FROM records
    WHERE key LIKE ?#{tags_where}
  }, bind_vars)
  return records unless tags_filter
  # LIKE also matches substrings of longer tags, so confirm on parsed tags.
  records.select { |_key, tags_json, _embedding_id|
    !(tags_filter & JSON(tags_json.to_s).to_a).empty?
  }
end

#full_each(&block) ⇒ Object



115
116
117
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 115

# Iterates like +each+, but with the LIKE pattern "%" instead of the
# prefix-based default.
#
# @return [Object] whatever +each+ returns
def full_each(&block)
  match_everything = '%'
  each(prefix: match_everything, &block)
end

#key?(key) ⇒ Boolean



51
52
53
54
55
56
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 51

# Tells whether exactly one record is stored under +key+.
#
# @param key [Object] the key, passed through +pre+ before it is bound
# @return [Boolean] true if the count of matching records equals 1
def key?(key)
  count, = execute(
    %{ SELECT count(records.key) FROM records WHERE records.key = ? },
    pre(key)
  ).flatten
  count == 1
end

#size ⇒ Object



78
79
80
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 78

# Counts the records whose key matches the LIKE pattern "<prefix>%".
#
# @return [Object] the first value of the COUNT(*) result
def size
  rows = execute(%{SELECT COUNT(*) FROM records WHERE key LIKE ?}, [ "#{@prefix}%" ])
  rows.flatten.first
end

#tags ⇒ Object



67
68
69
70
71
72
73
74
75
76
# File 'lib/ollama/documents/cache/sqlite_cache.rb', line 67

# Collects the tags of all records whose key matches "<prefix>%".
#
# Each distinct value of the tags column is parsed as JSON and every element
# is added to a fresh Ollama::Utils::Tags instance.
#
# @return [Ollama::Utils::Tags] the collected tags
def tags
  collected = Ollama::Utils::Tags.new
  rows = execute(%{
      SELECT DISTINCT(tags) FROM records WHERE key LIKE ?
    }, [ "#@prefix%" ]
  )
  rows.flatten.each_with_object(collected) do |tags_json, acc|
    JSON(tags_json).each { |tag| acc.add(tag) }
  end
end