Class: Anngler::Index
- Inherits:
-
Object
- Object
- Anngler::Index
- Defined in:
- lib/anngler/index.rb
Instance Attribute Summary collapse
-
#bucket_name ⇒ Object
readonly
the name of the bucket to allow multiple different hash tables in redis.
-
#n_features ⇒ Object
readonly
the number of features of the vectors we are storing.
-
#n_projections ⇒ Object
readonly
the number of projections to generate (more projections mean fewer vectors per bucket).
-
#n_trees ⇒ Object
readonly
how many different projections to overlap (more allows for better accuracy but will slow performance).
-
#rng ⇒ Object
readonly
the random number generator for the projection matrices.
-
#seed ⇒ Object
readonly
seed for our random number generator; if no seed is provided, we keep this deterministic by falling back to the bucket name parsed as a base-36 integer.
-
#storage ⇒ Object
readonly
which storage service to use (either redis or local memory).
-
#trees ⇒ Object
readonly
an n_trees x n_features x n_projections matrix to store our projections.
Instance Method Summary collapse
- #add(vec, label: "") ⇒ Object
-
#initialize(bucket_name, n_projections, n_features, seed: nil, n_trees: 1, storage: Anngler::Storage::MemoryBackend.new) ⇒ Index
constructor
A new instance of Index.
- #query(vec) ⇒ Object
- #remove(vec) ⇒ Object
Constructor Details
#initialize(bucket_name, n_projections, n_features, seed: nil, n_trees: 1, storage: Anngler::Storage::MemoryBackend.new) ⇒ Index
Returns a new instance of Index.
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/anngler/index.rb', line 31
# Builds a new index and generates its projection trees.
#
# @param bucket_name name used as the prefix of every storage bucket key; when
#   no seed is given it is also parsed with +to_i(36)+ to derive one
# @param n_projections stored as-is in @n_projections
# @param n_features stored as-is in @n_features
# @param seed seed handed to Random.new; a nil seed falls back to the bucket name
# @param n_trees stored as-is in @n_trees
# @param storage backend object that add/query/remove delegate to
def initialize(
  bucket_name,
  n_projections,
  n_features,
  seed: nil,
  n_trees: 1,
  storage: Anngler::Storage::MemoryBackend.new
)
  @bucket_name = bucket_name
  @n_projections = n_projections
  @n_features = n_features
  @n_trees = n_trees
  @storage = storage

  # Without an explicit seed, the same bucket name always yields the same seed.
  @seed = seed || bucket_name.to_i(36)
  @rng = Random.new(@seed)

  # Called last; gen_trees is defined outside this listing and presumably
  # reads the state assigned above -- confirm before reordering.
  gen_trees
end
Instance Attribute Details
#bucket_name ⇒ Object (readonly)
the name of the bucket to allow multiple different hash tables in redis
17 18 19 |
# File 'lib/anngler/index.rb', line 17
# Reader for @bucket_name.
def bucket_name
  @bucket_name
end
#n_features ⇒ Object (readonly)
the number of features of the vectors we are storing
8 9 10 |
# File 'lib/anngler/index.rb', line 8
# Reader for @n_features.
def n_features
  @n_features
end
#n_projections ⇒ Object (readonly)
the number of projections to generate (more projections mean fewer vectors per bucket)
11 12 13 |
# File 'lib/anngler/index.rb', line 11
# Reader for @n_projections.
def n_projections
  @n_projections
end
#n_trees ⇒ Object (readonly)
how many different projections to overlap (more allows for better accuracy but will slow performance)
26 27 28 |
# File 'lib/anngler/index.rb', line 26
# Reader for @n_trees.
def n_trees
  @n_trees
end
#rng ⇒ Object (readonly)
the random number generator for the projection matrices
20 21 22 |
# File 'lib/anngler/index.rb', line 20
# Reader for @rng.
def rng
  @rng
end
#seed ⇒ Object (readonly)
seed for our random number generator; if no seed is provided, we keep this deterministic by falling back to the bucket name parsed as a base-36 integer
14 15 16 |
# File 'lib/anngler/index.rb', line 14
# Reader for @seed.
def seed
  @seed
end
#storage ⇒ Object (readonly)
which storage service to use (either redis or local memory)
29 30 31 |
# File 'lib/anngler/index.rb', line 29
# Reader for @storage.
def storage
  @storage
end
#trees ⇒ Object (readonly)
an n_trees x n_features x n_projections matrix to store our projections
23 24 25 |
# File 'lib/anngler/index.rb', line 23
# Reader for @trees.
def trees
  @trees
end
Instance Method Details
#add(vec, label: "") ⇒ Object
51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/anngler/index.rb', line 51
# Stores a vector, together with an optional label, in one bucket per tree.
#
# @param vec the vector to index
# @param label label serialized alongside the vector (defaults to "")
def add(vec, label: "")
  tree_hashes = calc_hashes(vec)

  # The vector and its label are serialized once and reused for every tree.
  payload = pack_data(vec, label)

  # Bucket keys have the form "<bucket_name>:<tree index>:<hash string>".
  tree_hashes.each_with_index do |tree_hash, tree_idx|
    bucket_key = "#{@bucket_name}:#{tree_idx}:#{hash2string(tree_hash)}"
    @storage.add_vector(bucket_key, payload)
  end
end
#query(vec) ⇒ Object
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/anngler/index.rb', line 73
# Looks up the buckets that vec hashes to in every tree and returns the
# decoded entries ordered by their cosine distance to vec.
#
# @param vec the query vector
# @return [Array] decoded entries, sorted ascending by Helpers.cosine_distance
def query(vec)
  tree_hashes = calc_hashes(vec)

  # Gather the raw entries of the matching bucket from each tree.
  candidates = tree_hashes.each_with_index.inject([]) do |found, (tree_hash, tree_idx)|
    found + @storage.query_bucket("#{@bucket_name}:#{tree_idx}:#{hash2string(tree_hash)}")
  end

  # An entry found in several trees is decoded only once.
  decoded = candidates.uniq.map { |encoded| unpack_data(encoded) }

  decoded.sort_by { |entry| Helpers.cosine_distance(vec, entry["vec"]) }
end
#remove(vec) ⇒ Object
63 64 65 66 67 68 69 70 71 |
# File 'lib/anngler/index.rb', line 63
# Removes a vector from the bucket it hashes to in every tree.
#
# NOTE(review): add stores pack_data(vec, label) while this passes
# encode_vec(vec); confirm the storage backend matches the two forms.
#
# @param vec the vector to remove
def remove(vec)
  tree_hashes = calc_hashes(vec)

  tree_hashes.each_with_index do |tree_hash, tree_idx|
    bucket_key = "#{@bucket_name}:#{tree_idx}:#{hash2string(tree_hash)}"
    @storage.remove_vector(bucket_key, encode_vec(vec))
  end
end