Class: Anngler::Index

Inherits:
Object
  • Object
show all
Defined in:
lib/anngler/index.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(bucket_name, n_projections, n_features, seed: nil, n_trees: 1, storage: Anngler::Storage::MemoryBackend.new) ⇒ Index

Returns a new instance of Index.



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/anngler/index.rb', line 31

# Sets up the index state and then calls gen_trees.
#
# @param bucket_name [String] stored as-is and used as the prefix of every
#   storage bucket key; also the source of the default seed
# @param n_projections [Integer] stored in @n_projections
# @param n_features [Integer] stored in @n_features
# @param seed [Integer, nil] seed for the random number generator; when
#   omitted, the bucket name parsed as a base-36 integer is used so that the
#   same bucket name always produces the same generator
# @param n_trees [Integer] stored in @n_trees
# @param storage [Object] backend object stored in @storage; defaults to a
#   fresh Anngler::Storage::MemoryBackend
def initialize(bucket_name, n_projections, n_features, seed: nil, n_trees: 1,
               storage: Anngler::Storage::MemoryBackend.new)
    @n_projections = n_projections
    @n_features = n_features
    # Only fall back to the bucket name when no explicit seed was given.
    @seed = seed || bucket_name.to_i(36)
    @bucket_name = bucket_name
    @n_trees = n_trees
    @storage = storage
    @rng = Random.new(@seed)

    # gen_trees is defined outside this view; presumably it builds the
    # projection matrices from @rng - confirm.
    gen_trees
end

Instance Attribute Details

#bucket_name ⇒ Object (readonly)

The name of the bucket. It prefixes every storage key, so several independent hash tables can share one store (for example, one Redis instance).



17
18
19
# File 'lib/anngler/index.rb', line 17

# @return [Object] the bucket name given to the constructor
def bucket_name; @bucket_name; end

#n_features ⇒ Object (readonly)

the number of features of the vectors we are storing



8
9
10
# File 'lib/anngler/index.rb', line 8

# @return [Object] the n_features value given to the constructor
def n_features; @n_features; end

#n_projections ⇒ Object (readonly)

the number of projections to generate (more projections = fewer vectors per bucket)



11
12
13
# File 'lib/anngler/index.rb', line 11

# @return [Object] the n_projections value given to the constructor
def n_projections; @n_projections; end

#n_trees ⇒ Object (readonly)

how many different projections to overlap (more allows for better accuracy but will slow performance)



26
27
28
# File 'lib/anngler/index.rb', line 26

# @return [Object] the n_trees value given to the constructor (1 by default)
def n_trees; @n_trees; end

#rng ⇒ Object (readonly)

the random number generator for the projection matrices



20
21
22
# File 'lib/anngler/index.rb', line 20

# @return [Random] the generator the constructor created from the seed
def rng; @rng; end

#seed ⇒ Object (readonly)

The seed for our random number generator. To keep it deterministic, when no seed is provided we fall back to the bucket name interpreted as a base-36 integer.



14
15
16
# File 'lib/anngler/index.rb', line 14

# @return [Object] the seed in use: the one given to the constructor, or the
#   bucket name parsed as a base-36 integer when none was given
def seed; @seed; end

#storage ⇒ Object (readonly)

which storage service to use (either redis or local memory)



29
30
31
# File 'lib/anngler/index.rb', line 29

# @return [Object] the storage backend given to the constructor
def storage; @storage; end

#trees ⇒ Object (readonly)

an n_trees x n_features x n_projections matrix to store our projections



23
24
25
# File 'lib/anngler/index.rb', line 23

# @return [Object] whatever is held in @trees
# NOTE(review): @trees is not assigned in the code visible here; presumably
# gen_trees populates it - confirm.
def trees; @trees; end

Instance Method Details

#add(vec, label: "") ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
# File 'lib/anngler/index.rb', line 51

# Stores a vector, together with its label, once per hash that calc_hashes
# yields for it.
#
# @param vec [Object] the vector to index; handed to calc_hashes and pack_data
# @param label [String] label serialized alongside the vector
# @return [Object] the receiver of each_with_index, i.e. the value returned
#   by calc_hashes
def add(vec, label: "")
    tree_hashes = calc_hashes(vec)
    # One serialized payload is shared by every tree's bucket.
    payload = pack_data(vec, label)

    tree_hashes.each_with_index do |tree_hash, tree_idx|
        # Bucket keys have the form "<bucket_name>:<tree index>:<hash string>".
        key = "#{@bucket_name}:#{tree_idx}:#{hash2string(tree_hash)}"
        @storage.add_vector(key, payload)
    end
end

#query(vec) ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/anngler/index.rb', line 73

# Looks up the bucket matching vec in every tree and returns the decoded
# entries found there, nearest first.
#
# @param vec [Object] the query vector; handed to calc_hashes and to
#   Helpers.cosine_distance
# @return [Array] the unique entries produced by unpack_data, ordered by
#   ascending Helpers.cosine_distance between vec and each entry's "vec"
def query(vec)
    # Gather the raw (still serialized) contents of the matching bucket in
    # each tree.
    encoded = calc_hashes(vec).each_with_index.each_with_object([]) do |(tree_hash, tree_idx), found|
        key = "#{@bucket_name}:#{tree_idx}:#{hash2string(tree_hash)}"
        found.concat(@storage.query_bucket(key))
    end

    # The same payload can turn up in several trees; decode each one only once.
    entries = encoded.uniq.map { |blob| unpack_data(blob) }

    entries.sort_by { |entry| Helpers.cosine_distance(vec, entry["vec"]) }
end

#remove(vec) ⇒ Object



63
64
65
66
67
68
69
70
71
# File 'lib/anngler/index.rb', line 63

# Asks the storage backend to remove a vector from the matching bucket of
# every tree.
#
# NOTE(review): add stores pack_data(vec, label) while this passes
# encode_vec(vec); whether the two match is decided by the storage backend,
# which is not visible here - confirm.
#
# @param vec [Object] the vector to remove; handed to calc_hashes and
#   encode_vec
# @return [Object] the receiver of each_with_index, i.e. the value returned
#   by calc_hashes
def remove(vec)
    calc_hashes(vec).each_with_index do |tree_hash, tree_idx|
        key = "#{@bucket_name}:#{tree_idx}:#{hash2string(tree_hash)}"
        @storage.remove_vector(key, encode_vec(vec))
    end
end