Class: BloomFilter

Inherits:
Object
  • Object
show all
Defined in:
lib/bloomfilter.rb,
ext/sbloomfilter.c

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.new(*args) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'ext/sbloomfilter.c', line 76

static VALUE bf_s_new(int argc, VALUE *argv, VALUE self) {
    struct BloomFilter *bf;
    VALUE arg1, arg2, arg3, arg4, arg5, obj;
    int m, k, s, b, r, bytes;

    obj = Data_Make_Struct(self, struct BloomFilter, NULL, bits_free, bf);

    /* default = Fugou approach :-) */
    arg1 = INT2FIX(100000000);
    arg2 = INT2FIX(4);
    arg3 = INT2FIX(0);
    arg4 = INT2FIX(1);
    arg5 = INT2FIX(0);

    switch (argc) {
        case 5:
	    if (argv[4] == Qtrue) {
		    arg5 = INT2FIX(1);
	    }
        case 4:
	    arg4 = argv[3];
        case 3:
	    arg3 = argv[2];
        case 2:
	    arg2 = argv[1];
        case 1:
	    arg1 = argv[0];
	    break;
    }

    m = FIX2INT(arg1);
    k = FIX2INT(arg2);
    s = FIX2INT(arg3);
    b = FIX2INT(arg4);
    r = FIX2INT(arg5);

    if (b < 1 || b > 8)
        rb_raise(rb_eArgError, "bucket size");
    if (m < 1)
        rb_raise(rb_eArgError, "array size");
    if (k < 1)
        rb_raise(rb_eArgError, "hash length");
    if (s < 0)
        rb_raise(rb_eArgError, "random seed");

    bf->b = b;
    bf->m = m;
    bf->k = k;
    bf->s = s;
    bf->r = r;
    bf->num_set = 0;

    bytes = ((m * b) + 15) / 8;
    bf->ptr = ALLOC_N(unsigned char, bytes);

    /* initialize the bits with zeros */
    memset(bf->ptr, 0, bytes);
    rb_iv_set(obj, "@hash_value", rb_hash_new());

    return obj;
}

Instance Method Details

#[](key) ⇒ Object



19
20
21
22
# File 'lib/bloomfilter.rb', line 19

def [] key
  return nil unless include?(key)
  @hash_value[key]
end

#[]=(key, value) ⇒ Object



14
15
16
17
# File 'lib/bloomfilter.rb', line 14

def []= key, value
  insert(key)
  @hash_value[key] = value
end

#bObject



150
151
152
153
154
# File 'ext/sbloomfilter.c', line 150

static VALUE bf_b(VALUE self) {
    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);
    return INT2FIX(bf->b);
}

#delete(key) ⇒ Object



199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# File 'ext/sbloomfilter.c', line 199

static VALUE bf_delete(VALUE self, VALUE key) {
    int index, seed;
    int i, len, m, k, s;
    char *ckey;

    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);

    Check_Type(key, T_STRING);
    ckey = STR2CSTR(key);
    len = (int) (RSTRINGLEN(key)); /* length of the string in bytes */

    m = bf->m;
    k = bf->k;
    s = bf->s;

    for (i = 0; i <= k - 1; i++) {
        /* seeds for hash functions */
        seed = i + s;

        /* hash */
        index = (int) (crc32((unsigned int) (seed), ckey, len) % (unsigned int) (m));

        /*  set a bit at the index */
        bucket_unset(bf, index);
    }

    bf->num_set += 1;
    return Qnil;
}

#include?(key) ⇒ Boolean

Returns:

  • (Boolean)


231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'ext/sbloomfilter.c', line 231

static VALUE bf_include(VALUE self, VALUE key) {
    int index, seed;
    int i, len, m, k, s;
    char *ckey;

    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);

    Check_Type(key, T_STRING);
    ckey = STR2CSTR(key);
    len = (int) (RSTRINGLEN(key)); /* length of the string in bytes */

    m = bf->m;
    k = bf->k;
    s = bf->s;

    for (i = 0; i <= k - 1; i++) {
        /* seeds for hash functions */
        seed = i + s;

        /* hash */
        index = (int) (crc32((unsigned int) (seed), ckey, len) % (unsigned int) (m));

        /* check the bit at the index */
        if (!bucket_check(bf, index))
            return Qfalse; /* i.e., it is a new entry ; escape the loop */
    }

    return Qtrue;
}

#insert(key) ⇒ Object



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'ext/sbloomfilter.c', line 168

static VALUE bf_insert(VALUE self, VALUE key) {
    int index, seed;
    int i, len, m, k, s;
    char *ckey;

    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);

    Check_Type(key, T_STRING);
    ckey = STR2CSTR(key);
    len = (int) (RSTRINGLEN(key)); /* length of the string in bytes */

    m = bf->m;
    k = bf->k;
    s = bf->s;

    for (i = 0; i <= k - 1; i++) {
        /* seeds for hash functions */
        seed = i + s;

        /* hash */
        index = (int) (crc32((unsigned int) (seed), ckey, len) % (unsigned int) (m));

        /*  set a bit at the index */
        bucket_set(bf, index);
    }

    bf->num_set += 1;
    return Qnil;
}

#kObject



144
145
146
147
148
# File 'ext/sbloomfilter.c', line 144

static VALUE bf_k(VALUE self) {
    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);
    return INT2FIX(bf->k);
}

#key?(key) ⇒ Boolean

Returns:

  • (Boolean)


24
25
26
# File 'lib/bloomfilter.rb', line 24

def key? key
  include?(key)
end

#keysObject



28
29
30
# File 'lib/bloomfilter.rb', line 28

def keys
  @hash_value.keys
end

#mObject



138
139
140
141
142
# File 'ext/sbloomfilter.c', line 138

static VALUE bf_m(VALUE self) {
    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);
    return INT2FIX(bf->m);
}

#num_setObject



162
163
164
165
166
# File 'ext/sbloomfilter.c', line 162

static VALUE bf_num_set(VALUE self) {
    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);
    return INT2FIX(bf->num_set);
}

#rObject



156
157
158
159
160
# File 'ext/sbloomfilter.c', line 156

static VALUE bf_r(VALUE self) {
    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);
    return bf->r == 0 ? Qfalse : Qtrue;
}

#statsObject



4
5
6
7
8
9
10
11
12
# File 'lib/bloomfilter.rb', line 4

def stats
  fp = ((1.0 - Math.exp(-(self.k * self.num_set).to_f / self.m)) ** self.k) * 100
  printf "Number of filter buckets (m): %d\n" % self.m
  printf "Number of bits per buckets (b): %d\n" % self.b
  printf "Number of filter elements (n): %d\n" % self.num_set
  printf "Number of filter hashes (k) : %d\n" % self.k
  printf "Raise on overflow? (r) : %s\n" % self.r.to_s
  printf "Predicted false positive rate = %.2f%\n" % fp
end

#to_sObject



262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'ext/sbloomfilter.c', line 262

static VALUE bf_to_s(VALUE self) {
    struct BloomFilter *bf;
    unsigned char *ptr;
    int i;
    VALUE str;

    Data_Get_Struct(self, struct BloomFilter, bf);
    str = rb_str_new(0, bf->m);

    ptr = (unsigned char *) RSTRINGPTR(str);
    for (i = 0; i < bf->m; i++)
        *ptr++ = bucket_get(bf, i) ? '1' : '0';

    return str;
}