Class: KDTree

Inherits:
Object
  • Object
show all
Defined in:
ext/kdtree.c

Instance Method Summary collapse

Constructor Details

#new(points) ⇒ Object #new(io) ⇒ Object

Returns a new KDTree. To construct a tree, pass an array of points. Each point should be an array of the form [x, y, id], where x and y are floats and id is an integer. The id is arbitrary and will be returned to you whenever you search with nearest or nearestk.

# create a new tree
points = []
points << [47.6, -122.3, 1] # Seattle
points << [40.7, -74.0, 2]  # New York
kd = KDTree.new(points)

Alternately, you can pass in an IO object containing a persisted kdtree. This makes it possible to build the tree in advance, persist it, and start it up quickly later. See persist for more information.



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'ext/kdtree.c', line 114

static VALUE kdtree_initialize(VALUE kdtree, VALUE arg)
{
    KDTREEP;

    if (TYPE(arg) == T_ARRAY) {
        // init from array of pints
        VALUE points = arg;
        kdtreep->len = RARRAY_LEN(points);
        kdtreep->nodes = ALLOC_N(struct kdtree_node, kdtreep->len);

        int i;
        for (i = 0; i < RARRAY_LEN(points); ++i) {
            struct kdtree_node *n = kdtreep->nodes + i;
            
            VALUE ptr = RARRAY_PTR(points)[i];
            VALUE v = rb_check_array_type(ptr);
            if (NIL_P(v) || RARRAY_LEN(v) != 3) {
                continue;
            }
            VALUE *a = RARRAY_PTR(ptr);
            n->x = NUM2DBL(a[0]);
            n->y = NUM2DBL(a[1]);        
            n->id = NUM2INT(a[2]);
        }

        // now build the tree
        kdtreep->root = kdtree_build(kdtreep, 0, kdtreep->len, 0);
    } else if (rb_respond_to(arg, rb_intern("read"))) {
        VALUE io = arg;
        if (rb_respond_to(io, rb_intern("binmode"))) {
            rb_funcall2(io, rb_intern("binmode"), 0, 0);
        }

        struct rb_io_t *fptr = RFILE(rb_io_taint_check(io))->fptr;
        rb_io_check_readable(fptr);

        // check magic
        char buf[4];
        read_all(fptr, buf, 4);
        if (memcmp(KDTREE_MAGIC, buf, 4) != 0) {
            rb_raise(rb_eRuntimeError, "wrong magic number in kdtree file");
        }
        
        // read start of the struct
        read_all(fptr, (char *)kdtreep, sizeof(struct kdtree_data) - sizeof(struct kdtree_node *));
        // read the nodes
        kdtreep->nodes = ALLOC_N(struct kdtree_node, kdtreep->len);
        read_all(fptr, (char *)kdtreep->nodes, sizeof(struct kdtree_node) * kdtreep->len);
    } else {
        rb_raise(rb_eTypeError, "array or IO required to init KDTree");
    }
    
    return kdtree;
}

Instance Method Details

#nearest(x, y) ⇒ Object

Finds the point closest to x, y and returns the id for that point. Returns -1 if the tree is empty.

points = []
points << [47.6, -122.3, 1] # Seattle
points << [40.7, -74.0, 2]  # New York
kd = KDTree.new(points)

# which city is closest to Portland?
kd.nearest(45.5, -122.8) #=> 1
# which city is closest to Boston?
kd.nearest(42.4, -71.1)  #=> 2


217
218
219
220
221
222
223
224
225
226
227
228
229
# File 'ext/kdtree.c', line 217

static VALUE kdtree_nearest(VALUE kdtree, VALUE x, VALUE y)
{
    KDTREEP;

    int n_index = -1;
    float n_dist = INT_MAX;

    kdtree_nearest0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), 0, &n_index, &n_dist);
    if (n_index == -1) {
        return -1;
    }
    return INT2NUM((kdtreep->nodes + n_index)->id);
}

#nearestk(x, y, k) ⇒ Array

Finds the k points closest to x, y. Returns an array of ids, sorted by distance. Returns an empty array if the tree is empty. Note that k is capped at 255.

points = []
points << [47.6, -122.3, 1] # Seattle
points << [45.5, -122.8, 2] # Portland
points << [40.7, -74.0,  3] # New York
kd = KDTree.new(points)

# which two cities are closest to San Francisco?
kd.nearest(34.1, -118.2) #=> [2, 1]

Returns:

  • (Array)


293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
# File 'ext/kdtree.c', line 293

static VALUE kdtree_nearestk(VALUE kdtree, VALUE x, VALUE y, VALUE k)
{
    KDTREEP;

    // note I leave an extra slot here at the end because of the way our binary insert works
    kresult k_list[MAX_K + 1];
    int k_len = 0;
    float k_dist = INT_MAX;

    int ki = NUM2INT(k);
    if (ki < 1) {
        ki = 1;
    } else if (ki > MAX_K) {
        ki = MAX_K;
    }
    kdtree_nearestk0(kdtreep, kdtreep->root, NUM2DBL(x), NUM2DBL(y), ki, 0, k_list, &k_len, &k_dist);

    // convert result to ruby array
    VALUE ary = rb_ary_new();
    int i;
    for (i = 0; i < k_len; ++i) {
        rb_ary_push(ary, INT2NUM(kdtreep->nodes[k_list[i].index].id));
    }
    return ary;
}

#persist(io) ⇒ Object

Writes the tree out to io so you can quickly load it later with KDTree.new. This avoids the startup cost of initializing a tree. Apart from a small header, the size of the file is proportional to the number of points, requiring 20 bytes per point.

This file is NOT PORTABLE across different architectures due to endian issues.

points = []
points << [47.6, -122.3, 1] # Seattle
points << [45.5, -122.8, 2] # Portland
points << [40.7, -74.0,  3] # New York
kd = KDTree.new(points)

# persist the tree to disk
File.open("treefile", "w") { |f| kd.persist(f) }

...

# later, read the tree from disk
kd2 = File.open("treefile") { |f| KDTree.new(f) }


412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
# File 'ext/kdtree.c', line 412

static VALUE kdtree_persist(VALUE kdtree, VALUE io)
{
    KDTREEP;
    
    if (!rb_respond_to(io, rb_intern("write"))) {
        rb_raise(rb_eTypeError, "instance of IO needed");
    }
    if (rb_respond_to(io, rb_intern("binmode"))) {
        rb_funcall2(io, rb_intern("binmode"), 0, 0);
    }

    VALUE str = rb_str_buf_new(0);
    rb_str_buf_cat(str, KDTREE_MAGIC, 4);
    rb_str_buf_cat(str, (char*)kdtreep, sizeof(struct kdtree_data) - sizeof(struct kdtree_node *));
    rb_str_buf_cat(str, (char*)kdtreep->nodes, sizeof(struct kdtree_node) * kdtreep->len);
    rb_io_write(io, str);
    return io;
}

#to_sString

A string that tells you a bit about the tree.

Returns:

  • (String)


437
438
439
440
441
442
443
444
# File 'ext/kdtree.c', line 437

static VALUE kdtree_to_s(VALUE kdtree)
{
    KDTREEP;

    char buf[256];
    sprintf(buf, "#<%s:%p nodes=%d>", rb_obj_classname(kdtree), (void*)kdtree, kdtreep->len);
    return rb_str_new(buf, strlen(buf));
}