Class: Trie

Inherits:
Object
  • Object
show all
Defined in:
ext/trie/trie.c,
ext/trie/trie.c

Overview

A key-value data structure for string keys which is efficient memory usage and fast retrieval time.

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.read(filename_base) ⇒ Trie

Returns a new trie with data as read from disk.

Returns:



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'ext/trie/trie.c', line 33

static VALUE rb_trie_read(VALUE self, VALUE filename_base) {
  VALUE da_filename = rb_str_dup(filename_base);
  rb_str_concat(da_filename, rb_str_new2(".da"));
  StringValue(da_filename);
    
  VALUE tail_filename = rb_str_dup(filename_base);
  rb_str_concat(tail_filename, rb_str_new2(".tail"));
  StringValue(tail_filename);

  Trie *trie = trie_new();

  VALUE obj;
  obj = Data_Wrap_Struct(self, 0, trie_free, trie);

  DArray *old_da = trie->da;
  Tail *old_tail = trie->tail;

  FILE *da_file = fopen(RSTRING_PTR(da_filename), "r");
  if (da_file == NULL)
    raise_ioerror("Error reading .da file.");

  trie->da = da_read(da_file);
  fclose(da_file);

  FILE *tail_file = fopen(RSTRING_PTR(tail_filename), "r");
  if (tail_file == NULL)
    raise_ioerror("Error reading .tail file.");

  trie->tail = tail_read(tail_file);
  fclose(tail_file);

  da_free(old_da);
  tail_free(old_tail);

  return obj;
}

Instance Method Details

#add(key) ⇒ Object #add(key, value) ⇒ Object

Add a key, or a key and value to the Trie. If you add a key without a value it assumes true for the value.



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'ext/trie/trie.c', line 119

static VALUE rb_trie_add(VALUE self, VALUE args) {
	Trie *trie;
    Data_Get_Struct(self, Trie, trie);

    int size = RARRAY_LEN(args);
    if(size < 1 || size > 2)
		return Qnil;

    VALUE key;
    key = RARRAY_PTR(args)[0];
	StringValue(key);

    TrieData value = size == 2 ? RARRAY_PTR(args)[1] : TRIE_DATA_ERROR;
    
    if(trie_store(trie, (TrieChar*)RSTRING_PTR(key), value))
		return Qtrue;
    else
		return Qnil;
}

#children(prefix) ⇒ Array

Finds all keys in the Trie beginning with the given prefix.

Returns:

  • (Array)


189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'ext/trie/trie.c', line 189

static VALUE rb_trie_children(VALUE self, VALUE prefix) {
    if(NIL_P(prefix))
		return rb_ary_new();

	StringValue(prefix);

    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

	int prefix_size = RSTRING_LEN(prefix);
    TrieState *state = trie_root(trie);
    VALUE children = rb_ary_new();
	TrieChar *char_prefix = (TrieChar*)RSTRING_PTR(prefix);
    
    const TrieChar *iterator = char_prefix;
    while(*iterator != 0) {
		if(!trie_state_is_walkable(state, *iterator))
			return children;
		trie_state_walk(state, *iterator);
		iterator++;
    }

    if(trie_state_is_terminal(state))
		rb_ary_push(children, prefix);
	
	char prefix_buffer[1024];
	memcpy(prefix_buffer, char_prefix, prefix_size);
	prefix_buffer[prefix_size] = 0;

    walk_all_paths(trie, children, state, prefix_buffer, prefix_size);

    trie_state_free(state);
    return children;
}

#children_with_values(key) ⇒ Array

Finds all keys with their respective values in the Trie beginning with the given prefix.

Returns:

  • (Array)


267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'ext/trie/trie.c', line 267

static VALUE rb_trie_children_with_values(VALUE self, VALUE prefix) {
    if(NIL_P(prefix))
		return rb_ary_new();

	StringValue(prefix);

    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

	int prefix_size = RSTRING_LEN(prefix);
    TrieChar *char_prefix = (TrieChar*)RSTRING_PTR(prefix);
    
    VALUE children = rb_ary_new();

    TrieState *state = trie_root(trie);
    
    const TrieChar *iterator = char_prefix;
    while(*iterator != 0) {
		if(!trie_state_is_walkable(state, *iterator))
			return rb_ary_new();
		trie_state_walk(state, *iterator);
		iterator++;
    }

    if(trie_state_is_terminal(state)) {
		TrieState *end_state = trie_state_clone(state);
		trie_state_walk(end_state, '\0');

		VALUE tuple = rb_ary_new();
		rb_ary_push(tuple, prefix);
		TrieData trie_data = trie_state_get_data(end_state);
		rb_ary_push(tuple, (VALUE)trie_data);
		rb_ary_push(children, tuple);

		trie_state_free(end_state);
    }

	char prefix_buffer[1024];
	memcpy(prefix_buffer, char_prefix, prefix_size);
	prefix_buffer[prefix_size] = 0;

    walk_all_paths_with_values(trie, children, state, prefix_buffer, prefix_size);

    trie_state_free(state);
    return children;
}

#delete(key) ⇒ Object

Delete a key from the Trie. Returns true if it deleted a key, nil otherwise.



146
147
148
149
150
151
152
153
154
155
156
# File 'ext/trie/trie.c', line 146

static VALUE rb_trie_delete(VALUE self, VALUE key) {
	StringValue(key);

	Trie *trie;
    Data_Get_Struct(self, Trie, trie);

    if(trie_delete(trie, (TrieChar*)RSTRING_PTR(key)))
		return Qtrue;
    else
		return Qnil;
}

#get(key) ⇒ Object

Retrieves the value for a particular key (or nil) from the Trie.



98
99
100
101
102
103
104
105
106
107
108
109
# File 'ext/trie/trie.c', line 98

static VALUE rb_trie_get(VALUE self, VALUE key) {
	StringValue(key);

    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

	TrieData data;
    if(trie_retrieve(trie, (TrieChar*)RSTRING_PTR(key), &data))
		return (VALUE)data;
    else
		return Qnil;
}

#has_key?(key) ⇒ Boolean

Determines whether or not a key exists in the Trie. Use this if you don’t care about the value, as it is marginally faster than Trie#get.

Returns:

  • (Boolean)


78
79
80
81
82
83
84
85
86
87
88
# File 'ext/trie/trie.c', line 78

static VALUE rb_trie_has_key(VALUE self, VALUE key) {
	StringValue(key);

    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

    if(trie_has_key(trie, (TrieChar*)RSTRING_PTR(key)))
		return Qtrue;
    else
		return Qnil;
}

#rootTrieNode

Returns a TrieNode representing the root of the Trie.

Returns:



323
324
325
326
327
328
329
330
331
332
333
334
335
# File 'ext/trie/trie.c', line 323

static VALUE rb_trie_root(VALUE self) {
    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

    VALUE trie_node = rb_trie_node_alloc(cTrieNode);

	TrieState *state = trie_root(trie);
	RDATA(trie_node)->data = state;
    
    rb_iv_set(trie_node, "@state", Qnil);
    rb_iv_set(trie_node, "@full_state", rb_str_new2(""));
    return trie_node;
}

#save(filename_base) ⇒ true

Saves the trie data to two files, filename_base.da and filename_base.tail. Returns true if saving was successful.

Returns:

  • (true)


504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
# File 'ext/trie/trie.c', line 504

static VALUE rb_trie_save(VALUE self, VALUE filename_base) {
  VALUE da_filename = rb_str_dup(filename_base);
  rb_str_concat(da_filename, rb_str_new2(".da"));
  StringValue(da_filename);
    
  VALUE tail_filename = rb_str_dup(filename_base);
  rb_str_concat(tail_filename, rb_str_new2(".tail"));
  StringValue(tail_filename);

  Trie *trie;
  Data_Get_Struct(self, Trie, trie);

  FILE *da_file = fopen(RSTRING_PTR(da_filename), "w");
  if (da_file == NULL)
    raise_ioerror("Error opening .da file for writing.");
  if (da_write(trie->da, da_file) != 0)
    raise_ioerror("Error writing DArray data.");
  fclose(da_file);

  FILE *tail_file = fopen(RSTRING_PTR(tail_filename), "w");
  if (tail_file == NULL)
    raise_ioerror("Error opening .tail file for writing.");
  if (tail_write(trie->tail, tail_file) != 0)
    raise_ioerror("Error writing Tail data.");
  fclose(tail_file);

  return Qtrue;
}