Class: Mittens::Stemmer
- Inherits: Object
- Inheritance chain:
  - Object
  - Mittens::Stemmer
- Defined in:
- ext/mittens/ext.c
Class Method Summary collapse
Instance Method Summary collapse
- #initialize(*args) ⇒ Object constructor
- #stem(value) ⇒ Object
Constructor Details
#initialize(*args) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'ext/mittens/ext.c', line 32
/*
 * Mittens::Stemmer#initialize(language: "english")
 *
 * Accepts an optional keyword hash. When it contains a non-nil :language
 * entry, that entry must be a String naming the stemming algorithm;
 * otherwise "english" is used.
 *
 * Raises TypeError when :language is not a String, and ArgumentError when
 * the name contains a NUL byte or libstemmer does not recognize it.
 * Returns self.
 */
static VALUE stemmer_initialize(int argc, VALUE* argv, VALUE self)
{
    VALUE opts;
    rb_scan_args(argc, argv, ":", &opts);

    const char *algorithm = "english";
    if (!NIL_P(opts)) {
        VALUE language = rb_hash_aref(opts, ID2SYM(rb_intern("language")));
        if (!NIL_P(language)) {
            Check_Type(language, T_STRING);
            // StringValueCStr (unlike RSTRING_PTR) rejects embedded NUL bytes,
            // so "english\0junk" is not silently treated as "english"
            algorithm = StringValueCStr(language);
        }
    }

    stemmer_t *stemmer;
    TypedData_Get_Struct(self, stemmer_t, &stemmer_data_type, stemmer);

    // build the replacement first so a failed re-initialize leaves the
    // previously configured stemmer untouched
    // if adding support for encoding, may want to change encoding returned from stem
    struct sb_stemmer *replacement = sb_stemmer_new(algorithm, NULL);
    if (replacement == NULL) {
        rb_raise(rb_eArgError, "unknown language: %s", algorithm);
    }

    // in case called multiple times
    sb_stemmer_delete(stemmer->stemmer);
    stemmer->stemmer = replacement;

    return self;
}
|
Class Method Details
.languages ⇒ Object
75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'ext/mittens/ext.c', line 75
/*
 * Mittens::Stemmer.languages
 *
 * Returns a new Array holding one UTF-8 String per entry of the
 * NULL-terminated list returned by sb_stemmer_list().
 */
static VALUE stemmer_languages(VALUE klass)
{
    VALUE names = rb_ary_new();

    // walk the list until its NULL sentinel
    for (const char **entry = sb_stemmer_list(); *entry != NULL; entry++) {
        rb_ary_push(names, rb_utf8_str_new_cstr(*entry));
    }

    return names;
}
|
Instance Method Details
#stem(value) ⇒ Object
61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'ext/mittens/ext.c', line 61
/*
 * Mittens::Stemmer#stem(value)
 *
 * Stems the String `value` with this object's libstemmer handle and
 * returns the result as a new UTF-8 String.
 *
 * Raises TypeError when `value` is not a String, RangeError when its byte
 * length does not fit in an int, RuntimeError when the object has no
 * stemmer handle, and NoMemoryError when libstemmer reports failure.
 */
static VALUE stemmer_stem(VALUE self, VALUE value)
{
    stemmer_t *stemmer;
    TypedData_Get_Struct(self, stemmer_t, &stemmer_data_type, stemmer);
    Check_Type(value, T_STRING);

    // NOTE(review): presumably the handle is NULL until initialize succeeds
    // (e.g. an object made with allocate) -- confirm against the allocator
    if (stemmer->stemmer == NULL) {
        rb_raise(rb_eRuntimeError, "stemmer not initialized");
    }

    const sb_symbol *word = (const sb_symbol *) RSTRING_PTR(value);
    // RSTRING_LENINT raises RangeError instead of silently truncating
    // lengths that do not fit in the int libstemmer expects
    int size = RSTRING_LENINT(value);

    const sb_symbol *stemmed = sb_stemmer_stem(stemmer->stemmer, word, size);
    if (stemmed == NULL) {
        // libstemmer returns NULL when it runs out of memory
        rb_memerror();
    }

    // use the length reported by libstemmer rather than scanning for a NUL,
    // so the result is not cut short when the input carried NUL bytes
    int stemmed_size = sb_stemmer_length(stemmer->stemmer);
    return rb_utf8_str_new((const char *) stemmed, stemmed_size);
}
|