Class: Isomorfeus::Ferret::QueryParser

Inherits:
Object
  • Object
show all
Defined in:
ext/isomorfeus_ferret_ext/frb_qparser.c

Instance Method Summary collapse

Constructor Details

#new(options = {}) ⇒ QueryParser

Create a new QueryParser. The QueryParser is used to convert string queries into Query objects. The options are:

Options

:default_field

Default: “*” (all fields). The default field to search when no field is specified in the search string. It can also be an array of fields.

:analyzer

Default: StandardAnalyzer. FrtAnalyzer used by the query parser to parse query terms

:wild_card_downcase

Default: true. Specifies whether wild-card queries and range queries should be downcased or not since they are not passed through the parser

:fields

Default: []. Lets the query parser know what fields are available for searching, particularly when the “*” is specified as the search field

:tokenized_fields

Default: :fields. Lets the query parser know which fields are tokenized so it knows which fields to run the analyzer over.

:validate_fields

Default: false. Set to true if you want an exception to be raised if there is an attempt to search a non-existent field

:or_default

Default: true. Use “OR” as the default boolean operator

:default_slop

Default: 0. Default slop to use in PhraseQuery

:handle_parse_errors

Default: true. QueryParser will quietly handle all parsing errors internally. If you’d like to handle them yourself, set this parameter to false.

:clean_string

Default: true. QueryParser will do a quick once-over of the query string to make sure that quotes and brackets match up and special characters are escaped.

:max_clauses

Default: 512. The maximum number of clauses allowed in boolean queries and the maximum number of terms allowed in multi, prefix, wild-card or fuzzy queries when those queries are generated by rewriting other queries.

:use_keywords

Default: true. By default AND, OR, NOT and REQ are keywords used by the query parser. Sometimes this is undesirable. For example, if your application allows searching for US states by their abbreviation, then OR will be a common query string. By setting :use_keywords to false, OR will no longer be a keyword allowing searches for the state of Oregon. You will still be able to use boolean queries by using the + and - characters.

:use_typed_range_query

Default: false. Use TypedRangeQuery instead of the standard RangeQuery when parsing range queries. This is useful if you have number fields which you want to perform range queries on. You won’t need to pad or normalize the data in the field in any way to get correct results. However, performance will be a lot slower for large indexes, hence the default. Note: the default is set to true in the Index class.



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'ext/isomorfeus_ferret_ext/frb_qparser.c', line 167

/*
 * Initializer behind QueryParser#new(options = {}).
 *
 * Accepts zero or one argument. When the argument is a Hash it is read as
 * the options hash; any other argument is passed to frb_get_fields() and
 * used as the default field set.
 *
 * The field sets (default, all, tokenized) and the analyzer are collected
 * first, then the parser struct wrapped by `self` is initialised with
 * frt_qp_init() and the remaining scalar options are applied on top.
 *
 * Integer options (:default_slop, :max_clauses) are converted with
 * NUM2INT, so a non-numeric or out-of-range value raises a Ruby exception
 * instead of being silently decoded as a garbage integer.
 *
 * Returns self.
 */
static VALUE frb_qp_init(int argc, VALUE *argv, VALUE self) {
    VALUE roptions = Qnil;
    VALUE rval;
    FrtAnalyzer *analyzer = NULL;
    FrtHashSet *def_fields = NULL;
    FrtHashSet *all_fields = NULL;
    FrtHashSet *tkz_fields = NULL;
    FrtQParser *qp;
    TypedData_Get_Struct(self, FrtQParser, &frb_qp_t, qp);
    if (rb_scan_args(argc, argv, "01", &roptions) > 0) {
        if (TYPE(roptions) == T_HASH) {
            if (Qnil != (rval = rb_hash_aref(roptions, sym_default_field))) {
                def_fields = frb_get_fields(rval, NULL);
            }
            if (Qnil != (rval = rb_hash_aref(roptions, sym_analyzer))) {
                analyzer = frb_get_cwrapped_analyzer(rval);
            }
            /* Both sym_all_fields and sym_fields populate all_fields; the
             * sym_fields lookup runs second, so it wins when both keys are
             * present.
             * NOTE(review): in that case the first set is overwritten
             * without being released here -- confirm whether that leaks. */
            if (Qnil != (rval = rb_hash_aref(roptions, sym_all_fields))) {
                all_fields = frb_get_fields(rval, def_fields);
            }
            if (Qnil != (rval = rb_hash_aref(roptions, sym_fields))) {
                all_fields = frb_get_fields(rval, def_fields);
            }
            if (Qnil != (rval = rb_hash_aref(roptions, sym_tkz_fields))) {
                tkz_fields = frb_get_fields(rval, def_fields);
            }
        } else {
            /* Non-hash argument: treat it as the default field(s) and
             * forget it as an options hash so the option block below is
             * skipped. */
            def_fields = frb_get_fields(roptions, def_fields);
            roptions = Qnil;
        }
    }
    if (all_fields == NULL) {
        all_fields = frt_hs_new_ptr(NULL);
    }
    if (!analyzer) {
        analyzer = frt_standard_analyzer_new(true);
    }
    qp = frt_qp_init(qp, analyzer);
    /* all_fields must contain every default and tokenized field. */
    if (def_fields) hs_safe_merge(all_fields, def_fields);
    if (tkz_fields) hs_safe_merge(all_fields, tkz_fields);
    qp->all_fields = all_fields;
    /* When no dedicated set was supplied the parser shares all_fields;
     * the setters rely on this pointer equality to detect sharing. */
    qp->def_fields = def_fields ? def_fields : all_fields;
    qp->tokenized_fields = tkz_fields ? tkz_fields : all_fields;
    /* NOTE(review): this stays NULL when no default field was given, while
     * qp->def_fields falls back to all_fields -- confirm this is intended. */
    qp->fields_top->fields = def_fields;

    qp->allow_any_fields = true;
    qp->clean_str = true;
    qp->handle_parse_errors = true;
    /* handle options */
    if (roptions != Qnil) {
        if (Qnil != (rval = rb_hash_aref(roptions, sym_handle_parse_errors))) {
            qp->handle_parse_errors = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_validate_fields))) {
            /* validating fields is the opposite of allowing any field */
            qp->allow_any_fields = !RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_wild_card_downcase))) {
            qp->wild_lower = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_or_default))) {
            qp->or_default = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_default_slop))) {
            /* NUM2INT type-checks the value; FIX2INT would not. */
            qp->def_slop = NUM2INT(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_clean_string))) {
            qp->clean_str = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_max_clauses))) {
            /* NUM2INT type-checks the value; FIX2INT would not. */
            qp->max_clauses = NUM2INT(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_use_keywords))) {
            qp->use_keywords = RTEST(rval);
        }
        if (Qnil != (rval = rb_hash_aref(roptions, sym_use_typed_range_query))){
            qp->use_typed_range_query = RTEST(rval);
        }
    }
    return self;
}

Instance Method Details

#fields ⇒ Array of Symbols

Returns the list of all fields that the QueryParser knows about.

Returns:

  • (Array of Symbols)


282
283
284
285
286
287
288
289
290
291
292
293
# File 'ext/isomorfeus_ferret_ext/frb_qparser.c', line 282

/*
 * Implements QueryParser#fields.
 *
 * Walks the parser's all_fields set from its first entry and returns a new
 * Ruby Array holding one Symbol per field name.
 */
static VALUE frb_qp_get_fields(VALUE self) {
    GET_QP;
    FrtHashSet *known_fields = qp->all_fields;
    VALUE field_syms = rb_ary_new();
    FrtHashSetEntry *entry = known_fields->first;

    while (entry) {
        /* each element is a C string naming a field */
        const char *field_name = (char *)entry->elem;
        rb_ary_push(field_syms, ID2SYM(rb_intern(field_name)));
        entry = entry->next;
    }

    return field_syms;
}

#fields=(fields) ⇒ self

Set the list of fields. These fields are expanded for searches on “*”.

Returns:

  • (self)


301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
# File 'ext/isomorfeus_ferret_ext/frb_qparser.c', line 301

/*
 * Implements QueryParser#fields=(fields).
 *
 * Replaces the parser's all_fields set with one built from `rfields`.
 * def_fields and tokenized_fields are redirected to the new set whenever
 * they were sharing the old all_fields set (or were unset). A separately
 * held tokenized set is kept and merged into the new set so that all_fields
 * remains a superset of it.
 *
 * Returns self.
 */
static VALUE frb_qp_set_fields(VALUE self, VALUE rfields) {
    GET_QP;
    FrtHashSet *new_set = frb_get_fields(rfields, NULL);

    /* Drop any pointer that merely aliases the set about to be destroyed;
     * a NULL marks it for replacement further down. */
    if (qp->def_fields == qp->all_fields) {
        qp->def_fields = NULL;
    }
    if (qp->tokenized_fields == qp->all_fields) {
        qp->tokenized_fields = NULL;
    }

    /* no set came back from frb_get_fields: fall back to an empty one */
    if (!new_set) {
        new_set = frt_hs_new_ptr(NULL);
    }

    /* a separately held tokenized set must stay contained in all_fields */
    if (qp->tokenized_fields != NULL) {
        hs_safe_merge(new_set, qp->tokenized_fields);
    }

    /* release the previous set and install the replacement */
    assert(qp->all_fields->free_elem_i == frt_dummy_free);
    frt_hs_destroy(qp->all_fields);
    qp->all_fields = new_set;

    /* re-point whatever was sharing the old set */
    if (qp->tokenized_fields == NULL) {
        qp->tokenized_fields = new_set;
    }
    if (qp->def_fields == NULL) {
        qp->def_fields = new_set;
        qp->fields_top->fields = new_set;
    }

    return self;
}

#parse(query_string) ⇒ Query

Parse a query string returning a Query object if parsing was successful. Will raise a QueryParseException if unsuccessful.

Returns:

  • (Query)


256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
# File 'ext/isomorfeus_ferret_ext/frb_qparser.c', line 256

/*
 * Implements QueryParser#parse(query_string).
 *
 * The argument is first converted with rb_obj_as_string(). The string and
 * its encoding are passed to qp_parse(), and the resulting FrtQuery is
 * wrapped as a Ruby object with frb_get_q().
 *
 * If the FRT_TRY block catches an exception, its message is saved and a
 * Ruby QueryParseException carrying that message is raised once the
 * try/catch construct has been closed.
 *
 * Returns the wrapped Query. The result starts out as nil so that no path
 * can return an indeterminate VALUE.
 */
static VALUE frb_qp_parse(VALUE self, VALUE rstr) {
    const char *volatile msg = NULL;
    /* Initialised so that a caught exception with a NULL message cannot
     * lead to returning an uninitialised VALUE. */
    volatile VALUE rq = Qnil;
    GET_QP;
    FrtQuery *q;
    rstr = rb_obj_as_string(rstr);
    FRT_TRY
        q = qp_parse(qp, rs2s(rstr), rb_enc_get(rstr));
        rq = frb_get_q(q);
        break;
    FRT_XCATCHALL
        /* only record the message here; the Ruby exception is raised
         * after FRT_XENDTRY */
        msg = xcontext.msg;
        FRT_HANDLED();
    FRT_XENDTRY
    if (msg) {
        rb_raise(cQueryParseException, "%s", msg);
    }
    return rq;
}

#tokenized_fields ⇒ Array of Symbols

Returns the list of all tokenized_fields that the QueryParser knows about.

Returns:

  • (Array of Symbols)


338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
# File 'ext/isomorfeus_ferret_ext/frb_qparser.c', line 338

/*
 * Implements QueryParser#tokenized_fields.
 *
 * Returns nil when the parser has no tokenized_fields set; otherwise
 * returns a new Ruby Array with one Symbol per entry of that set.
 */
static VALUE frb_qp_get_tkz_fields(VALUE self) {
    GET_QP;
    FrtHashSet *tkz_set = qp->tokenized_fields;
    FrtHashSetEntry *entry;
    VALUE field_syms;

    /* nothing configured */
    if (!tkz_set) {
        return Qnil;
    }

    field_syms = rb_ary_new();
    entry = tkz_set->first;
    while (entry) {
        rb_ary_push(field_syms, ID2SYM(rb_intern((char *)entry->elem)));
        entry = entry->next;
    }

    return field_syms;
}

#tokenized_fields=(fields) ⇒ self

Set the list of tokenized_fields. These tokenized_fields are tokenized in the queries. If this is set to nil then all fields will be tokenized.

Returns:

  • (self)


362
363
364
365
366
367
368
369
# File 'ext/isomorfeus_ferret_ext/frb_qparser.c', line 362

/*
 * Implements QueryParser#tokenized_fields=(fields).
 *
 * Builds a new set from `rfields` and installs it as the parser's
 * tokenized_fields. The previous set is destroyed only when the parser
 * holds it separately, i.e. it is neither NULL nor the shared all_fields
 * set.
 *
 * The new set is built before the old one is released, so a failure inside
 * frb_get_fields() cannot leave qp->tokenized_fields pointing at freed
 * memory.
 *
 * Returns self.
 */
static VALUE frb_qp_set_tkz_fields(VALUE self, VALUE rfields) {
    GET_QP;
    /* may be NULL; the getter reports that state as nil */
    FrtHashSet *new_fields = frb_get_fields(rfields, NULL);
    FrtHashSet *old_fields = qp->tokenized_fields;

    qp->tokenized_fields = new_fields;

    /* A previous assignment may have left the set NULL, and a set shared
     * with all_fields is not ours to free. */
    if (old_fields != NULL && old_fields != qp->all_fields) {
        frt_hs_destroy(old_fields);
    }
    return self;
}