Class: LinkParser::Dictionary

Inherits:
Object
  • Object
show all
Defined in:
ext/dictionary.c,
ext/dictionary.c

Overview

A Dictionary is the programmer’s handle on the set of word definitions that defines the grammar. A user creates a Dictionary from a grammar file and post-process knowledge file, and then creates all other objects through it.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#LinkParser::Dictionary.new(*args) ⇒ Object

Create a new LinkParser::Dictionary.

The preferred way to set up the dictionary is to call it with no arguments, which will look for a dictionary with the same language as the current environment. Alternatively, a fixed language can be specified by specifying an ISO639 language code, for example, LinkParser::Dictionary.new( :en ).

Explicit dictionary file names can also be specified, like so:

Dictionary.new( dict_file,
                post_process_file,
                constituent_knowledge_file,
                affix_file )

This mode of dictionary construction is not recommended for new development, and is intended for advanced users only. To create the dictionary, the Dictionary looks in the current directory and the data directory for the files dict_file, post_process_file, constituent_knowledge_file, and affix_file. The last three entries may be omitted. If dict_file is a fully specified path name, then the other file names, which need not be fully specified, will be prefixed by the directory specified by dict_file.

In any case, a Hash of options can also be specified; it will be used as the default ParseOptions attributes for any sentences created from the dictionary.

Examples:

dict = LinkParser::Dictionary.new

dict = LinkParser::Dictionary.new( :de )

dict = LinkParser::Dictionary.new( '/var/data/custom_dicts/4.2.dict' )


173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# File 'ext/dictionary.c', line 173

/*
 * Initializer for LinkParser::Dictionary objects.
 *
 * Accepted argument forms (anything after the leading arguments is optional
 * as shown):
 *
 *   ()                                    default-language dictionary
 *   ( opthash )                           default language, with options
 *   ( lang )                              language as a String or Symbol
 *   ( lang, opthash )
 *   ( dict, pp, cons, affix )             old-style explicit dictionary files
 *   ( dict, pp, cons, affix, opthash )
 *
 * The options hash (or a new empty Hash when none was given) is stored in
 * the @options instance variable.
 *
 * Raises ArgumentError when called with exactly three arguments, and
 * RuntimeError when check_dict() reports that the object has already been
 * initialized. If no dictionary could be created, rlink_raise_lp_error() is
 * called. Returns nil.
 */
static VALUE
rlink_dict_initialize( int argc, VALUE *argv, VALUE self ) {
	int i = 0;
	struct rlink_dictionary *ptr = NULL;
	Dictionary dict = NULL;
	VALUE arg1 = Qnil, arg2 = Qnil, arg3 = Qnil, arg4 = Qnil, arg5 = Qnil;
	VALUE lang = Qnil;
	VALUE opthash = Qnil;

	/* Refuse to initialize the same object twice. */
	if ( check_dict(self) ) {
		rb_raise( rb_eRuntimeError, "Cannot re-initialize a Dictionary object." );
	}

	switch( i = rb_scan_args(argc, argv, "05", &arg1, &arg2, &arg3, &arg4, &arg5) ) {
	  /* Dictionary.new */
	  case 0:
		debugMsg(( "No arguments" ));
		break;

	  /* Dictionary.new( lang )*/
	  /* Dictionary.new( opthash )*/
	  /* NOTE(review): a lone non-Hash argument is always treated as a
	     language code here, so a lone dictionary path is handed to
	     dictionary_create_lang() -- confirm this is intended. */
	  case 1:
		if( TYPE(arg1) == T_HASH ) {
			debugMsg(( "One arg: options hash."));
			opthash = arg1;
		} else {
			debugMsg(( "One arg: language" ));
			lang = arg1;
		}
		break;

	  /* Dictionary.new( lang, opthash ) */
	  case 2:
		debugMsg(( "Two args: language and options hash."));
		lang = arg1;
		opthash = arg2;
		break;

	  /* Dictionary.new( dict, pp, cons, affix ) */
	  /* Dictionary.new( dict, pp, cons, affix, opthash ) */
	  case 4:
	  case 5:
		debugMsg(( "Four or five args: old-style explicit dict files." ));
		dict = rlink_make_oldstyle_dict( arg1, arg2, arg3, arg4 );
		opthash = arg5; /* still nil in the four-argument form */
		break;

	  /* Anything else is an error */
	  default:
		rb_raise( rb_eArgError,
			"wrong number of arguments (%d for 0,1,2,4, or 5)", i );
	}

	/* Create the dictionary if it hasn't been already */
	if ( !dict && i < 4 ) {
		if ( RTEST(lang) ) {
			/* A Symbol such as :en has no implicit String conversion, so
			   turn it into a String before the String check below. */
			if ( SYMBOL_P(lang) ) lang = rb_obj_as_string( lang );

			SafeStringValue( lang );
			dict = dictionary_create_lang( StringValuePtr(lang) );
		} else {
			dict = dictionary_create_default_lang();
		}
	}

	/* If the dictionary still isn't created, there was an error
	   creating it */
	if ( !dict ) rlink_raise_lp_error();

	debugMsg(( "Created dictionary %p", dict ));
	DATA_PTR( self ) = ptr = rlink_dictionary_alloc();

	ptr->dict = dict;

	/* If they passed in an options hash, save it for later. */
	if ( RTEST(opthash) ) rb_iv_set( self, "@options", opthash );
	else rb_iv_set( self, "@options", rb_hash_new() );

	return Qnil;
}

Instance Attribute Details

#options ⇒ Object (readonly)

The LinkParser::ParseOptions object for the Dictionary

Instance Method Details

#max_cost ⇒ Fixnum

Returns the maximum cost (number of brackets []) that is placed on any connector in the dictionary. This is useful for designing a parsing algorithm that progresses in stages, first trying the cheap connectors.

Returns:

  • (Fixnum)


263
264
265
266
267
268
269
270
271
272
# File 'ext/dictionary.c', line 263

/*
 * Dictionary#max_cost
 *
 * Looks up the dictionary wrapped by +self+, asks
 * dictionary_get_max_cost() for its maximum cost, and returns that value
 * converted to a Ruby Integer.
 */
static VALUE
rlink_get_max_cost( VALUE self ) {
	struct rlink_dictionary *wrapper = get_dict( self );
	int max_cost = dictionary_get_max_cost( wrapper->dict );

	debugMsg(( "Max cost is: %d", max_cost ));
	return INT2NUM( max_cost );
}

#parse(string) ⇒ Object #parse(string, options) ⇒ Object

Parse the specified sentence string with the dictionary and return a LinkParser::Sentence. If you specify an options hash, its values will override those of the Dictionary for the resulting Sentence.



284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
# File 'ext/dictionary.c', line 284

/*
 * Dictionary#parse( string )
 * Dictionary#parse( string, options )
 *
 * Creates a new rlink_cSentence from the input string and this dictionary,
 * calls #parse on it (passing the options argument along when one was
 * given), and returns the sentence object.
 */
static VALUE
rlink_parse( int argc, VALUE *argv, VALUE self ) {
	VALUE input_string = Qnil;
	VALUE options = Qnil;
	VALUE sentence = Qnil;
	VALUE args[2];
	int i;

	/* One required argument (the string), one optional (the options). */
	i = rb_scan_args( argc, argv, "11", &input_string, &options );

	/* Create the new sentence */
	args[0] = input_string;
	args[1] = self;
	sentence = rb_class_new_instance( 2, args, rlink_cSentence );

	/* Now call #parse on it; with argc 0 no trailing dummy argument is
	   passed. */
	if ( i == 1 )
		rb_funcall( sentence, rb_intern("parse"), 0 );
	else
		rb_funcall( sentence, rb_intern("parse"), 1, options );

	return sentence;
}