Class: LinkParser::Sentence

Inherits:
Object
  • Object
show all
Defined in:
lib/linkparser/sentence.rb,
ext/sentence.c,
ext/sentence.c

Overview

A Sentence is the API’s representation of an input string, tokenized and
interpreted according to a specific Dictionary. After a Sentence is created
and parsed, various attributes of the resulting set of linkages can be
obtained.

Instance Method Summary collapse

Constructor Details

#LinkParser::Sentence.new(str, dict) ⇒ Object

Create a new LinkParser::Sentence object from the given input string using the specified LinkParser::Dictionary.

dict = LinkParser::Dictionary.new
LinkParser::Sentence.new( "The boy runs", dict )  #=> #<LinkParser::Sentence:0x5481ac>


173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'ext/sentence.c', line 173

/*
 * Initializer for LinkParser::Sentence.
 *
 * Builds a link-grammar Sentence from +input_string+ using the dictionary
 * wrapped by +dictionary+, and attaches a freshly allocated rlink_sentence
 * struct to +self+.
 *
 * Raises RuntimeError if check_sentence() reports that +self+ already has a
 * sentence attached, and raises through rlink_raise_lp_error() if
 * sentence_create() fails.
 *
 * Returns +self+.
 */
static VALUE
rlink_sentence_init( VALUE self, VALUE input_string, VALUE dictionary ) {
	struct rlink_sentence *ptr;
	struct rlink_dictionary *dictptr;
	Sentence sent;

	/* Guard: an object that already carries a sentence may not be set up again. */
	if ( check_sentence(self) ) {
		rb_raise( rb_eRuntimeError,
				  "Cannot re-initialize a sentence once it's been created." );
	}

	dictptr = rlink_get_dict( dictionary );

	sent = sentence_create( StringValueCStr(input_string), dictptr->dict );
	if ( !sent )
		rlink_raise_lp_error();

	ptr = rlink_sentence_alloc();
	DATA_PTR( self ) = ptr;

	/* Keep the Ruby-side dictionary object alongside the native sentence;
	   no parse options exist until the sentence is parsed. */
	ptr->sentence   = sent;
	ptr->dictionary = dictionary;
	ptr->options    = Qnil;

	return self;
}

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(sym, *args) ⇒ Object (protected)

Proxy method – auto-delegate calls to the first linkage.



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/linkparser/sentence.rb', line 69

# Proxy method -- forward any call that LinkParser::Linkage can answer to the
# first linkage of this sentence. The forwarded method is also installed on
# the receiver's singleton class, so later calls with the same name skip
# method_missing entirely.
#
# Falls through to +super+ for names LinkParser::Linkage does not define, and
# raises LinkParser::Error if the sentence has no linkages to delegate to.
def method_missing( sym, *args )
	linkage_methods = LinkParser::Linkage.instance_methods

	# Check both symbol and string for forward-compatibility with 1.9.x
	unless linkage_methods.include?( sym.to_s ) || linkage_methods.include?( sym )
		return super
	end

	first_linkage = self.linkages.first
	raise LinkParser::Error, "sentence has no linkages" unless first_linkage

	delegate = first_linkage.method( sym )
	self.singleton_class.send( :define_method, sym, &delegate )

	delegate.call( *args )
rescue => err
	# Re-raise with the last backtrace frame removed.
	raise err, err.message, err.backtrace[ 0...-1 ]
end

Instance Method Details

#[](index) ⇒ String #[](start, length) ⇒ String #[](range) ⇒ String

Element Reference—Returns the word at index, or returns an Array of words starting at start and continuing for length words, or returns an Array of the words specified by range. Negative indices count backward from the end of the sentence's words (-1 is the last word). Returns nil if the index (or starting index) is out of range.

sent = dict.parse( "Birds fly south for the winter." )

sent[1]		# => "birds"
sent[0,4]	# => ["LEFT-WALL", "birds", "fly", "south"]
sent[1..3]	# => ["birds", "fly", "south"]

Overloads:

  • #[](index) ⇒ String

    Returns:

    • (String)
  • #[](start, length) ⇒ String

    Returns:

    • (String)
  • #[](range) ⇒ String

    Returns:

    • (String)


401
402
403
# File 'ext/sentence.c', line 401

static VALUE
rlink_sentence_aref( argc, argv, self )
int argc;

#disjunct_cost(i) ⇒ Fixnum

The maximum cost of connectors used in the i-th linkage of the sentence.

Returns:

  • (Fixnum)


517
518
519
520
521
522
523
524
525
526
527
# File 'ext/sentence.c', line 517

/*
 * Return the value of sentence_disjunct_cost() for the +i+-th linkage of the
 * sentence as a Fixnum, parsing the sentence first if it has not been parsed
 * yet.
 *
 * +i+ is converted with NUM2INT, so a non-numeric argument raises TypeError
 * (and an out-of-int-range one RangeError) instead of being decoded as an
 * arbitrary integer the way FIX2INT would on a non-Fixnum VALUE.
 *
 * NOTE(review): the index is not range-checked against the number of
 * linkages here; confirm which linkage count link-grammar expects it to be
 * bounded by.
 */
static VALUE
rlink_sentence_disjunct_cost( VALUE self, VALUE i ) {
	struct rlink_sentence *ptr = get_sentence( self );
	int linkage_idx = NUM2INT( i );
	int cost;

	if ( !RTEST(ptr->parsed_p) )
		rlink_sentence_parse( 0, 0, self );

	cost = sentence_disjunct_cost( (Sentence)ptr->sentence, linkage_idx );
	return INT2FIX( cost );
}

#inspect ⇒ Object

Return a human-readable representation of the Sentence object.



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/linkparser/sentence.rb', line 32

# Return a human-readable representation of the Sentence object: the class
# name, a hex identifier derived from the object id, and either the sentence
# text with its linkage and null counts (when parsed) or "(unparsed)".
def inspect
	contents =
		if self.parsed?
			%{"%s"/%d linkages/%d nulls} % [ self.to_s, self.num_linkages_found, self.null_count ]
		else
			"(unparsed)"
		end

	return "#<%s:0x%x %s>" % [ self.class.name, self.object_id / 2, contents ]
end

#length ⇒ Fixnum

Returns the number of words in the tokenized sentence, including the boundary words and punctuation.

Returns:

  • (Fixnum)


321
322
323
324
325
326
327
328
329
# File 'ext/sentence.c', line 321

/*
 * Return the result of sentence_length() for the wrapped sentence as a
 * Fixnum. The sentence is parsed with default arguments first if it has not
 * been parsed yet.
 */
static VALUE
rlink_sentence_length( VALUE self ) {
	struct rlink_sentence *ptr = get_sentence( self );
	int word_count;

	if ( !RTEST(ptr->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	word_count = sentence_length( (Sentence)ptr->sentence );
	return INT2FIX( word_count );
}

#linkages ⇒ Array

Returns an Array of LinkParser::Linkage objects, one for each valid linkage found when the sentence was parsed. This will cause the sentence to be parsed if it hasn’t been already.

Returns:

  • (Array)


285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
# File 'ext/sentence.c', line 285

/*
 * Build and return a Ruby Array holding one LinkParser::Linkage per valid
 * linkage of the sentence (as counted by sentence_num_valid_linkages()).
 * Each Linkage is created with the linkage index and this sentence as its
 * constructor arguments. The sentence is parsed first if necessary.
 */
static VALUE
rlink_sentence_linkages( VALUE self ) {
	struct rlink_sentence *ptr = get_sentence( self );
	VALUE linkages;
	int idx, valid_count;

	if ( !RTEST(ptr->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	valid_count = sentence_num_valid_linkages( (Sentence)ptr->sentence );
	linkages = rb_ary_new2( valid_count );

	for ( idx = 0; idx < valid_count; idx++ ) {
		/* Linkage.new( index, sentence ) */
		VALUE ctor_args[2];

		ctor_args[0] = INT2FIX( idx );
		ctor_args[1] = self;

		rb_ary_push( linkages, rb_class_new_instance(2, ctor_args, rlink_cLinkage) );
	}

	return linkages;
}

#null_count ⇒ Integer

Returns the number of null links that were used in parsing the sentence.

Returns:

  • (Integer)


418
419
420
421
422
423
424
425
426
427
428
# File 'ext/sentence.c', line 418

/*
 * Return the result of sentence_null_count() for the wrapped sentence as a
 * Fixnum, parsing the sentence first if it has not been parsed yet.
 */
static VALUE
rlink_sentence_null_count( VALUE self ) {
	struct rlink_sentence *ptr = get_sentence( self );

	if ( !RTEST(ptr->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	return INT2FIX( sentence_null_count((Sentence)ptr->sentence) );
}

#num_linkages_found ⇒ Fixnum

Returns the number of linkages found when parsing the sentence. This will cause the sentence to be parsed if it hasn’t been already.

Returns:

  • (Fixnum)


438
439
440
441
442
443
444
445
446
447
448
449
# File 'ext/sentence.c', line 438

/*
 * Return the result of sentence_num_linkages_found() for the wrapped
 * sentence as a Fixnum. If the sentence has not been parsed yet it is parsed
 * first with default arguments.
 */
static VALUE
rlink_sentence_num_linkages_found( VALUE self ) {
	struct rlink_sentence *ptr = get_sentence( self );

	if ( !RTEST(ptr->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	return INT2FIX( sentence_num_linkages_found((Sentence)ptr->sentence) );
}

#num_linkages_post_processed ⇒ Fixnum

Return the number of linkages that were actually post-processed (which may be less than the number found because of the linkage_limit parameter).

Returns:

  • (Fixnum)


478
479
480
481
482
483
484
485
486
487
488
# File 'ext/sentence.c', line 478

/*
 * Return the result of sentence_num_linkages_post_processed() for the
 * wrapped sentence as a Fixnum, parsing the sentence first if it has not
 * been parsed yet.
 */
static VALUE
rlink_sentence_num_linkages_post_processed( VALUE self ) {
	struct rlink_sentence *ptr = get_sentence( self );

	if ( !RTEST(ptr->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	return INT2FIX( sentence_num_linkages_post_processed((Sentence)ptr->sentence) );
}

#num_valid_linkages ⇒ Fixnum

Return the number of linkages that had no post-processing violations.

Returns:

  • (Fixnum)


458
459
460
461
462
463
464
465
466
467
468
# File 'ext/sentence.c', line 458

/*
 * Return the result of sentence_num_valid_linkages() for the wrapped
 * sentence as a Fixnum, parsing the sentence first if it has not been parsed
 * yet.
 */
static VALUE
rlink_sentence_num_valid_linkages( VALUE self ) {
	struct rlink_sentence *ptr = get_sentence( self );

	if ( !RTEST(ptr->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	return INT2FIX( sentence_num_valid_linkages((Sentence)ptr->sentence) );
}

#num_violations(i) ⇒ Fixnum

The number of post-processing violations that the i-th linkage had during the last parse.

Returns:

  • (Fixnum)


498
499
500
501
502
503
504
505
506
507
508
# File 'ext/sentence.c', line 498

/*
 * Return the value of sentence_num_violations() for the +i+-th linkage of
 * the sentence as a Fixnum, parsing the sentence first if it has not been
 * parsed yet.
 *
 * +i+ is converted with NUM2INT, so a non-numeric argument raises TypeError
 * (and an out-of-int-range one RangeError) instead of being decoded as an
 * arbitrary integer the way FIX2INT would on a non-Fixnum VALUE.
 *
 * NOTE(review): the index is not range-checked against the number of
 * linkages here; confirm which linkage count link-grammar expects it to be
 * bounded by.
 */
static VALUE
rlink_sentence_num_violations( VALUE self, VALUE i ) {
	struct rlink_sentence *ptr = get_sentence( self );
	int linkage_idx = NUM2INT( i );
	int count;

	if ( !RTEST(ptr->parsed_p) )
		rlink_sentence_parse( 0, 0, self );

	count = sentence_num_violations( (Sentence)ptr->sentence, linkage_idx );
	return INT2FIX( count );
}

#options ⇒ Object

Returns a ParseOptions object for the receiving sentence.

sentence.options.verbosity = 3
sentence.options.islands_ok?  # -> true


268
269
270
271
272
# File 'ext/sentence.c', line 268

/*
 * Return the options value stored on the sentence struct. It is set to nil
 * by the initializer and replaced by the parse options object when the
 * sentence is parsed.
 */
static VALUE
rlink_sentence_options( VALUE self ) {
	return get_sentence( self )->options;
}

#parse(options = {}) ⇒ Fixnum

Attach a parse set to this sentence and return the number of linkages found. If any options are specified, they override those set in the sentence’s dictionary.

Returns:

  • (Fixnum)


207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# File 'ext/sentence.c', line 207

/*
 * Parse the sentence and return the count reported by sentence_parse() as a
 * Fixnum.
 *
 * Accepts at most one optional argument (the per-call options), which
 * rlink_make_parse_options() combines with the result of calling #options on
 * the sentence's dictionary. Raises through rlink_raise_lp_error() when
 * sentence_parse() returns a negative value. On success the resulting options
 * object is stored on the sentence and the sentence is flagged as parsed.
 *
 * There is no guard against parsing a sentence that is already parsed.
 */
static VALUE
rlink_sentence_parse( int argc, VALUE *argv, VALUE self ) {
	struct rlink_sentence *ptr = get_sentence( self );
	Parse_Options parse_opts;
	VALUE dict_options = Qnil;
	VALUE options = Qnil;
	int num_links = 0;

	debugMsg(( "Parsing sentence <%p>", ptr ));

	/* Per-call options (if any) plus the dictionary's defaults... */
	rb_scan_args( argc, argv, "01", &options );
	dict_options = rb_funcall( ptr->dictionary, rb_intern("options"), 0 );

	/* ...become a ParseOptions object, from which the native Parse_Options
	   struct is extracted. */
	options = rlink_make_parse_options( dict_options, options );
	parse_opts = rlink_get_parseopts( options );

	/* Run the parser; a negative result signals an error. */
	num_links = sentence_parse( ptr->sentence, parse_opts );
	if ( num_links < 0 )
		rlink_raise_lp_error();

	ptr->options = options;
	ptr->parsed_p = Qtrue;

	return INT2FIX( num_links );
}

#parsed? ⇒ Boolean

Returns true if the sentence has been parsed.

sentence.parsed?   #-> false
sentence.parse     #-> 6
sentence.parsed?   #-> true

Returns:

  • (Boolean)


252
253
254
255
256
# File 'ext/sentence.c', line 252

/*
 * Return the parsed flag stored on the sentence struct; it is set to true
 * once the sentence has been parsed successfully.
 */
static VALUE
rlink_sentence_parsed_p( VALUE self ) {
	return get_sentence( self )->parsed_p;
}

#to_s ⇒ Object

Return the sentence as a String: its tokenized words joined by single spaces.



53
54
55
# File 'lib/linkparser/sentence.rb', line 53

# Return the sentence as a String: the tokenized words joined by single spaces.
def to_s
	self.words.join( ' ' )
end

#word(idx) ⇒ String

Returns the spelling of the n-th word in the sentence as it appears after tokenization.

Returns:

  • (String)


339
340
341
342
343
344
345
346
347
348
349
# File 'ext/sentence.c', line 339

/*
 * Return the spelling of the +n+-th word of the tokenized sentence as a new
 * String, parsing the sentence first if it has not been parsed yet.
 *
 * +n+ is converted with NUM2INT, so a non-numeric argument raises TypeError
 * rather than being decoded as an arbitrary integer. The index is then
 * checked against sentence_length(); an index outside 0...length raises
 * IndexError instead of being handed to sentence_get_word(), which would
 * read outside the sentence's word list.
 */
static VALUE
rlink_sentence_word( VALUE self, VALUE n ) {
	struct rlink_sentence *ptr = get_sentence( self );
	const char *word;
	int idx = NUM2INT( n );
	int length;

	if ( !RTEST(ptr->parsed_p) )
		rlink_sentence_parse( 0, 0, self );

	length = sentence_length( (Sentence)ptr->sentence );
	if ( idx < 0 || idx >= length )
		rb_raise( rb_eIndexError,
				  "word index %d out of range (sentence has %d words)",
				  idx, length );

	word = sentence_get_word( (Sentence)ptr->sentence, idx );
	return rb_str_new2( word );
}

#words ⇒ Array

Returns the words of the sentence as they appear after tokenization.

sentence = LinkParser::Dictionary.new.parse( "The dogs barks." )
sentence.words  #-> ["LEFT-WALL", "the", "dogs", "barks", ".", "RIGHT-WALL"]

Returns:

  • (Array)


361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
# File 'ext/sentence.c', line 361

/*
 * Return a new Array of Strings, one per word of the tokenized sentence, in
 * order from index 0 up to sentence_length() - 1. The sentence is parsed
 * first if it has not been parsed yet.
 */
static VALUE
rlink_sentence_words( VALUE self ) {
	struct rlink_sentence *ptr = get_sentence( self );
	VALUE result;
	int idx, word_count;

	if ( !RTEST(ptr->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	result = rb_ary_new();
	word_count = sentence_length( (Sentence)ptr->sentence );

	for ( idx = 0; idx < word_count; idx++ ) {
		const char *spelling = sentence_get_word( (Sentence)ptr->sentence, idx );

		debugMsg(( "Word %d: <%s>", idx, spelling ));
		rb_ary_push( result, rb_str_new2(spelling) );
	}

	return result;
}