Class: LinkParser::Sentence
- Inherits:
-
Object
- Object
- LinkParser::Sentence
- Defined in:
- lib/linkparser/sentence.rb,
ext/sentence.c,
ext/sentence.c
Overview
A Sentence is the API’s representation of an input string,
tokenized and interpreted according to a specific Dictionary. After
a Sentence is created and parsed, various attributes of the
resulting set of linkages can be obtained.
Instance Method Summary collapse
-
#[] ⇒ Object
Element Reference—Returns the element at index, or returns a subarray starting at start and continuing for length elements, or returns a subarray specified by range.
-
#disjunct_cost(i) ⇒ Fixnum
The maximum cost of connectors used in the i-th linkage of the sentence.
-
#LinkParser::Sentence.new(str, dict) ⇒ Object
constructor
Create a new LinkParser::Sentence object from the given input string using the specified LinkParser::Dictionary.
-
#inspect ⇒ Object
Return a human-readable representation of the Sentence object.
-
#length ⇒ Fixnum
Returns the number of words in the tokenized sentence, including the boundary words and punctuation.
-
#linkages ⇒ Array
Returns an Array of LinkParser::Linkage objects which represent the parts parsed from the sentence for the current linkage.
-
#null_count ⇒ Integer
Returns the number of null links that were used in parsing the sentence.
-
#num_linkages_found ⇒ Fixnum
Returns the number of linkages found when parsing the sentence.
-
#num_linkages_post_processed ⇒ Fixnum
Return the number of linkages that were actually post-processed (which may be less than the number found because of the linkage_limit parameter).
-
#num_valid_linkages ⇒ Fixnum
Return the number of linkages that had no post-processing violations.
-
#num_violations(i) ⇒ Fixnum
The number of post-processing violations that the i-th linkage had during the last parse.
-
#options ⇒ Object
Returns a ParseOptions object for the receiving sentence.
-
#parse(options = {}) ⇒ Fixnum
Attach a parse set to this sentence and return the number of linkages found.
-
#parsed? ⇒ Boolean
Returns true if the sentence has been parsed.
-
#to_s ⇒ Object
Print out the sentence.
-
#word(idx) ⇒ String
Returns the spelling of the n-th word in the sentence as it appears after tokenization.
-
#words ⇒ Array
Returns the words of the sentence as they appear after tokenization.
Constructor Details
#LinkParser::Sentence.new(str, dict) ⇒ Object
Create a new LinkParser::Sentence object from the given input string using the specified LinkParser::Dictionary.
dict = LinkParser::Dictionary.new
LinkParser::Sentence.new( "The boy runs", dict ) #=> #<LinkParser::Sentence:0x5481ac>
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
# File 'ext/sentence.c', line 173
/*
 * Initializer behind LinkParser::Sentence.new( str, dict ).
 *
 * Creates a link-grammar Sentence from +input_string+ with the dictionary
 * wrapped by +dictionary+, stores it in a freshly allocated rlink_sentence
 * and attaches that struct to +self+. The dictionary VALUE is remembered and
 * the options slot starts out as nil.
 *
 * Raises RuntimeError when check_sentence() reports that +self+ has already
 * been initialized, and calls rlink_raise_lp_error() when sentence_create()
 * returns NULL. Returns +self+.
 */
static VALUE
rlink_sentence_init( VALUE self, VALUE input_string, VALUE dictionary ) {
	struct rlink_dictionary *dict_wrapper;
	struct rlink_sentence *wrapper;
	Sentence created;

	/* Guard clause: refuse to set up the same object twice. */
	if ( check_sentence(self) ) {
		rb_raise( rb_eRuntimeError,
			"Cannot re-initialize a sentence once it's been created." );
	}

	dict_wrapper = rlink_get_dict( dictionary );
	created = sentence_create( StringValueCStr(input_string), dict_wrapper->dict );
	if ( !created )
		rlink_raise_lp_error();

	wrapper = rlink_sentence_alloc();
	DATA_PTR( self ) = wrapper;

	wrapper->sentence = created;
	wrapper->dictionary = dictionary;
	wrapper->options = Qnil;

	return self;
}
|
Dynamic Method Handling
This class handles dynamic methods through the method_missing method
#method_missing(sym, *args) ⇒ Object (protected)
Proxy method – auto-delegate calls to the first linkage.
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/linkparser/sentence.rb', line 69
### Proxy method -- auto-delegate calls to the first linkage.
###
### If +sym+ names an instance method of LinkParser::Linkage, the call is
### forwarded to the first linkage of this sentence, and a singleton method
### of the same name is defined on the receiver so later calls skip
### method_missing. Any other message goes to +super+.
###
### Raises LinkParser::Error if the sentence has no linkages. Exceptions are
### re-raised with their original message and with the last backtrace frame
### dropped.
def method_missing( sym, *args )
	# Check both symbol and string for forward-compatibility with 1.9.x
	return super unless LinkParser::Linkage.instance_methods.include?( sym.to_s ) ||
		LinkParser::Linkage.instance_methods.include?( sym )

	linkage = self.linkages.first or raise LinkParser::Error, "sentence has no linkages"

	meth = linkage.method( sym )
	self.singleton_class.send( :define_method, sym, &meth )

	meth.call( *args )
rescue => err
	raise err, err.message, err.backtrace[ 0..-2 ]
end
Instance Method Details
#[](index) ⇒ String #[](start, length) ⇒ String #[](range) ⇒ String
Element Reference—Returns the element at index, or returns a subarray starting at start and continuing for length elements, or returns a subarray specified by range. Negative indices count backward from the end of the array (-1 is the last element). Returns nil if the index (or starting index) is out of range.
sent = dict.parse( "Birds fly south for the winter." )
sent[1] # => "birds"
sent[0,4] # => ["LEFT-WALL", "birds", "fly", "south"]
sent[1..3] # => ["birds", "fly", "south"]
401 402 403 |
# File 'ext/sentence.c', line 401 static VALUE rlink_sentence_aref( argc, argv, self ) int argc; |
#disjunct_cost(i) ⇒ Fixnum
The maximum cost of connectors used in the i-th linkage of the sentence.
517 518 519 520 521 522 523 524 525 526 527 |
# File 'ext/sentence.c', line 517
/*
 * call-seq:
 *    sentence.disjunct_cost( i )   -> fixnum
 *
 * The maximum cost of connectors used in the i-th linkage of the sentence.
 * If the sentence has not been parsed yet, it is parsed first with no
 * extra options.
 *
 * Raises TypeError if +i+ cannot be converted to an integer, and
 * RangeError if it does not fit in a C int.
 */
static VALUE
rlink_sentence_disjunct_cost( VALUE self, VALUE i ) {
	struct rlink_sentence *ptr = get_sentence( self );
	int linkage_idx;
	int cost;

	if ( !RTEST(ptr->parsed_p) )
		rlink_sentence_parse( 0, 0, self );

	/* NUM2INT validates the argument. FIX2INT would silently reinterpret
	   the bits of any non-Fixnum VALUE (nil, a String, ...) as a number. */
	linkage_idx = NUM2INT( i );

	/* NOTE(review): the index is not range-checked against the number of
	   linkages here -- confirm sentence_disjunct_cost() copes with that. */
	cost = sentence_disjunct_cost( (Sentence)ptr->sentence, linkage_idx );

	return INT2FIX( cost );
}
|
#inspect ⇒ Object
Return a human-readable representation of the Sentence object.
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/linkparser/sentence.rb', line 32
### Return a human-readable representation of the Sentence object: the class
### name, half of the object id in hex, and either the sentence text with its
### linkage and null counts or "(unparsed)" if it has not been parsed.
def inspect
	contents =
		if self.parsed?
			%{"%s"/%d linkages/%d nulls} % [
				self.to_s,
				self.num_linkages_found,
				self.null_count,
			]
		else
			"(unparsed)"
		end

	return "#<%s:0x%x %s>" % [ self.class.name, self.object_id / 2, contents ]
end
#length ⇒ Fixnum
Returns the number of words in the tokenized sentence, including the boundary words and punctuation.
321 322 323 324 325 326 327 328 329 |
# File 'ext/sentence.c', line 321
/*
 * call-seq:
 *    sentence.length   -> fixnum
 *
 * Returns the number of words in the tokenized sentence, including the
 * boundary words and punctuation. Parses the sentence first if it has not
 * been parsed yet.
 */
static VALUE
rlink_sentence_length( VALUE self ) {
	struct rlink_sentence *sent = get_sentence( self );
	int nwords;

	if ( !RTEST(sent->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	nwords = sentence_length( (Sentence)sent->sentence );
	return INT2FIX( nwords );
}
|
#linkages ⇒ Array
Returns an Array of LinkParser::Linkage objects which represent the parts parsed from the sentence for the current linkage.
285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 |
# File 'ext/sentence.c', line 285
/*
 * call-seq:
 *    sentence.linkages   -> array
 *
 * Returns an Array with one LinkParser::Linkage object per valid linkage
 * (as counted by sentence_num_valid_linkages()). Each linkage is built by
 * calling the Linkage constructor with its index and this sentence. Parses
 * the sentence first if it has not been parsed yet.
 */
static VALUE
rlink_sentence_linkages( VALUE self ) {
	struct rlink_sentence *sent = get_sentence( self );
	VALUE result;
	VALUE ctor_args[2];
	int total = 0;
	int idx;

	if ( !RTEST(sent->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	total = sentence_num_valid_linkages( (Sentence)sent->sentence );
	result = rb_ary_new2( total );

	/* The second constructor argument is always this sentence. */
	ctor_args[1] = self;

	for ( idx = 0; idx < total; idx++ ) {
		ctor_args[0] = INT2FIX( idx );
		rb_ary_store( result, idx,
			rb_class_new_instance(2, ctor_args, rlink_cLinkage) );
	}

	return result;
}
|
#null_count ⇒ Integer
Returns the number of null links that were used in parsing the sentence.
418 419 420 421 422 423 424 425 426 427 428 |
# File 'ext/sentence.c', line 418
/*
 * call-seq:
 *    sentence.null_count   -> int
 *
 * Returns the number of null links that were used in parsing the sentence.
 * Parses the sentence first if it has not been parsed yet.
 */
static VALUE
rlink_sentence_null_count( VALUE self ) {
	struct rlink_sentence *sent = get_sentence( self );

	if ( !RTEST(sent->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	return INT2FIX( sentence_null_count((Sentence)sent->sentence) );
}
|
#num_linkages_found ⇒ Fixnum
Returns the number of linkages found when parsing the sentence. This will cause the sentence to be parsed if it hasn’t been already.
438 439 440 441 442 443 444 445 446 447 448 449 |
# File 'ext/sentence.c', line 438
/*
 * call-seq:
 *    sentence.num_linkages_found   -> fixnum
 *
 * Returns the number of linkages found when parsing the sentence. This will
 * cause the sentence to be parsed if it hasn't been already.
 */
static VALUE
rlink_sentence_num_linkages_found( VALUE self ) {
	struct rlink_sentence *sent = get_sentence( self );
	int found;

	if ( !RTEST(sent->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	found = sentence_num_linkages_found( (Sentence)sent->sentence );
	return INT2FIX( found );
}
|
#num_linkages_post_processed ⇒ Fixnum
Return the number of linkages that were actually post-processed (which may be less than the number found because of the linkage_limit parameter).
478 479 480 481 482 483 484 485 486 487 488 |
# File 'ext/sentence.c', line 478
/*
 * call-seq:
 *    sentence.num_linkages_post_processed   -> fixnum
 *
 * Return the number of linkages that were actually post-processed (which
 * may be less than the number found because of the linkage_limit
 * parameter). Parses the sentence first if it has not been parsed yet.
 */
static VALUE
rlink_sentence_num_linkages_post_processed( VALUE self ) {
	struct rlink_sentence *sent = get_sentence( self );

	if ( !RTEST(sent->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	return INT2FIX( sentence_num_linkages_post_processed((Sentence)sent->sentence) );
}
|
#num_valid_linkages ⇒ Fixnum
Return the number of linkages that had no post-processing violations.
458 459 460 461 462 463 464 465 466 467 468 |
# File 'ext/sentence.c', line 458
/*
 * call-seq:
 *    sentence.num_valid_linkages   -> fixnum
 *
 * Return the number of linkages that had no post-processing violations.
 * Parses the sentence first if it has not been parsed yet.
 */
static VALUE
rlink_sentence_num_valid_linkages( VALUE self ) {
	struct rlink_sentence *sent = get_sentence( self );
	int valid;

	if ( !RTEST(sent->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	valid = sentence_num_valid_linkages( (Sentence)sent->sentence );
	return INT2FIX( valid );
}
|
#num_violations(i) ⇒ Fixnum
The number of post-processing violations that the i-th linkage had during the last parse.
498 499 500 501 502 503 504 505 506 507 508 |
# File 'ext/sentence.c', line 498
/*
 * call-seq:
 *    sentence.num_violations( i )   -> fixnum
 *
 * The number of post-processing violations that the i-th linkage had during
 * the last parse. If the sentence has not been parsed yet, it is parsed
 * first with no extra options.
 *
 * Raises TypeError if +i+ cannot be converted to an integer, and
 * RangeError if it does not fit in a C int.
 */
static VALUE
rlink_sentence_num_violations( VALUE self, VALUE i ) {
	struct rlink_sentence *ptr = get_sentence( self );
	int linkage_idx;
	int violations;

	if ( !RTEST(ptr->parsed_p) )
		rlink_sentence_parse( 0, 0, self );

	/* NUM2INT validates the argument. FIX2INT would silently reinterpret
	   the bits of any non-Fixnum VALUE (nil, a String, ...) as a number. */
	linkage_idx = NUM2INT( i );

	/* NOTE(review): the index is not range-checked against the number of
	   linkages here -- confirm sentence_num_violations() copes with that. */
	violations = sentence_num_violations( (Sentence)ptr->sentence, linkage_idx );

	return INT2FIX( violations );
}
|
#options ⇒ Object
Returns a ParseOptions object for the receiving sentence.
sentence.options.verbosity = 3
sentence.options.islands_ok? # -> true
268 269 270 271 272 |
# File 'ext/sentence.c', line 268
/*
 * call-seq:
 *    sentence.options   -> obj
 *
 * Returns the value stored in the sentence's options slot: nil right after
 * initialization, and the ParseOptions object used by the most recent
 * parse afterwards.
 */
static VALUE
rlink_sentence_options( VALUE self ) {
	return get_sentence( self )->options;
}
|
#parse(options = {}) ⇒ Fixnum
Attach a parse set to this sentence and return the number of linkages found. If any options
are specified, they override those set in the sentence’s dictionary.
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 |
# File 'ext/sentence.c', line 207
/*
 * call-seq:
 *    sentence.parse( options={} )   -> fixnum
 *
 * Attach a parse set to this sentence and return the number of linkages
 * found. The optional +options+ argument is combined with the options of
 * the sentence's dictionary via rlink_make_parse_options().
 *
 * On success the resulting options object is stored on the sentence and the
 * sentence is flagged as parsed. If sentence_parse() reports a negative
 * count, rlink_raise_lp_error() is called instead.
 *
 * Re-parsing an already parsed sentence is permitted; a former guard that
 * raised "Can't reparse a sentence." has been disabled.
 */
static VALUE
rlink_sentence_parse( int argc, VALUE *argv, VALUE self ) {
	struct rlink_sentence *sent = get_sentence( self );
	VALUE call_opts = Qnil;
	VALUE dict_opts = Qnil;
	VALUE merged_opts = Qnil;
	Parse_Options lp_opts;
	int found = 0;

	debugMsg(( "Parsing sentence <%p>", sent ));

	/* Zero or one argument: the per-call options. */
	rb_scan_args( argc, argv, "01", &call_opts );

	/* The defaults come from the dictionary this sentence was built with. */
	dict_opts = rb_funcall( sent->dictionary, rb_intern("options"), 0 );

	/* Build a ParseOptions object out of both, then pull the underlying
	   Parse_Options struct out of it. */
	merged_opts = rlink_make_parse_options( dict_opts, call_opts );
	lp_opts = rlink_get_parseopts( merged_opts );

	found = sentence_parse( sent->sentence, lp_opts );
	if ( found < 0 )
		rlink_raise_lp_error();

	sent->options = merged_opts;
	sent->parsed_p = Qtrue;

	return INT2FIX( found );
}
|
#parsed? ⇒ Boolean
Returns true if the sentence has been parsed.
sentence.parsed? #-> false
sentence.parse #-> 6
sentence.parsed? #-> true
252 253 254 255 256 |
# File 'ext/sentence.c', line 252
/*
 * call-seq:
 *    sentence.parsed?   -> true or false
 *
 * Returns the sentence's parsed flag, which rlink_sentence_parse() sets to
 * true after a successful parse.
 */
static VALUE
rlink_sentence_parsed_p( VALUE self ) {
	return get_sentence( self )->parsed_p;
}
|
#to_s ⇒ Object
Print out the sentence
53 54 55 |
# File 'lib/linkparser/sentence.rb', line 53
### Return the words of the sentence joined with single spaces.
def to_s
	words.join( " " )
end
#word(idx) ⇒ String
Returns the spelling of the n-th word in the sentence as it appears after tokenization.
339 340 341 342 343 344 345 346 347 348 349 |
# File 'ext/sentence.c', line 339
/*
 * call-seq:
 *    sentence.word( idx )   -> str
 *
 * Returns the spelling of the n-th word in the sentence as it appears after
 * tokenization. If the sentence has not been parsed yet, it is parsed first
 * with no extra options.
 *
 * Raises TypeError if +n+ cannot be converted to an integer, and
 * RangeError if it does not fit in a C int.
 */
static VALUE
rlink_sentence_word( VALUE self, VALUE n ) {
	struct rlink_sentence *ptr = get_sentence( self );
	const char *word;
	int word_idx;

	if ( !RTEST(ptr->parsed_p) )
		rlink_sentence_parse( 0, 0, self );

	/* NUM2INT validates the argument. FIX2INT would silently reinterpret
	   the bits of any non-Fixnum VALUE (nil, a String, ...) as a number. */
	word_idx = NUM2INT( n );

	/* NOTE(review): the index is not checked against sentence_length()
	   here -- confirm sentence_get_word() handles out-of-range values. */
	word = sentence_get_word( (Sentence)ptr->sentence, word_idx );

	return rb_str_new2( word );
}
|
#words ⇒ Array
Returns the words of the sentence as they appear after tokenization.
sentence = LinkParser::Dictionary.new.parse( "The dogs barks." )
sentence.words #->
361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 |
# File 'ext/sentence.c', line 361
/*
 * call-seq:
 *    sentence.words   -> array
 *
 * Returns the words of the sentence as they appear after tokenization, as
 * an Array of Strings in sentence order. Parses the sentence first if it
 * has not been parsed yet.
 */
static VALUE
rlink_sentence_words( VALUE self ) {
	struct rlink_sentence *sent = get_sentence( self );
	VALUE result = rb_ary_new();
	int idx = 0;
	int nwords;

	if ( !RTEST(sent->parsed_p) ) {
		rlink_sentence_parse( 0, 0, self );
	}

	nwords = sentence_length( (Sentence)sent->sentence );

	while ( idx < nwords ) {
		const char *spelling = sentence_get_word( (Sentence)sent->sentence, idx );

		debugMsg(( "Word %d: <%s>", idx, spelling ));
		rb_ary_push( result, rb_str_new2(spelling) );
		idx++;
	}

	return result;
}
|