Class: Wapiti::Options

Inherits:
Object
  • Object
show all
Includes:
Comparable
Defined in:
lib/wapiti/options.rb,
ext/wapiti/native.c

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Object



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'ext/wapiti/native.c', line 96

// Options#initialize([hash]) { |options| ... }
//
// Resets the wrapped opt_t to opt_defaults, optionally applies a Hash of
// settings through #update, and yields self when a block is given.
//
// Raises ArgumentError when more than one argument is passed and TypeError
// (through Check_Type) when the single argument is not a Hash.
static VALUE initialize_options(int argc, VALUE *argv, VALUE self) {
	opt_t* options = get_options(self);
	*options = opt_defaults;

	// a default of 0 is replaced by the largest representable iteration count
	if (options->maxiter == 0) {
		options->maxiter = INT_MAX;
	}

	// copy the default algorithm name to the heap so that all options strings
	// are on the heap; reserve one extra byte so that the copy stays
	// NUL-terminated (calloc zero-fills the buffer) -- the accessor reads it
	// back with rb_str_new2, which requires a terminated C string
	size_t length = strlen(options->algo);
	char* tmp = calloc(length + 1, sizeof(char));
	if (tmp == NULL) {
		// do not leave the pointer to the non-heap default string behind
		options->algo = NULL;
		rb_memerror();
	}
	memcpy(tmp, options->algo, length);
	options->algo = tmp;

	if (argc > 1) {
		rb_raise(rb_eArgError,
			"wrong number of arguments (%d for 0..1)", argc);
	}

	// apply the user-supplied settings on top of the defaults
	if (argc) {
		Check_Type(argv[0], T_HASH);
		(void)rb_funcall(self, rb_intern("update"), 1, argv[0]);
	}

	// yield self if block_given?
	if (rb_block_given_p()) {
		rb_yield(self);
	}

	return self;
}

Class Method Details

.algorithms ⇒ Object

Returns the list of supported algorithm options.



23
24
25
# File 'lib/wapiti/options.rb', line 23

# Returns the frozen list of supported algorithm names. The list is built on
# the first call and the same object is returned afterwards.
def algorithms
	return @algorithms if @algorithms

	@algorithms = %w[l-bfgs sgd-l1 bcd rprop rprop+ rprop- auto].freeze
end

.attribute_names ⇒ Object

Returns a sorted list of available option attributes.



10
11
12
13
14
15
# File 'lib/wapiti/options.rb', line 10

# Returns the frozen, alphabetically sorted list of option attribute names as
# symbols. The list is built once and memoized.
def attribute_names
	return @attribute_names if @attribute_names

	names = %w[
		stop_window convergence_window posterior max_iterations jobsize
		threads rho1 rho2 stop_epsilon score check algorithm pattern
		development_data maxent compact sparse skip_tokens
	]
	@attribute_names = names.sort.map { |name| name.to_sym }.freeze
end

.defaults ⇒ Object

Returns the default options.



18
19
20
# File 'lib/wapiti/options.rb', line 18

# Returns the attributes of a freshly constructed instance. The hash is
# computed on the first call and the same object is returned afterwards.
def defaults
	return @defaults if @defaults

	@defaults = new.attributes
end

Instance Method Details

#<=>(other) ⇒ Object



115
116
117
# File 'lib/wapiti/options.rb', line 115

# Compares two option sets by their attributes. Returns nil when +other+
# does not respond to #attributes.
def <=>(other)
	return nil unless other.respond_to?(:attributes)

	attributes <=> other.attributes
end

#[](name) ⇒ Object

Returns the value of the attribute identified by name or nil if there is no such attribute.



31
32
33
# File 'lib/wapiti/options.rb', line 31

# Returns the value of the attribute +name+, or nil when +name+ is not a
# known attribute.
def [](name)
	return nil unless has_attribute?(name)

	send(name)
end

#[]=(name, value) ⇒ Object

Updates the value of the attribute identified by name with the passed-in value.

Raises:

  • (ArgumentError)


37
38
39
40
# File 'lib/wapiti/options.rb', line 37

# Assigns +value+ to the attribute +name+ through its writer method.
# Raises ArgumentError when +name+ is not a known attribute.
def []=(name, value)
	unless has_attribute?(name)
		raise ArgumentError, "bad attribute name: #{name}"
	end

	writer = "#{name}="
	send(writer, value)
end

#algorithm ⇒ Object Also known as: algo



438
439
440
441
# File 'ext/wapiti/native.c', line 438

// Options#algorithm: returns the algo string as a new Ruby String, or an
// empty String when the field is NULL.
static VALUE options_algorithm(VALUE self) {
	const char *name = get_options(self)->algo;

	if (name == NULL) {
		return rb_str_new2("");
	}
	return rb_str_new2(name);
}

#algorithm=(rb_string) ⇒ Object Also known as: algo=



443
444
445
446
447
448
# File 'ext/wapiti/native.c', line 443

// Options#algorithm=: hands the algo slot and the Ruby string to
// copy_string (defined elsewhere; presumably it stores a heap copy --
// TODO confirm) and returns the argument unchanged.
static VALUE options_set_algorithm(VALUE self, VALUE rb_string) {
	copy_string(&get_options(self)->algo, rb_string);
	return rb_string;
}

#alpha ⇒ Object



270
271
272
# File 'ext/wapiti/native.c', line 270

// Options#alpha: returns the sgdl1.alpha setting as a Ruby Float.
static VALUE options_alpha(VALUE self) {
	const opt_t *options = get_options(self);
	return rb_float_new(options->sgdl1.alpha);
}

#alpha=(rb_numeric) ⇒ Object



274
275
276
277
# File 'ext/wapiti/native.c', line 274

// Options#alpha=: converts the argument with NUM2DBL, stores it in
// sgdl1.alpha and returns the argument unchanged.
static VALUE options_set_alpha(VALUE self, VALUE rb_numeric) {
	const double alpha = NUM2DBL(rb_numeric);
	get_options(self)->sgdl1.alpha = alpha;
	return rb_numeric;
}

#attributes ⇒ Object Also known as: to_hash

Returns a hash of all the attributes with their names and values.



73
74
75
# File 'lib/wapiti/options.rb', line 73

# Returns a hash mapping every name in Options.attribute_names to the value
# returned by the reader of the same name.
#
# The hash is built from [name, value] pairs directly. Flattening the pairs
# and splatting them into Hash[] would flatten recursively, so an Array value
# would be spread over several key/value slots.
def attributes
	Hash[Options.attribute_names.map { |name| [name, send(name)] }]
end

#bcd ⇒ Object



61
62
63
# File 'lib/wapiti/options.rb', line 61

# Returns the BCD-related settings as a hash with the single key :kappa.
def bcd
	settings = {}
	settings[:kappa] = kappa
	settings
end

#check ⇒ Object Also known as: check?



355
356
357
# File 'ext/wapiti/native.c', line 355

// Options#check: returns true when the check flag is set, false otherwise.
static VALUE options_check(VALUE self) {
	if (get_options(self)->check) {
		return Qtrue;
	}
	return Qfalse;
}

#check=(rb_boolean) ⇒ Object



359
360
361
362
# File 'ext/wapiti/native.c', line 359

// Options#check=: stores the truthiness of the argument in the check flag
// and returns the argument unchanged.
//
// RTEST is the documented truthiness test for a VALUE (everything except
// nil and false); the hand-written test relied on Qfalse being 0.
static VALUE options_set_check(VALUE self, VALUE rb_boolean) {
	get_options(self)->check = RTEST(rb_boolean);
	return rb_boolean;
}

#clip ⇒ Object



391
392
393
# File 'ext/wapiti/native.c', line 391

// Options#clip: returns true when lbfgs.clip is set, false otherwise.
static VALUE options_clip(VALUE self) {
	if (get_options(self)->lbfgs.clip) {
		return Qtrue;
	}
	return Qfalse;
}

#clip=(rb_boolean) ⇒ Object



395
396
397
398
# File 'ext/wapiti/native.c', line 395

// Options#clip=: stores the truthiness of the argument in lbfgs.clip and
// returns the argument unchanged.
//
// RTEST is the documented truthiness test for a VALUE (everything except
// nil and false); the hand-written test relied on Qfalse being 0.
static VALUE options_set_clip(VALUE self, VALUE rb_boolean) {
	get_options(self)->lbfgs.clip = RTEST(rb_boolean);
	return rb_boolean;
}

#compact ⇒ Object Also known as: compact?



337
338
339
# File 'ext/wapiti/native.c', line 337

// Options#compact: returns true when the compact flag is set, false
// otherwise.
static VALUE options_compact(VALUE self) {
	if (get_options(self)->compact) {
		return Qtrue;
	}
	return Qfalse;
}

#compact=(rb_boolean) ⇒ Object



341
342
343
344
# File 'ext/wapiti/native.c', line 341

// Options#compact=: stores the truthiness of the argument in the compact
// flag and returns the argument unchanged.
//
// RTEST is the documented truthiness test for a VALUE (everything except
// nil and false); the hand-written test relied on Qfalse being 0.
static VALUE options_set_compact(VALUE self, VALUE rb_boolean) {
	get_options(self)->compact = RTEST(rb_boolean);
	return rb_boolean;
}

#cutoff ⇒ Object



400
401
402
# File 'ext/wapiti/native.c', line 400

// Options#cutoff: returns true when rprop.cutoff is set, false otherwise.
static VALUE options_cutoff(VALUE self) {
	if (get_options(self)->rprop.cutoff) {
		return Qtrue;
	}
	return Qfalse;
}

#cutoff=(rb_boolean) ⇒ Object



404
405
406
407
# File 'ext/wapiti/native.c', line 404

// Options#cutoff=: stores the truthiness of the argument in rprop.cutoff
// and returns the argument unchanged.
//
// RTEST is the documented truthiness test for a VALUE (everything except
// nil and false); the hand-written test relied on Qfalse being 0.
static VALUE options_set_cutoff(VALUE self, VALUE rb_boolean) {
	get_options(self)->rprop.cutoff = RTEST(rb_boolean);
	return rb_boolean;
}

#development_data ⇒ Object Also known as: devel



450
451
452
453
# File 'ext/wapiti/native.c', line 450

// Options#development_data: returns the devel string as a new Ruby String,
// or an empty String when the field is NULL.
static VALUE options_development_data(VALUE self) {
	const char *path = get_options(self)->devel;

	if (path == NULL) {
		return rb_str_new2("");
	}
	return rb_str_new2(path);
}

#development_data=(rb_string) ⇒ Object Also known as: devel=



455
456
457
458
459
460
# File 'ext/wapiti/native.c', line 455

// Options#development_data=: hands the devel slot and the Ruby string to
// copy_string (defined elsewhere; presumably it stores a heap copy --
// TODO confirm) and returns the argument unchanged.
static VALUE options_set_development_data(VALUE self, VALUE rb_string) {
	copy_string(&get_options(self)->devel, rb_string);
	return rb_string;
}

#eta0 ⇒ Object



261
262
263
# File 'ext/wapiti/native.c', line 261

// Options#eta0: returns the sgdl1.eta0 setting as a Ruby Float.
static VALUE options_eta0(VALUE self) {
	const opt_t *options = get_options(self);
	return rb_float_new(options->sgdl1.eta0);
}

#eta0=(rb_numeric) ⇒ Object



265
266
267
268
# File 'ext/wapiti/native.c', line 265

// Options#eta0=: converts the argument with NUM2DBL, stores it in
// sgdl1.eta0 and returns the argument unchanged.
static VALUE options_set_eta0(VALUE self, VALUE rb_numeric) {
	const double eta0 = NUM2DBL(rb_numeric);
	get_options(self)->sgdl1.eta0 = eta0;
	return rb_numeric;
}

#has_attribute?(attribute) ⇒ Boolean

Returns:

  • (Boolean)


79
80
81
# File 'lib/wapiti/options.rb', line 79

# Returns true when +attribute+ is one of Options.attribute_names, false
# otherwise. The comparison is exact: the names are symbols, so a String
# argument never matches.
def has_attribute?(attribute)
	known = Options.attribute_names
	known.include?(attribute)
end

#histsz ⇒ Object



209
210
211
# File 'ext/wapiti/native.c', line 209

// Options#histsz: returns the lbfgs.histsz setting as a Fixnum.
static VALUE options_histsz(VALUE self) {
	const opt_t *options = get_options(self);
	return INT2FIX(options->lbfgs.histsz);
}

#histsz=(rb_fixnum) ⇒ Object



213
214
215
216
217
218
# File 'ext/wapiti/native.c', line 213

// Options#histsz=: requires a Fixnum (TypeError otherwise), stores it in
// lbfgs.histsz and returns the argument unchanged.
static VALUE options_set_histsz(VALUE self, VALUE rb_fixnum) {
	Check_Type(rb_fixnum, T_FIXNUM);

	const int histsz = FIX2INT(rb_fixnum);
	get_options(self)->lbfgs.histsz = histsz;
	return rb_fixnum;
}

#jobsize ⇒ Object



183
184
185
# File 'ext/wapiti/native.c', line 183

// Options#jobsize: returns the jobsize setting as a Fixnum.
static VALUE options_jobsize(VALUE self) {
	const opt_t *options = get_options(self);
	return INT2FIX(options->jobsize);
}

#jobsize=(rb_fixnum) ⇒ Object



187
188
189
190
191
192
193
194
# File 'ext/wapiti/native.c', line 187

// Options#jobsize=: requires a Fixnum (TypeError otherwise), stores it in
// the jobsize field and returns the argument unchanged.
static VALUE options_set_jobsize(VALUE self, VALUE rb_fixnum) {
	opt_t *options = get_options(self);
	int jobsize;

	Check_Type(rb_fixnum, T_FIXNUM);
	jobsize = FIX2INT(rb_fixnum);
	options->jobsize = jobsize;
	return rb_fixnum;
}

#kappa ⇒ Object



279
280
281
# File 'ext/wapiti/native.c', line 279

// Options#kappa: returns the bcd.kappa setting as a Ruby Float.
static VALUE options_kappa(VALUE self) {
	const opt_t *options = get_options(self);
	return rb_float_new(options->bcd.kappa);
}

#kappa=(rb_numeric) ⇒ Object



283
284
285
286
# File 'ext/wapiti/native.c', line 283

// Options#kappa=: converts the argument with NUM2DBL, stores it in
// bcd.kappa and returns the argument unchanged.
static VALUE options_set_kappa(VALUE self, VALUE rb_numeric) {
	const double kappa = NUM2DBL(rb_numeric);
	get_options(self)->bcd.kappa = kappa;
	return rb_numeric;
}

#lbfgs ⇒ Object



53
54
55
# File 'lib/wapiti/options.rb', line 53

# Returns the L-BFGS-related settings as a hash with the keys :clip,
# :histsz and :maxls (in that order).
def lbfgs
	settings = {}
	settings[:clip]   = clip
	settings[:histsz] = histsz
	settings[:maxls]  = maxls
	settings
end

#lblpost ⇒ Object Also known as: lblpost?, posterior, posterior?



382
383
384
# File 'ext/wapiti/native.c', line 382

// Options#lblpost: returns true when the lblpost flag is set, false
// otherwise.
static VALUE options_lblpost(VALUE self) {
	if (get_options(self)->lblpost) {
		return Qtrue;
	}
	return Qfalse;
}

#lblpost=(rb_boolean) ⇒ Object Also known as: posterior=



386
387
388
389
# File 'ext/wapiti/native.c', line 386

// Options#lblpost=: stores the truthiness of the argument in the lblpost
// flag and returns the argument unchanged.
//
// RTEST is the documented truthiness test for a VALUE (everything except
// nil and false); the hand-written test relied on Qfalse being 0.
static VALUE options_set_lblpost(VALUE self, VALUE rb_boolean) {
	get_options(self)->lblpost = RTEST(rb_boolean);
	return rb_boolean;
}

#maxent ⇒ Object Also known as: maxent?

Boolean Accessors



328
329
330
# File 'ext/wapiti/native.c', line 328

// Options#maxent: returns true when the maxent flag is set, false
// otherwise.
static VALUE options_maxent(VALUE self) {
	if (get_options(self)->maxent) {
		return Qtrue;
	}
	return Qfalse;
}

#maxent=(rb_boolean) ⇒ Object



332
333
334
335
# File 'ext/wapiti/native.c', line 332

// Options#maxent=: stores the truthiness of the argument in the maxent
// flag and returns the argument unchanged.
//
// RTEST is the documented truthiness test for a VALUE (everything except
// nil and false); the hand-written test relied on Qfalse being 0.
static VALUE options_set_maxent(VALUE self, VALUE rb_boolean) {
	get_options(self)->maxent = RTEST(rb_boolean);
	return rb_boolean;
}

#maxiter ⇒ Object Also known as: max_iterations



170
171
172
# File 'ext/wapiti/native.c', line 170

// Options#maxiter: returns the maxiter setting as a Ruby Integer.
//
// INT2NUM is used rather than INT2FIX because initialize_options stores
// INT_MAX in this field when the default is 0, and INT2FIX performs no
// range check: where a Fixnum cannot hold every int (e.g. 32-bit builds)
// the value would be silently corrupted. When the value fits in a Fixnum
// the result is the same as before.
static VALUE options_maxiter(VALUE self) {
	return INT2NUM(get_options(self)->maxiter);
}

#maxiter=(rb_fixnum) ⇒ Object Also known as: max_iterations=



174
175
176
177
178
179
180
181
# File 'ext/wapiti/native.c', line 174

// Options#maxiter=: requires a Fixnum (TypeError otherwise), stores it in
// the maxiter field and returns the argument unchanged.
// NOTE(review): initialize_options maps a default of 0 to INT_MAX, this
// setter stores 0 as is -- confirm whether that difference is intended.
static VALUE options_set_maxiter(VALUE self, VALUE rb_fixnum) {
	opt_t *options = get_options(self);
	int maxiter;

	Check_Type(rb_fixnum, T_FIXNUM);
	maxiter = FIX2INT(rb_fixnum);
	options->maxiter = maxiter;
	return rb_fixnum;
}

#maxls ⇒ Object



220
221
222
# File 'ext/wapiti/native.c', line 220

// Options#maxls: returns the lbfgs.maxls setting as a Fixnum.
static VALUE options_maxls(VALUE self) {
	const opt_t *options = get_options(self);
	return INT2FIX(options->lbfgs.maxls);
}

#maxls=(rb_fixnum) ⇒ Object



224
225
226
227
228
229
# File 'ext/wapiti/native.c', line 224

// Options#maxls=: requires a Fixnum (TypeError otherwise), stores it in
// lbfgs.maxls and returns the argument unchanged.
static VALUE options_set_maxls(VALUE self, VALUE rb_fixnum) {
	Check_Type(rb_fixnum, T_FIXNUM);

	const int maxls = FIX2INT(rb_fixnum);
	get_options(self)->lbfgs.maxls = maxls;
	return rb_fixnum;
}

#model ⇒ Object



426
427
428
429
# File 'ext/wapiti/native.c', line 426

// Options#model: returns the model string as a new Ruby String, or an
// empty String when the field is NULL.
static VALUE options_model(VALUE self) {
	const char *path = get_options(self)->model;

	if (path == NULL) {
		return rb_str_new2("");
	}
	return rb_str_new2(path);
}

#model=(rb_string) ⇒ Object



431
432
433
434
435
436
# File 'ext/wapiti/native.c', line 431

// Options#model=: hands the model slot and the Ruby string to copy_string
// (defined elsewhere; presumably it stores a heap copy -- TODO confirm)
// and returns the argument unchanged.
static VALUE options_set_model(VALUE self, VALUE rb_string) {
	copy_string(&get_options(self)->model, rb_string);
	return rb_string;
}

#nbest ⇒ Object

Fixnum Accessors



135
136
137
# File 'ext/wapiti/native.c', line 135

// Options#nbest: returns the nbest setting as a Fixnum.
static VALUE options_nbest(VALUE self) {
	const opt_t *options = get_options(self);
	return INT2FIX(options->nbest);
}

#nbest=(rb_fixnum) ⇒ Object



139
140
141
142
143
144
# File 'ext/wapiti/native.c', line 139

// Options#nbest=: requires a Fixnum (TypeError otherwise), stores it in
// the nbest field and returns the argument unchanged.
static VALUE options_set_nbest(VALUE self, VALUE rb_fixnum) {
	Check_Type(rb_fixnum, T_FIXNUM);

	const int nbest = FIX2INT(rb_fixnum);
	get_options(self)->nbest = nbest;
	return rb_fixnum;
}

#nthread ⇒ Object Also known as: threads



196
197
198
# File 'ext/wapiti/native.c', line 196

// Options#nthread: returns the nthread setting as a Fixnum.
static VALUE options_nthread(VALUE self) {
	const opt_t *options = get_options(self);
	return INT2FIX(options->nthread);
}

#nthread=(rb_fixnum) ⇒ Object Also known as: threads=



200
201
202
203
204
205
206
207
# File 'ext/wapiti/native.c', line 200

// Options#nthread=: requires a Fixnum (TypeError otherwise), stores it in
// the nthread field and returns the argument unchanged.
static VALUE options_set_nthread(VALUE self, VALUE rb_fixnum) {
	opt_t *options = get_options(self);
	int nthread;

	Check_Type(rb_fixnum, T_FIXNUM);
	nthread = FIX2INT(rb_fixnum);
	options->nthread = nthread;
	return rb_fixnum;
}

#objwin ⇒ Object Also known as: convergence_window



158
159
160
# File 'ext/wapiti/native.c', line 158

// Options#objwin: returns the objwin setting as a Fixnum.
static VALUE options_objwin(VALUE self) {
	const opt_t *options = get_options(self);
	return INT2FIX(options->objwin);
}

#objwin=(rb_fixnum) ⇒ Object Also known as: convergence_window=



162
163
164
165
166
167
# File 'ext/wapiti/native.c', line 162

// Options#objwin=: requires a Fixnum (TypeError otherwise), stores it in
// the objwin field and returns the argument unchanged.
static VALUE options_set_objwin(VALUE self, VALUE rb_fixnum) {
	Check_Type(rb_fixnum, T_FIXNUM);

	const int objwin = FIX2INT(rb_fixnum);
	get_options(self)->objwin = objwin;
	return rb_fixnum;
}

#outsc ⇒ Object Also known as: outsc?, score, score?



373
374
375
# File 'ext/wapiti/native.c', line 373

// Options#outsc: returns true when the outsc flag is set, false otherwise.
static VALUE options_outsc(VALUE self) {
	if (get_options(self)->outsc) {
		return Qtrue;
	}
	return Qfalse;
}

#outsc=(rb_boolean) ⇒ Object Also known as: score=



377
378
379
380
# File 'ext/wapiti/native.c', line 377

// Options#outsc=: stores the truthiness of the argument in the outsc flag
// and returns the argument unchanged.
//
// RTEST is the documented truthiness test for a VALUE (everything except
// nil and false); the hand-written test relied on Qfalse being 0.
static VALUE options_set_outsc(VALUE self, VALUE rb_boolean) {
	get_options(self)->outsc = RTEST(rb_boolean);
	return rb_boolean;
}

#pattern ⇒ Object Also known as: template

String Accessors



414
415
416
417
# File 'ext/wapiti/native.c', line 414

// Options#pattern: returns the pattern string as a new Ruby String, or an
// empty String when the field is NULL.
static VALUE options_pattern(VALUE self) {
	const char *path = get_options(self)->pattern;

	if (path == NULL) {
		return rb_str_new2("");
	}
	return rb_str_new2(path);
}

#pattern=(rb_string) ⇒ Object Also known as: template=



419
420
421
422
423
424
# File 'ext/wapiti/native.c', line 419

// Options#pattern=: hands the pattern slot and the Ruby string to
// copy_string (defined elsewhere; presumably it stores a heap copy --
// TODO confirm) and returns the argument unchanged.
static VALUE options_set_pattern(VALUE self, VALUE rb_string) {
	copy_string(&get_options(self)->pattern, rb_string);
	return rb_string;
}

#rho1 ⇒ Object

Float Accessors



234
235
236
# File 'ext/wapiti/native.c', line 234

// Options#rho1: returns the rho1 setting as a Ruby Float.
static VALUE options_rho1(VALUE self) {
	const opt_t *options = get_options(self);
	return rb_float_new(options->rho1);
}

#rho1=(rb_numeric) ⇒ Object



238
239
240
241
# File 'ext/wapiti/native.c', line 238

// Options#rho1=: converts the argument with NUM2DBL, stores it in rho1 and
// returns the argument unchanged.
static VALUE options_set_rho1(VALUE self, VALUE rb_numeric) {
	const double rho1 = NUM2DBL(rb_numeric);
	get_options(self)->rho1 = rho1;
	return rb_numeric;
}

#rho2 ⇒ Object



243
244
245
# File 'ext/wapiti/native.c', line 243

// Options#rho2: returns the rho2 setting as a Ruby Float.
static VALUE options_rho2(VALUE self) {
	const opt_t *options = get_options(self);
	return rb_float_new(options->rho2);
}

#rho2=(rb_numeric) ⇒ Object



247
248
249
250
# File 'ext/wapiti/native.c', line 247

// Options#rho2=: converts the argument with NUM2DBL, stores it in rho2 and
// returns the argument unchanged.
static VALUE options_set_rho2(VALUE self, VALUE rb_numeric) {
	const double rho2 = NUM2DBL(rb_numeric);
	get_options(self)->rho2 = rho2;
	return rb_numeric;
}

#rprop ⇒ Object



65
66
67
68
69
70
# File 'lib/wapiti/options.rb', line 65

# Returns the RPROP-related settings as a hash with the keys :stpmin,
# :stpmax, :stpinc, :stpdec and :cutoff (in that order).
def rprop
	settings = {}
	settings[:stpmin] = stpmin
	settings[:stpmax] = stpmax
	settings[:stpinc] = stpinc
	settings[:stpdec] = stpdec
	settings[:cutoff] = cutoff
	settings
end

#sgdl1 ⇒ Object



57
58
59
# File 'lib/wapiti/options.rb', line 57

# Returns the SGD-L1-related settings as a hash with the keys :eta0 and
# :alpha (in that order).
def sgdl1
	settings = {}
	settings[:eta0]  = eta0
	settings[:alpha] = alpha
	settings
end

#skip_tokens ⇒ Object Also known as: skip_tokens?



364
365
366
# File 'ext/wapiti/native.c', line 364

// Options#skip_tokens: returns true when the label flag is set, false
// otherwise.
static VALUE options_label(VALUE self) {
	if (get_options(self)->label) {
		return Qtrue;
	}
	return Qfalse;
}

#skip_tokens=(rb_boolean) ⇒ Object



368
369
370
371
# File 'ext/wapiti/native.c', line 368

// Options#skip_tokens=: stores the truthiness of the argument in the label
// flag and returns the argument unchanged.
//
// RTEST is the documented truthiness test for a VALUE (everything except
// nil and false); the hand-written test relied on Qfalse being 0.
static VALUE options_set_label(VALUE self, VALUE rb_boolean) {
	get_options(self)->label = RTEST(rb_boolean);
	return rb_boolean;
}

#sparse ⇒ Object Also known as: sparse?



346
347
348
# File 'ext/wapiti/native.c', line 346

// Options#sparse: returns true when the sparse flag is set, false
// otherwise.
static VALUE options_sparse(VALUE self) {
	if (get_options(self)->sparse) {
		return Qtrue;
	}
	return Qfalse;
}

#sparse=(rb_boolean) ⇒ Object



350
351
352
353
# File 'ext/wapiti/native.c', line 350

// Options#sparse=: stores the truthiness of the argument in the sparse
// flag and returns the argument unchanged.
//
// RTEST is the documented truthiness test for a VALUE (everything except
// nil and false); the hand-written test relied on Qfalse being 0.
static VALUE options_set_sparse(VALUE self, VALUE rb_boolean) {
	get_options(self)->sparse = RTEST(rb_boolean);
	return rb_boolean;
}

#stopeps ⇒ Object Also known as: stop_epsilon



252
253
254
# File 'ext/wapiti/native.c', line 252

// Options#stopeps: returns the stopeps setting as a Ruby Float.
static VALUE options_stopeps(VALUE self) {
	const opt_t *options = get_options(self);
	return rb_float_new(options->stopeps);
}

#stopeps=(rb_numeric) ⇒ Object Also known as: stop_epsilon=



256
257
258
259
# File 'ext/wapiti/native.c', line 256

// Options#stopeps=: converts the argument with NUM2DBL, stores it in
// stopeps and returns the argument unchanged.
static VALUE options_set_stopeps(VALUE self, VALUE rb_numeric) {
	const double stopeps = NUM2DBL(rb_numeric);
	get_options(self)->stopeps = stopeps;
	return rb_numeric;
}

#stopwin ⇒ Object Also known as: stop_window

Option Accessors



147
148
149
# File 'ext/wapiti/native.c', line 147

// Options#stopwin: returns the stopwin setting as a Fixnum.
static VALUE options_stopwin(VALUE self) {
	const opt_t *options = get_options(self);
	return INT2FIX(options->stopwin);
}

#stopwin=(rb_fixnum) ⇒ Object Also known as: stop_window=



151
152
153
154
155
156
# File 'ext/wapiti/native.c', line 151

// Options#stopwin=: requires a Fixnum (TypeError otherwise), stores it in
// the stopwin field and returns the argument unchanged.
static VALUE options_set_stopwin(VALUE self, VALUE rb_fixnum) {
	Check_Type(rb_fixnum, T_FIXNUM);

	const int stopwin = FIX2INT(rb_fixnum);
	get_options(self)->stopwin = stopwin;
	return rb_fixnum;
}

#stpdec ⇒ Object



315
316
317
# File 'ext/wapiti/native.c', line 315

// Options#stpdec: returns the rprop.stpdec setting as a Ruby Float.
static VALUE options_stpdec(VALUE self) {
	const opt_t *options = get_options(self);
	return rb_float_new(options->rprop.stpdec);
}

#stpdec=(rb_numeric) ⇒ Object



319
320
321
322
# File 'ext/wapiti/native.c', line 319

// Options#stpdec=: converts the argument with NUM2DBL, stores it in
// rprop.stpdec and returns the argument unchanged.
static VALUE options_set_stpdec(VALUE self, VALUE rb_numeric) {
	const double stpdec = NUM2DBL(rb_numeric);
	get_options(self)->rprop.stpdec = stpdec;
	return rb_numeric;
}

#stpinc ⇒ Object



306
307
308
# File 'ext/wapiti/native.c', line 306

// Options#stpinc: returns the rprop.stpinc setting as a Ruby Float.
static VALUE options_stpinc(VALUE self) {
	const opt_t *options = get_options(self);
	return rb_float_new(options->rprop.stpinc);
}

#stpinc=(rb_numeric) ⇒ Object



310
311
312
313
# File 'ext/wapiti/native.c', line 310

// Options#stpinc=: converts the argument with NUM2DBL, stores it in
// rprop.stpinc and returns the argument unchanged.
static VALUE options_set_stpinc(VALUE self, VALUE rb_numeric) {
	const double stpinc = NUM2DBL(rb_numeric);
	get_options(self)->rprop.stpinc = stpinc;
	return rb_numeric;
}

#stpmax ⇒ Object



297
298
299
# File 'ext/wapiti/native.c', line 297

// Options#stpmax: returns the rprop.stpmax setting as a Ruby Float.
static VALUE options_stpmax(VALUE self) {
	const opt_t *options = get_options(self);
	return rb_float_new(options->rprop.stpmax);
}

#stpmax=(rb_numeric) ⇒ Object



301
302
303
304
# File 'ext/wapiti/native.c', line 301

// Options#stpmax=: converts the argument with NUM2DBL, stores it in
// rprop.stpmax and returns the argument unchanged.
static VALUE options_set_stpmax(VALUE self, VALUE rb_numeric) {
	const double stpmax = NUM2DBL(rb_numeric);
	get_options(self)->rprop.stpmax = stpmax;
	return rb_numeric;
}

#stpmin ⇒ Object



288
289
290
# File 'ext/wapiti/native.c', line 288

// Options#stpmin: returns the rprop.stpmin setting as a Ruby Float.
static VALUE options_stpmin(VALUE self) {
	const opt_t *options = get_options(self);
	return rb_float_new(options->rprop.stpmin);
}

#stpmin=(rb_numeric) ⇒ Object



292
293
294
295
# File 'ext/wapiti/native.c', line 292

// Options#stpmin=: converts the argument with NUM2DBL, stores it in
// rprop.stpmin and returns the argument unchanged.
static VALUE options_set_stpmin(VALUE self, VALUE rb_numeric) {
	const double stpmin = NUM2DBL(rb_numeric);
	get_options(self)->rprop.stpmin = stpmin;
	return rb_numeric;
}

#update(attributes = {}) ⇒ Object Also known as: update_attributes

Updates all the attributes from the passed-in hash.



43
44
45
46
47
48
49
# File 'lib/wapiti/options.rb', line 43

# Assigns every key/value pair of +attributes+ through the matching
# "<key>=" writer. Keys without a writer the receiver responds to are
# skipped silently. Returns self.
def update(attributes = {})
	attributes.each_pair do |key, value|
		writer = "#{key}="
		next unless respond_to?(writer)

		send(writer, value)
	end

	self
end

#valid? ⇒ Boolean

Returns:

  • (Boolean)


87
88
89
# File 'lib/wapiti/options.rb', line 87

# Returns true when #validate reports no problems, false otherwise.
def valid?
	problems = validate
	problems.empty?
end

#valid_algorithm? ⇒ Boolean

Returns:

  • (Boolean)


83
84
85
# File 'lib/wapiti/options.rb', line 83

# Returns true when the current algorithm is listed in the class-level
# algorithms list, false otherwise.
def valid_algorithm?
	supported = self.class.algorithms
	supported.include?(algorithm)
end

#validate ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/wapiti/options.rb', line 91

# Checks the current settings and returns an array of error messages; the
# array is empty when no problem was found.
#
# Checks performed:
# * threads, jobsize, alpha, histsz, maxls, eta0 and nbest must be > 0
# * rho1 and rho2 must be >= 0.0
# * the algorithm must satisfy #valid_algorithm?
# * the 'bcd' algorithm cannot be combined with maxent
def validate
	e = []

	# each name is listed exactly once so that an invalid value is reported
	# only once
	%w{ threads jobsize alpha histsz maxls eta0 nbest }.each do |name|
		value = send(name)
		e << "invalid value for #{name}: #{value}" unless value > 0
	end

	%w{ rho1 rho2 }.each do |name|
		value = send(name)
		e << "invalid value for #{name}: #{value}" unless value >= 0.0
	end

	e << "unknown algorithm: #{algorithm}" unless valid_algorithm?
	e << "BCD not supported for training maxent models" if maxent && algorithm == 'bcd'
	e
end