Module: Linguistics::EN

Defined in:
lib/linguistics/en.rb,
lib/linguistics/en/wordnet.rb,
lib/linguistics/en/infinitive.rb,
lib/linguistics/en/linkparser.rb

Overview

This file contains the extensions to the Linguistics::EN module which provide support for the Ruby LinkParser module. LinkParser enables grammatical queries of English-language sentences.

Synopsis

# Test to see whether or not the link parser is loaded.
Linguistics::EN.has_link_parser?
# => true

# Diagram the first linkage for a test sentence
puts "he is a big dog".en.sentence.linkages.first.to_s

    +---O*---+
    | +--Ds--+
 +Ss+ |  +-A-+ 
 |  | |  |   | 
he is a big dog

# Find the verb in the sentence
"he is a big dog".en.sentence.verb.to_s      
# => "is"

# Combined infinitive + LinkParser: Find the infinitive form of the verb of the
# given sentence.
"he is a big dog".en.sentence.verb.infinitive
# => "be"

# Find the direct object of the sentence
"he is a big dog".en.sentence.object.to_s
# => "dog"

# Combine WordNet + LinkParser to find the definition of the direct object of
# the sentence
"he is a big dog".en.sentence.object.gloss
# => "a member of the genus Canis (probably descended from the common wolf) that
# has been domesticated by man since prehistoric times; occurs in many breeds;
# \"the dog barked all night\""

Authors

:include: LICENSE

Please see the file LICENSE in the base directory for licensing details.

Defined Under Namespace

Classes: Infinitive

Constant Summary collapse

PL_sb_irregular_s =

Plurals

{
	"ephemeris"	=> "ephemerides",
	"iris"		=> "irises|irides",
	"clitoris"	=> "clitorises|clitorides",
	"corpus"	=> "corpuses|corpora",
	"opus"		=> "opuses|opera",
	"genus"		=> "genera",
	"mythos"	=> "mythoi",
	"penis"		=> "penises|penes",
	"testis"	=> "testes",
}
PL_sb_irregular_h =
{
	# Singular noun => irregular plural. A value of the form "a|b" carries
	# two alternative plurals.
	# NOTE(review): presumably "a" is the modern form and "b" the classical
	# one -- confirm against the code that splits these values.
	'child'       => 'children',
	'brother'     => 'brothers|brethren',
	'loaf'        => 'loaves',
	'hoof'        => 'hoofs|hooves',
	'beef'        => 'beefs|beeves',
	'money'       => 'monies',
	'mongoose'    => 'mongooses',
	'ox'          => 'oxen',
	'cow'         => 'cows|kine',
	'soliloquy'   => 'soliloquies',
	'graffito'    => 'graffiti',
	'prima donna' => 'prima donnas|prime donne',
	'octopus'     => 'octopuses|octopodes',
	'genie'       => 'genies|genii',
	'ganglion'    => 'ganglions|ganglia',
	'trilby'      => 'trilbys',
	'turf'        => 'turfs|turves',
}.merge!( PL_sb_irregular_s )	# fold in the irregular nouns that end in "s"
PL_sb_irregular =
matchgroup PL_sb_irregular_h.keys
PL_sb_C_a_ata =

Classical “..a” -> “..ata”

matchgroup %w[
	anathema bema carcinoma charisma diploma
	dogma drama edema enema enigma lemma
	lymphoma magma melisma miasma oedema
	sarcoma schema soma stigma stoma trauma
	gumma pragma
].collect {|word| word[0...-1]}
PL_sb_U_a_ae =

Unconditional “..a” -> “..ae”

matchgroup %w[
	alumna alga vertebra persona
]
PL_sb_C_a_ae =

Classical “..a” -> “..ae”

matchgroup %w[
	amoeba antenna formula hyperbola
	medusa nebula parabola abscissa
	hydra nova lacuna aurora .*umbra
	flora fauna
]
PL_sb_C_en_ina =

Classical “..en” -> “..ina”

matchgroup %w[
	stamen	foramen	lumen
].collect {|word| word[0...-2] }
PL_sb_U_um_a =

Unconditional “..um” -> “..a”

matchgroup %w[
	bacterium	agendum	desideratum	erratum
	stratum	datum	ovum		extremum
	candelabrum
].collect {|word| word[0...-2] }
PL_sb_C_um_a =

Classical “..um” -> “..a”

# Each word is reduced to its stem by dropping the final "um" before the
# list is handed to matchgroup.
#
# "encomium" and "millennium" are the correct spellings. The misspelled
# "enconium" and "millenium" are kept next to them so that any input which
# matched before this correction still matches.
matchgroup %w[
	maximum	minimum	momentum	optimum
	quantum	cranium	curriculum	dictum
	phylum	aquarium	compendium	emporium
	encomium	enconium	gymnasium	honorarium
	interregnum	lustrum	memorandum	millennium
	millenium	rostrum	spectrum	speculum
	stadium	trapezium	ultimatum	medium
	vacuum	velum	consortium
].collect {|word| word[0...-2]}
PL_sb_U_us_i =

Unconditional “..us” -> “..i”

matchgroup %w[
	alumnus	alveolus	bacillus	bronchus
	locus	nucleus	stimulus	meniscus
].collect {|word| word[0...-2]}
PL_sb_C_us_i =

Classical “..us” -> “..i”

matchgroup %w[
	focus	radius	genius
	incubus	succubus	nimbus
	fungus	nucleolus	stylus
	torus	umbilicus	uterus
	hippopotamus
].collect {|word| word[0...-2]}
PL_sb_C_us_us =

Classical “..us” -> “..us” (assimilated 4th declension Latin nouns)

matchgroup %w[
	status apparatus prospectus sinus
	hiatus impetus plexus
]
PL_sb_U_on_a =

Unconditional “..on” -> “..a”

matchgroup %w[
	criterion	perihelion	aphelion
	phenomenon	prolegomenon	noumenon
	organon	asyndeton	hyperbaton
].collect {|word| word[0...-2]}
PL_sb_C_on_a =

Classical “..on” -> “..a”

matchgroup %w[
	oxymoron
].collect {|word| word[0...-2]}
PL_sb_C_o_i_a =

Classical “..o” -> “..i” (but normally -> “..os”)

%w[
	solo		soprano	basso	alto
	contralto	tempo	piano
]
PL_sb_C_o_i =
matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
PL_sb_U_o_os =

Always “..o” -> “..os”

matchgroup( %w[
	albino	archipelago	armadillo
	commando	crescendo	fiasco
	ditto	dynamo	embryo
	ghetto	guano	inferno
	jumbo	lumbago	magneto
	manifesto	medico	octavo
	photo	pro		quarto	
	canto	lingo	generalissimo
	stylo	rhino
] | PL_sb_C_o_i_a )
PL_sb_U_ex_ices =

Unconditional “..[ei]x” -> “..ices”

matchgroup %w[
	codex	murex	silex
].collect {|word| word[0...-2]}
PL_sb_U_ix_ices =
matchgroup %w[
	radix	helix
].collect {|word| word[0...-2]}
PL_sb_C_ex_ices =

Classical “..[ei]x” -> “..ices”

matchgroup %w[
	vortex	vertex	cortex	latex
	pontifex	apex		index	simplex
].collect {|word| word[0...-2]}
PL_sb_C_ix_ices =
matchgroup %w[
	appendix
].collect {|word| word[0...-2]}
PL_sb_C_i =

Arabic: “..” -> “..i”

matchgroup %w[
	afrit	afreet	efreet
]
PL_sb_C_im =

Hebrew: “..” -> “..im”

matchgroup %w[
	goy		seraph	cherub
]
PL_sb_U_man_mans =

Unconditional “..man” -> “..mans”

matchgroup %w[
	human
	Alabaman Bahaman Burman German
	Hiroshiman Liman Nakayaman Oklahoman
	Panaman Selman Sonaman Tacoman Yakiman
	Yokohaman Yuman
]
PL_sb_uninflected_s =
# Words ending in "s" that are listed as uninflected, built up group by
# group. Entries beginning with ".*" are regexp-style fragments rather than
# plain words.
#
# Pairs or groups subsumed to a singular
%w[
	breeches britches clippers gallows hijinks
	headquarters pliers scissors testes herpes
	pincers shears proceedings trousers
] +
# Unassimilated Latin 4th declension
%w[cantus coitus nexus] +
# Recent imports
%w[contretemps corps debris .*ois] +
# Diseases
%w[.*measles mumps] +
# Miscellaneous others
%w[diabetes jackanapes series species rabies chassis innings news mews]
PL_sb_uninflected_herd =

Don’t inflect in classical mode, otherwise normal inflection

matchgroup %w[
	wildebeest swine eland bison buffalo
	elk moose rhinoceros
]
PL_sb_uninflected =
matchgroup [

	# Some fish and herd animals
	".*fish", "tuna", "salmon", "mackerel", "trout",
	"bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting", 

	".*deer", ".*sheep", 

	# All nationals ending in -ese
	"Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
	"Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
	"Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
	"Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
	"Shavese", "Vermontese", "Wenchowese", "Yengeese",
	".*[nrlm]ese",

	# Some words ending in ...s (often pairs taken as a whole)
	PL_sb_uninflected_s,

	# Diseases
	".*pox",

	# Other oddities
	"graffiti", "djinn"
]
PL_sb_singular_s =

Singular words ending in …s (all inflect with …es)

matchgroup %w[
	.*ss
	acropolis aegis alias arthritis asbestos atlas
	bathos bias bronchitis bursitis caddis cannabis
	canvas chaos cosmos dais digitalis encephalitis
	epidermis ethos eyas gas glottis hepatitis
	hubris ibis lens mantis marquis metropolis
	neuritis pathos pelvis polis rhinoceros
	sassafras tonsillitis trellis .*us
]
PL_v_special_s =
matchgroup [
	PL_sb_singular_s,
	PL_sb_uninflected_s,
	PL_sb_irregular_s.keys,
	'(.*[csx])is',
	'(.*)ceps',
	'[A-Z].*s',
]
PL_sb_postfix_adj =
# Pattern source string with two capture groups: an alternation with one
# branch per postfix adjective, then "(.*)" for whatever follows. Each
# branch is the adjective's noun pattern plus a lookahead requiring a hyphen
# or whitespace and then the adjective itself.
[
	'(',
	{
		'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
		'martial' => ['court'],
	}.map do |adjective, nouns|
		matchgroup( matchgroup(nouns) + '(?=(?:-|\s+)' + adjective + ')' )
	end.join( '|' ),
	')(.*)',
].join
# Alternation of rank words; PL_sb_general embeds this pattern's source.
PL_sb_military = /major|lieutenant|brigadier|adjutant|quartermaster/
PL_sb_general =
%r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
PL_prep =
matchgroup %w[
	about above across after among around at athwart before behind
	below beneath beside besides between betwixt beyond but by
	during except for from in into near of off on onto out over
	since till to under until unto upon with
]
PL_sb_prep_dual_compound =
%r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
PL_sb_prep_compound =
%r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
PL_pron_nom_h =
{
	# Singular pronoun => plural pronoun.
	# Nominative forms, each followed by its reflexive form.
	'i'        => 'we',
	'myself'   => 'ourselves',
	'you'      => 'you',
	'yourself' => 'yourselves',
	'she'      => 'they',
	'herself'  => 'themselves',
	'he'       => 'they',
	'himself'  => 'themselves',
	'it'       => 'they',
	'itself'   => 'themselves',
	'they'     => 'they',
	'themself' => 'themselves',

	# Possessive forms.
	'mine'     => 'ours',
	'yours'    => 'yours',
	'hers'     => 'theirs',
	'his'      => 'theirs',
	'its'      => 'theirs',
	'theirs'   => 'theirs',
}
PL_pron_nom =
matchgroup PL_pron_nom_h.keys
PL_pron_acc_h =
{
	# Singular pronoun => plural pronoun.
	# Accusative forms, each followed by its reflexive form.
	'me'       => 'us',
	'myself'   => 'ourselves',
	'you'      => 'you',
	'yourself' => 'yourselves',
	'her'      => 'them',
	'herself'  => 'themselves',
	'him'      => 'them',
	'himself'  => 'themselves',
	'it'       => 'them',
	'itself'   => 'themselves',
	'them'     => 'them',
	'themself' => 'themselves',
}
PL_pron_acc =
matchgroup PL_pron_acc_h.keys
PL_v_irregular_pres_h =
{
	# Singular present-tense verb form => plural form.
	#
	# Each distinct spelling is listed exactly once. "was" serves as both
	# 1st and 3rd person, and "have" as both 1st and 2nd person; repeating
	# them as literal keys only made Ruby warn about a duplicated key and
	# discard the earlier pair. Key order is unchanged.
	#
	#	1st pers. sing.		2nd pers. sing.		3rd pers. singular
	#				3rd pers. (indet.)
	"am"	=> "are",	"are"	=> "are",	"is"	 => "are",
	"was"	=> "were",	"were"	=> "were",
	"have"  => "have",				"has"	 => "have",
}
PL_v_irregular_pres =
matchgroup PL_v_irregular_pres_h.keys
PL_v_ambiguous_pres_h =
{
	# Singular present-tense verb form => plural form.
	#
	# The 1st- and 2nd-person singular forms are spelled identically (and
	# for "can"/"must" the 3rd person too), so each spelling is listed once.
	# Repeating them as literal keys made Ruby warn about duplicated keys
	# and discard the earlier pairs. Key order is unchanged.
	#
	#	1st/2nd pers. sing.	3rd pers. singular
	"act"	=> "act",	"acts"	  => "act",
	"blame"	=> "blame",	"blames"  => "blame",
	"can"	=> "can",
	"must"	=> "must",
	"fly"	=> "fly",	"flies"	  => "fly",
	"copy"	=> "copy",	"copies"  => "copy",
	"drink"	=> "drink",	"drinks"  => "drink",
	"fight"	=> "fight",	"fights"  => "fight",
	"fire"	=> "fire",	"fires"   => "fire",
	"like"	=> "like",	"likes"   => "like",
	"look"	=> "look",	"looks"   => "look",
	"make"	=> "make",	"makes"   => "make",
	"reach"	=> "reach",	"reaches" => "reach",
	"run"	=> "run",	"runs"    => "run",
	"sink"	=> "sink",	"sinks"   => "sink",
	"sleep"	=> "sleep",	"sleeps"  => "sleep",
	"view"	=> "view",	"views"   => "view",
}
PL_v_ambiguous_pres =
matchgroup PL_v_ambiguous_pres_h.keys
PL_v_irregular_non_pres =
matchgroup %w[
	did had ate made put 
	spent fought sank gave sought
	shall could ought should
]
PL_v_ambiguous_non_pres =
matchgroup %w[
	thought saw bent will might cut
]
PL_count_zero =
matchgroup %w[
	0 no zero nil
]
PL_count_one =
matchgroup %w[
	1 a an one each every this that
]
PL_adj_special_h =
{
	# Singular article or demonstrative => plural form
	'a'    => 'some',
	'an'   => 'some',
	'this' => 'these',
	'that' => 'those',
}
PL_adj_special =
matchgroup PL_adj_special_h.keys
PL_adj_poss_h =
{
	# Singular possessive adjective => plural form
	'my'    => 'our',
	'your'  => 'your',
	'its'   => 'their',
	'her'   => 'their',
	'his'   => 'their',
	'their' => 'their',
}
PL_adj_poss =
matchgroup PL_adj_poss_h.keys
Nth =

Numerical inflections

{
	0 => 'th',
	1 => 'st',
	2 => 'nd',
	3 => 'rd',
	4 => 'th',
	5 => 'th',
	6 => 'th',
	7 => 'th',
	8 => 'th',
	9 => 'th',
	11 => 'th',
	12 => 'th',
	13 => 'th',
}
Ordinals =

Ordinal word parts

{
	'ty' => 'tieth',
	'one' => 'first',
	'two' => 'second',
	'three' => 'third',
	'five' => 'fifth',
	'eight' => 'eighth',
	'nine' => 'ninth',
	'twelve' => 'twelfth',
}
OrdinalSuffixes =
Ordinals.keys.join("|") + "|"
Units =

Numeral names

[''] + %w[one two three four five six seven eight nine]
# Names of the numbers 10 through 19; the array index is the units digit.
Teens = [
	'ten',     'eleven',  'twelve',    'thirteen', 'fourteen',
	'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen',
]
# Names of the multiples of ten, indexed by the tens digit. Indexes 0 and 1
# hold empty strings, so 'twenty' sits at index 2.
Tens = [
	'', '',
	'twenty', 'thirty', 'forty', 'fifty',
	'sixty', 'seventy', 'eighty', 'ninety',
]
Thousands =
# Names for successive powers of a thousand, each with its own leading
# space: the first entry is a bare space, then ' thousand', then one
# '...illion' word per Latin prefix.
[' ', ' thousand'] +
	%w[
		m b tr quadr quint sext sept oct non dec
		undec duodec tredec quattuordec quindec
		sexdec septemdec octodec novemdec vigint
	].map {|latin_prefix| " #{latin_prefix}illion" }
NumberToWordsFunctions =

A collection of functions for transforming digits into word phrases. Indexed by the number of digits being transformed; e.g., NumberToWordsFunctions[2] is the function for transforming double-digit numbers.

[
	# Index 0: there are no digits to convert, so calling this always raises.
	proc {|*args|
		raise "No digits (#{args.inspect})"
	},

	# Index 1: a single digit. A zero digit becomes the caller's zero-word
	# followed by a space; any other digit is delegated to to_units.
	proc {|zero, digit|
		if digit.nonzero?
			to_units( digit )
		else
			"#{zero} "
		end
	},

	# Index 2: two digits. A non-zero leading digit is delegated to to_tens;
	# otherwise the leading zero is spelled out, followed by the second digit
	# (or a second zero-word when both digits are zero).
	proc {|zero, tens, units|
		if tens.nonzero?
			to_tens( tens, units )
		elsif units.nonzero?
			"#{zero} " + NumberToWordsFunctions[1].call( zero, units )
		else
			[zero, zero].join( " " )
		end
	},

	# Index 3: three digits, handled as one single digit followed by one
	# two-digit group.
	proc {|zero, hundreds, tens, units|
		leading  = NumberToWordsFunctions[1].call( zero, hundreds )
		trailing = NumberToWordsFunctions[2].call( zero, tens, units )
		leading + trailing
	}
]
A_abbrev =

This pattern matches strings of capitals starting with a “vowel-sound” consonant followed by another consonant, and which are not likely to be real words (oh, all right then, it’s just magic!)

%{
	(?! FJO | [HLMNS]Y.  | RY[EO] | SQU
	  | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
	[FHLMNRSX][A-Z]
}
A_y_cons =

This pattern codes the beginnings of all English words beginning with a ‘y’ followed by a consonant. Any other y-consonant prefix therefore implies an abbreviation.

'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)'
A_explicit_an =

Exceptions to exceptions

matchgroup(	"euler", "hour(?!i)", "heir", "honest", "hono" )
NumwordDefaults =

Default configuration arguments for the #numwords function

{
	:group		=> 0,
	:comma		=> ', ',
	:and		=> ' and ',
	:zero		=> 'zero',
	:decimal	=> 'point',
	:asArray	=> false,
}
SeveralRange =

Default ranges for #quantify

2..5
# Further inclusive count ranges; each starts where the previous one ends.
NumberRange   = Range.new( 6, 19 )
NumerousRange = Range.new( 20, 45 )
ManyRange     = Range.new( 46, 99 )
QuantifyDefaults =

Default configuration arguments for the #quantify function

{
	:joinword	=> " of ",
}
ConjunctionDefaults =

Default configuration arguments for the #conjunction (junction, what’s your) function.

{
	:separator		=> ', ',
	:altsep			=> '; ',
	:penultimate	=> true,
	:conjunctive	=> 'and',
	:combine		=> true,
	:casefold		=> true,
	:generalize		=> false,
	:quantsort		=> true,
}
Articles =

Build the list of exceptions to title-capitalization

%w[a and the]
# Short prepositions. "with" used to be listed twice; the second occurrence
# is dropped so that every entry is unique. TitleCaseExceptions combines this
# list with Array#|, which already discards duplicates, so that constant is
# unaffected.
ShortPrepositions =
%w[
	amid at but by down from in into like near of off on onto out over
	past save with till to unto up upon
]
# Conjunctions that form part of TitleCaseExceptions.
CoordConjunctions = ['and', 'but', 'as']
TitleCaseExceptions =
Articles | ShortPrepositions | CoordConjunctions
IrregularInfinitives =

# Irregular words => infinitive forms
#
# Keys are irregular inflected forms and values are the infinitive each one
# maps to. 'foreseen' is the correctly spelled participle of 'foresee'; the
# misspelled 'forseen' key is retained so existing lookups keep working.

{
	'abided'			=> 'abide',
	'abode'				=> 'abide',
	'am'				=> 'be',
	'are'				=> 'be',
	'arisen'			=> 'arise',
	'arose'				=> 'arise',
	'ate'				=> 'eat',
	'awaked'			=> 'awake',
	'awoke'				=> 'awake',
	'bade'				=> 'bid',
	'beaten'			=> 'beat',
	'became'			=> 'become',
	'been'				=> 'be',
	'befallen'			=> 'befall',
	'befell'			=> 'befall',
	'began'				=> 'begin',
	'begat'				=> 'beget',
	'begot'				=> 'beget',
	'begotten'			=> 'beget',
	'begun'				=> 'begin',
	'beheld'			=> 'behold',
	'bent'				=> 'bend',
	'bereaved'			=> 'bereave',
	'bereft'			=> 'bereave',
	'beseeched'			=> 'beseech',
	'besought'			=> 'beseech',
	'bespoke'			=> 'bespeak',
	'bespoken'			=> 'bespeak',
	'bestrewed'			=> 'bestrew',
	'bestrewn'			=> 'bestrew',
	'bestrid'			=> 'bestride',
	'bestridden'		=> 'bestride',
	'bestrode'			=> 'bestride',
	'betaken'			=> 'betake',
	'bethought'			=> 'bethink',
	'betook'			=> 'betake',
	'betted'			=> 'bet',
	'bidden'			=> 'bid',
	'bided'				=> 'bide',
	'bit'				=> 'bite',
	'bitten'			=> 'bite',
	'bled'				=> 'bleed',
	'blended'			=> 'blend',
	'blent'				=> 'blend',
	'blessed'			=> 'bless',
	'blest'				=> 'bless',
	'blew'				=> 'blow',
	'blown'				=> 'blow',
	'bode'				=> 'bide',
	'bore'				=> 'bear',
	'born'				=> 'bear',
	'borne'				=> 'bear',
	'bought'			=> 'buy',
	'bound'				=> 'bind',
	'bred'				=> 'breed',
	'broadcasted'		=> 'broadcast',
	'broke'				=> 'break',
	'broken'			=> 'break',
	'brought'			=> 'bring',
	'browbeaten'		=> 'browbeat',
	'built'				=> 'build',
	'burned'			=> 'burn',
	'burnt'				=> 'burn',
	'came'				=> 'come',
	'caught'			=> 'catch',
	'chid'				=> 'chide',
	'chidden'			=> 'chide',
	'chided'			=> 'chide',
	'chose'				=> 'choose',
	'chosen'			=> 'choose',
	'clad'				=> 'clothe',
	'clave'				=> 'cleave',
	'cleaved'			=> 'cleave',
	'cleft'				=> 'cleave',
	'clothed'			=> 'clothe',
	'clove'				=> 'cleave',
	'cloven'			=> 'cleave',
	'clung'				=> 'cling',
	'costed'			=> 'cost',
	'could'				=> 'can',
	'crept'				=> 'creep',
	'crew'				=> 'crow',
	'crowed'			=> 'crow',
	'dealt'				=> 'deal',
	'did'				=> 'do',
	'done'				=> 'do',
	'dove'				=> 'dive',
	'drank'				=> 'drink',
	'drawn'				=> 'draw',
	'dreamed'			=> 'dream',
	'dreamt'			=> 'dream',
	'drew'				=> 'draw',
	'driven'			=> 'drive',
	'drove'				=> 'drive',
	'drunk'				=> 'drink',
	'dug'				=> 'dig',
	'dwelled'			=> 'dwell',
	'dwelt'				=> 'dwell',
	'eaten'				=> 'eat',
	'fallen'			=> 'fall',
	'fed'				=> 'feed',
	'fell'				=> 'fall',
	'felt'				=> 'feel',
	'fled'				=> 'flee',
	'flew'				=> 'fly',
	'flown'				=> 'fly',
	'flung'				=> 'fling',
	'forbad'			=> 'forbid',
	'forbade'			=> 'forbid',
	'forbidden'			=> 'forbid',
	'forbore'			=> 'forbear',
	'forborne'			=> 'forbear',
	'fordid'			=> 'fordo',
	'fordone'			=> 'fordo',
	'forecasted'		=> 'forecast',
	'foregone'			=> 'forego',
	'foreknew'			=> 'foreknow',
	'foreknown'			=> 'foreknow',
	'foreran'			=> 'forerun',
	'foresaw'			=> 'foresee',
	'foreseen'			=> 'foresee',
	'foreshowed'		=> 'foreshow',
	'foreshown'			=> 'foreshow',
	'foretold'			=> 'foretell',
	'forewent'			=> 'forego',
	'forgave'			=> 'forgive',
	'forgiven'			=> 'forgive',
	'forgot'			=> 'forget',
	'forgotten'			=> 'forget',
	'forsaken'			=> 'forsake',
	'forseen'			=> 'foresee',	# legacy misspelling, kept for compatibility
	'forsook'			=> 'forsake',
	'forswore'			=> 'forswear',
	'forsworn'			=> 'forswear',
	'fought'			=> 'fight',
	'found'				=> 'find',
	'froze'				=> 'freeze',
	'frozen'			=> 'freeze',
	'gainsaid'			=> 'gainsay',
	'gave'				=> 'give',
	'gilded'			=> 'gild',
	'gilt'				=> 'gild',
	'girded'			=> 'gird',
	'girt'				=> 'gird',
	'given'				=> 'give',
	'gone'				=> 'go',
	'got'				=> 'get',
	'gotten'			=> 'get',
	'graved'			=> 'grave',
	'graven'			=> 'grave',
	'grew'				=> 'grow',
	'ground'			=> 'grind',
	'grown'				=> 'grow',
	'had'				=> 'have',
	'hamstringed'		=> 'hamstring',
	'hamstrung'			=> 'hamstring',
	'hanged'			=> 'hang',
	'heard'				=> 'hear',
	'heaved'			=> 'heave',
	'held'				=> 'hold',
	'hewed'				=> 'hew',
	'hewn'				=> 'hew',
	'hid'				=> 'hide',
	'hidden'			=> 'hide',
	'hove'				=> 'heave',
	'hung'				=> 'hang',
	'inlaid'			=> 'inlay',
	'is'				=> 'be',
	'kept'				=> 'keep',
	'kneeled'			=> 'kneel',
	'knelt'				=> 'kneel',
	'knew'				=> 'know',
	'knitted'			=> 'knit',
	'known'				=> 'know',
	'laded'				=> 'lade',
	'laden'				=> 'lade',
	'laid'				=> 'lay',
	'lain'				=> 'lie',
	'lay'				=> 'lie',
	'leaned'			=> 'lean',
	'leant'				=> 'lean',
	'leaped'			=> 'leap',
	'leapt'				=> 'leap',
	'learned'			=> 'learn',
	'learnt'			=> 'learn',
	'led'				=> 'lead',
	'left'				=> 'leave',
	'lent'				=> 'lend',
	'lighted'			=> 'light',
	'lit'				=> 'light',
	'lost'				=> 'lose',
	'made'				=> 'make',
	'meant'				=> 'mean',
	'melted'			=> 'melt',
	'met'				=> 'meet',
	'might'				=> 'may',
	'misdealt'			=> 'misdeal',
	'misgave'			=> 'misgive',
	'misgiven'			=> 'misgive',
	'mislaid'			=> 'mislay',
	'misled'			=> 'mislead',
	'mistaken'			=> 'mistake',
	'mistook'			=> 'mistake',
	'misunderstood'		=> 'misunderstand',
	'molten'			=> 'melt',
	'mowed'				=> 'mow',
	'mown'				=> 'mow',
	'outate'			=> 'outeat',
	'outbade'			=> 'outbid',
	'outbidden'			=> 'outbid',
	'outbred'			=> 'outbreed',
	'outdid'			=> 'outdo',
	'outdone'			=> 'outdo',
	'outeaten'			=> 'outeat',
	'outfought'			=> 'outfight',
	'outgone'			=> 'outgo',
	'outgrew'			=> 'outgrow',
	'outgrown'			=> 'outgrow',
	'outlaid'			=> 'outlay',
	'outran'			=> 'outrun',
	'outridden'			=> 'outride',
	'outrode'			=> 'outride',
	'outsat'			=> 'outsit',
	'outshone'			=> 'outshine',
	'outshot'			=> 'outshoot',
	'outsold'			=> 'outsell',
	'outspent'			=> 'outspend',
	'outthrew'			=> 'outthrow',
	'outthrown'			=> 'outthrow',
	'outwent'			=> 'outgo',
	'outwore'			=> 'outwear',
	'outworn'			=> 'outwear',
	'overate'			=> 'overeat',
	'overbade'			=> 'overbid',
	'overbidden'		=> 'overbid',
	'overblew'			=> 'overblow',
	'overblown'			=> 'overblow',
	'overbore'			=> 'overbear',
	'overborn'			=> 'overbear',
	'overborne'			=> 'overbear',
	'overbought'		=> 'overbuy',
	'overbuilt'			=> 'overbuild',
	'overcame'			=> 'overcome',
	'overdid'			=> 'overdo',
	'overdone'			=> 'overdo',
	'overdrawn'			=> 'overdraw',
	'overdrew'			=> 'overdraw',
	'overdriven'		=> 'overdrive',
	'overdrove'			=> 'overdrive',
	'overeaten'			=> 'overeat',
	'overfed'			=> 'overfeed',
	'overflew'			=> 'overfly',
	'overflown'			=> 'overfly',
	'overgrew'			=> 'overgrow',
	'overgrown'			=> 'overgrow',
	'overhanged'		=> 'overhang',
	'overheard'			=> 'overhear',
	'overhung'			=> 'overhang',
	'overladed'			=> 'overlade',
	'overladen'			=> 'overlade',
	'overlaid'			=> 'overlay',
	'overlain'			=> 'overlie',
	'overlay'			=> 'overlie',
	'overleaped'		=> 'overleap',
	'overleapt'			=> 'overleap',
	'overpaid'			=> 'overpay',
	'overran'			=> 'overrun',
	'overridden'		=> 'override',
	'overrode'			=> 'override',
	'oversaw'			=> 'oversee',
	'overseen'			=> 'oversee',
	'oversewed'			=> 'oversew',
	'oversewn'			=> 'oversew',
	'overshot'			=> 'overshoot',
	'overslept'			=> 'oversleep',
	'overspent'			=> 'overspend',
	'overtaken'			=> 'overtake',
	'overthrew'			=> 'overthrow',
	'overthrown'		=> 'overthrow',
	'overtook'			=> 'overtake',
	'overwinded'		=> 'overwind',
	'overwound'			=> 'overwind',
	'overwritten'		=> 'overwrite',
	'overwrote'			=> 'overwrite',
	'paid'				=> 'pay',
	'partaken'			=> 'partake',
	'partook'			=> 'partake',
	'prechose'			=> 'prechoose',
	'prechosen'			=> 'prechoose',
	'proved'			=> 'prove',
	'proven'			=> 'prove',
	'quitted'			=> 'quit',
	'ran'				=> 'run',
	'rang'				=> 'ring',
	'reaved'			=> 'reave',
	'rebuilt'			=> 'rebuild',
	'reeved'			=> 'reeve',
	'reft'				=> 'reave',
	'relaid'			=> 'relay',
	'rent'				=> 'rend',
	'repaid'			=> 'repay',
	'retold'			=> 'retell',
	'ridded'			=> 'rid',
	'ridden'			=> 'ride',
	'risen'				=> 'rise',
	'rived'				=> 'rive',
	'riven'				=> 'rive',
	'rode'				=> 'ride',
	'rose'				=> 'rise',
	'rove'				=> 'reeve',
	'rung'				=> 'ring',
	'said'				=> 'say',
	'sang'				=> 'sing',
	'sank'				=> 'sink',
	'sat'				=> 'sit',
	'saw'				=> 'see',
	'sawed'				=> 'saw',
	'sawn'				=> 'saw',
	'seen'				=> 'see',
	'sent'				=> 'send',
	'sewed'				=> 'sew',
	'sewn'				=> 'sew',
	'shaken'			=> 'shake',
	'shaved'			=> 'shave',
	'shaven'			=> 'shave',
	'sheared'			=> 'shear',
	'shined'			=> 'shine',
	'shod'				=> 'shoe',
	'shoed'				=> 'shoe',
	'shone'				=> 'shine',
	'shook'				=> 'shake',
	'shorn'				=> 'shear',
	'shot'				=> 'shoot',
	'showed'			=> 'show',
	'shown'				=> 'show',
	'shrank'			=> 'shrink',
	'shredded'			=> 'shred',
	'shrived'			=> 'shrive',
	'shriven'			=> 'shrive',
	'shrove'			=> 'shrive',
	'shrunk'			=> 'shrink',
	'shrunken'			=> 'shrink',
	'slain'				=> 'slay',
	'slept'				=> 'sleep',
	'slew'				=> 'slay',
	'slid'				=> 'slide',
	'slidden'			=> 'slide',
	'slung'				=> 'sling',
	'slunk'				=> 'slink',
	'smelled'			=> 'smell',
	'smelt'				=> 'smell',
	'smitten'			=> 'smite',
	'smote'				=> 'smite',
	'snuck'				=> 'sneak',
	'sold'				=> 'sell',
	'sought'			=> 'seek',
	'sowed'				=> 'sow',
	'sown'				=> 'sow',
	'span'				=> 'spin',
	'spat'				=> 'spit',
	'sped'				=> 'speed',
	'speeded'			=> 'speed',
	'spelled'			=> 'spell',
	'spelt'				=> 'spell',
	'spent'				=> 'spend',
	'spilled'			=> 'spill',
	'spilt'				=> 'spill',
	'spoiled'			=> 'spoil',
	'spoilt'			=> 'spoil',
	'spoke'				=> 'speak',
	'spoken'			=> 'speak',
	'sprang'			=> 'spring',
	'sprung'			=> 'spring',
	'spun'				=> 'spin',
	'stank'				=> 'stink',
	'staved'			=> 'stave',
	'stole'				=> 'steal',
	'stolen'			=> 'steal',
	'stood'				=> 'stand',
	'stove'				=> 'stave',
	'strewed'			=> 'strew',
	'strewn'			=> 'strew',
	'stricken'			=> 'strike',
	'strid'				=> 'stride',
	'stridden'			=> 'stride',
	'strived'			=> 'strive',
	'striven'			=> 'strive',
	'strode'			=> 'stride',
	'strove'			=> 'strive',
	'struck'			=> 'strike',
	'strung'			=> 'string',
	'stuck'				=> 'stick',
	'stung'				=> 'sting',
	'stunk'				=> 'stink',
	'sung'				=> 'sing',
	'sunk'				=> 'sink',
	'sunken'			=> 'sink',
	'swam'				=> 'swim',
	'sweated'			=> 'sweat',
	'swelled'			=> 'swell',
	'swept'				=> 'sweep',
	'swollen'			=> 'swell',
	'swore'				=> 'swear',
	'sworn'				=> 'swear',
	'swum'				=> 'swim',
	'swung'				=> 'swing',
	'taken'				=> 'take',
	'taught'			=> 'teach',
	'thought'			=> 'think',
	'threw'				=> 'throw',
	'thrived'			=> 'thrive',
	'thriven'			=> 'thrive',
	'throve'			=> 'thrive',
	'thrown'			=> 'throw',
	'told'				=> 'tell',
	'took'				=> 'take',
	'tore'				=> 'tear',
	'torn'				=> 'tear',
	'trod'				=> 'tread',
	'trodden'			=> 'tread',
	'unbent'			=> 'unbend',
	'unbound'			=> 'unbind',
	'unbuilt'			=> 'unbuild',
	'underbought'		=> 'underbuy',
	'underfed'			=> 'underfeed',
	'undergone'			=> 'undergo',
	'underlaid'			=> 'underlay',
	'underlain'			=> 'underlie',
	'underlay'			=> 'underlie',
	'underpaid'			=> 'underpay',
	'underran'			=> 'underrun',
	'undershot'			=> 'undershoot',
	'undersold'			=> 'undersell',
	'understood'		=> 'understand',
	'undertaken'		=> 'undertake',
	'undertook'			=> 'undertake',
	'underwent'			=> 'undergo',
	'underwritten'		=> 'underwrite',
	'underwrote'		=> 'underwrite',
	'undid'				=> 'undo',
	'undone'			=> 'undo',
	'undrawn'			=> 'undraw',
	'undrew'			=> 'undraw',
	'unfroze'			=> 'unfreeze',
	'unfrozen'			=> 'unfreeze',
	'ungirded'			=> 'ungird',
	'ungirt'			=> 'ungird',
	'unhanged'			=> 'unhang',
	'unhung'			=> 'unhang',
	'unknitted'			=> 'unknit',
	'unladed'			=> 'unlade',
	'unladen'			=> 'unlade',
	'unlaid'			=> 'unlay',
	'unlearned'			=> 'unlearn',
	'unlearnt'			=> 'unlearn',
	'unmade'			=> 'unmake',
	'unreeved'			=> 'unreeve',
	'unrove'			=> 'unreeve',
	'unsaid'			=> 'unsay',
	'unslung'			=> 'unsling',
	'unspoke'			=> 'unspeak',
	'unspoken'			=> 'unspeak',
	'unstrung'			=> 'unstring',
	'unstuck'			=> 'unstick',
	'unswore'			=> 'unswear',
	'unsworn'			=> 'unswear',
	'untaught'			=> 'unteach',
	'unthought'			=> 'unthink',
	'untrod'			=> 'untread',
	'untrodden'			=> 'untread',
	'unwinded'			=> 'unwind',
	'unwound'			=> 'unwind',
	'unwove'			=> 'unweave',
	'unwoven'			=> 'unweave',
	'upbuilt'			=> 'upbuild',
	'upheld'			=> 'uphold',
	'uprisen'			=> 'uprise',
	'uprose'			=> 'uprise',
	'upswept'			=> 'upsweep',
	'upswung'			=> 'upswing',
	'waked'				=> 'wake',
	'was'				=> 'be',
	'waylaid'			=> 'waylay',
	'wedded'			=> 'wed',
	'went'				=> 'go',
	'wept'				=> 'weep',
	'were'				=> 'be',
	'wetted'			=> 'wet',
	'winded'			=> 'wind',
	'wist'				=> 'wit',
	'wot'				=> 'wit',
	'withdrawn'			=> 'withdraw',
	'withdrew'			=> 'withdraw',
	'withheld'			=> 'withhold',
	'withstood'			=> 'withstand',
	'woke'				=> 'wake',
	'woken'				=> 'wake',
	'won'				=> 'win',
	'wore'				=> 'wear',
	'worked'			=> 'work',
	'worn'				=> 'wear',
	'wound'				=> 'wind',
	'wove'				=> 'weave',
	'woven'				=> 'weave',
	'written'			=> 'write',
	'wrote'				=> 'write',
	'wrought'			=> 'work',
	'wrung'				=> 'wring',
}
InfSuffixRules =

Mapping of word suffixes to infinitive rules.

{
	# '<suffix>' => {
	#	:order => <sort order>,
	#	:rule  => <rule number>,

	# :word1 == 0 => Use 0, the index of the longest prefix
	#	within @{$prefix{$self->{'suffix'} } }, below.

	# :word1 == 1 => Use 1, the index of the 2nd longest prefix
	#	within @{$prefix{$self->{'suffix'} } }, below.

	# :word1 == -1 => Use the index of the shortest prefix
	#	within @{$prefix{$self->{'suffix'} } }, below + a letter.

	# :word1 == -2 => Use the index of the shortest prefix
	#	within @{$prefix{$self->{'suffix'} } }, below + a letter,
	#	and use the shortest prefix as well.

	# :word1 == -3 => Use the index of the shortest prefix
	#	within @{$prefix{$self->{'suffix'} } }, below + meter,
	#	and use the shortest prefix + metre as well.

	# :word1 == -4 => Use the original string.
	'hes' => {
		:order		=> 1011,
		:rule		=> '1',
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ses' => {
		:order		=> 1021,
		:rule		=> '2',
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'xes' => {
		:order		=> 1031,
		:rule		=> '3',
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'zes' => {
		:order		=> 1041,
		:rule		=> '4',
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'iless' => {
		:order		=> 1051,
		:rule		=> '43a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'less' => {
		:order		=> 1052,
		:rule		=> '43b',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'iness' => {
		:order		=> 1053,
		:rule		=> '44a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'ness' => {
		:order		=> 1054,
		:rule		=> '44b',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	"'s" => {
		:order		=> 1055,
		:rule		=> '7',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ies' => {
		:order		=> 1056,
		:rule		=> '13a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'es' => {
		:order		=> 1057,
		:rule		=> '13b',
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ss' => {
		:order		=> 1061,
		:rule		=> '6a',
		:word1		=> -4,	# Original string.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	's'	 => {
		:order		=> 1062,
		:rule		=> '6b',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ater' => {
		:order		=> 1081,
		:rule		=> '8',
		:word1		=> -4,	# Original string.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'cter' => {
		:order		=> 1091,
		:rule		=> '9',
		:word1		=> -4,	# Original string.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ier' => {
		:order		=> 1101,
		:rule		=> '10',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'er' => {
		:order		=> 1111,
		:rule		=> '11',
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ied' => {
		:order		=> 1121,
		:rule		=> '12a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'ed' => {
		:order		=> 1122,
		:rule		=> '12b',	# There is extra code for 12b below.
		:word1		=> 0,	# Longest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'iest' => {
		:order		=> 1141,
		:rule		=> '14a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'est' => {
		:order		=> 1142,
		:rule		=> '14b',
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'blity' => {
		:order		=> 1143,
		:rule		=> '21',
		:word1		=> -4,	# Original string.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'bility' => {
		:order		=> 1144,
		:rule		=> '22',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ble',
		:suffix2	=> '',
	},
	'fiable' => {
		:order		=> 1145,
		:rule		=> '23',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'fy',
		:suffix2	=> '',
	},
	'logist' => {
		:order		=> 1146,
		:rule		=> '24',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'logy',
		:suffix2	=> '',
	},
	'ing' => {
		:order		=> 1151,
		:rule		=> '15',	# There is extra code for 15 below.
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'ist' => {
		:order		=> 1161,
		:rule		=> '16',
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'ism' => {
		:order		=> 1171,
		:rule		=> '17',
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'ity' => {
		:order		=> 1181,
		:rule		=> '18',
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'ize' => {
		:order		=> 1191,
		:rule		=> '19',
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'cable' => {
		:order		=> 1201,
		:rule		=> '20a',
		:word1		=> -4,	# Original string.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'gable' => {
		:order		=> 1202,
		:rule		=> '20b',
		:word1		=> -4,	# Original string.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'able' => {
		:order		=> 1203,
		:rule		=> '20c',
		:word1		=> -2,	# Shortest prefix + a letter, and shortest prefix.
		:suffix1	=> 'e',
		:suffix2	=> '',
	},
	'graphic' => {
		:order		=> 1251,
		:rule		=> '25',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'graphy',
		:suffix2	=> '',
	},
	'istic' => {
		:order		=> 1261,
		:rule		=> '26',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ist',
		:suffix2	=> '',
	},
	'itic' => {
		:order		=> 1271,
		:rule		=> '27',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ite',
		:suffix2	=> '',
	},
	'like' => {
		:order		=> 1281,
		:rule		=> '28',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'logic' => {
		:order		=> 1291,
		:rule		=> '29',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'logy',
		:suffix2	=> '',
	},
	'ment' => {
		:order		=> 1301,
		:rule		=> '30',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'mental' => {
		:order		=> 1311,
		:rule		=> '31',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ment',
		:suffix2	=> '',
	},
	'metry' => {
		:order		=> 1321,
		:rule		=> '32',
		:word1		=> -3,	# Shortest prefix + meter, and shortest perfix + metre.
		:suffix1	=> 'meter',
		:suffix2	=> 'metre',
	},
	'nce' => {
		:order		=> 1331,
		:rule		=> '33',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'nt',
		:suffix2	=> '',
	},
	'ncy' => {
		:order		=> 1341,
		:rule		=> '34',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'nt',
		:suffix2	=> '',
	},
	'ship' => {
		:order		=> 1351,
		:rule		=> '35',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ical' => {
		:order		=> 1361,
		:rule		=> '36',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ic',
		:suffix2	=> '',
	},
	'ional' => {
		:order		=> 1371,
		:rule		=> '37',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ion',
		:suffix2	=> '',
	},
	'bly' => {
		:order		=> 1381,
		:rule		=> '38',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ble',
		:suffix2	=> '',
	},
	'ily' => {
		:order		=> 1391,
		:rule		=> '39',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'ly' => {
		:order		=> 1401,
		:rule		=> '40',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'iful' => {
		:order		=> 1411,
		:rule		=> '41a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'ful' => {
		:order		=> 1412,
		:rule		=> '41b',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ihood' => {
		:order		=> 1421,
		:rule		=> '42a',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'y',
		:suffix2	=> '',
	},
	'hood' => {
		:order		=> 1422,
		:rule		=> '42b',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> '',
		:suffix2	=> '',
	},
	'ification' => {
		:order		=> 1451,
		:rule		=> '45',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ify',
		:suffix2	=> '',
	},
	'ization' => {
		:order		=> 1461,
		:rule		=> '46',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ize',
		:suffix2	=> '',
	},
	'ction' => {
		:order		=> 1471,
		:rule		=> '47',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ct',
		:suffix2	=> '',
	},
	'rtion' => {
		:order		=> 1481,
		:rule		=> '48',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'rt',
		:suffix2	=> '',
	},
	'ation' => {
		:order		=> 1491,
		:rule		=> '49',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ate',
		:suffix2	=> '',
	},
	'ator' => {
		:order		=> 1501,
		:rule		=> '50',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ate',
		:suffix2	=> '',
	},
	'ctor' => {
		:order		=> 1511,
		:rule		=> '51',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ct',
		:suffix2	=> '',
	},
	'ive' => {
		:order		=> 1521,
		:rule		=> '52',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'ion',
		:suffix2	=> '',
	},
	'onian' => {
		:order		=> 1530,
		:rule		=> '54',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'on',
		:suffix2	=> '',
	},
	'an' => {
		:order		=> 1531,
		:rule		=> '53',
		:word1		=> -1,	# Shortest prefix.
		:suffix1	=> 'a',
		:suffix2	=> '',
	},
}
# The suffix keys of InfSuffixRules, arranged by ascending :order value.
InfSuffixRuleOrder =
InfSuffixRules.sort_by {|_suffix, info| info[:order] }.map {|suffix, _info| suffix }

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.lprintf_formattersObject

Returns the value of attribute lprintf_formatters.



103
104
105
# File 'lib/linguistics/en.rb', line 103

# Reader for whatever is currently stored in @lprintf_formatters (the
# table that def_lprintf_formatter writes into and lprintf reads from).
def lprintf_formatters
  return @lprintf_formatters
end

Class Method Details

.a(phrase, count = nil) ⇒ Object Also known as: an

Return the given phrase with the appropriate indefinite article (“a” or “an”) prepended.



1168
1169
1170
1171
1172
1173
1174
1175
# File 'lib/linguistics/en.rb', line 1168

# Prefix +phrase+ with the indefinite article chosen by indef_article,
# keeping the whitespace that surrounds the phrase. When the pattern finds
# no word in the phrase (e.g. an empty string), +phrase+ is handed back
# unchanged.
def a( phrase, count=nil )
	leading, word, trailing =
		phrase.to_s.match( /\A(\s*)(.+?)(\s*)\Z/ ).to_a[1,3]
	return phrase if word.nil? or word.empty?

	"#{leading}#{indef_article( word, count )}#{trailing}"
end

.camel_case_to_english(string) ⇒ Object

Turns a camel-case string (“camelCaseToEnglish”) to plain English (“camel case to english”). Each word is decapitalized.



1601
1602
1603
1604
1605
# File 'lib/linguistics/en.rb', line 1601

# Turn a camel-case string ("camelCaseToEnglish") into lower-case words
# separated by single spaces ("camel case to english").
#
# A space is inserted after every letter that is directly followed by a
# capital. The capital is matched with a lookahead so it is not consumed:
# consuming it (as a plain two-capital pattern does) makes runs of three or
# more capitals split unevenly, e.g. "ABC" => "a bc" instead of "a b c".
def camel_case_to_english( string )
	string.to_s.gsub( /([a-zA-Z])(?=[A-Z])/, '\1 ' ).downcase
end

.conjunction(obj, args = {}) ⇒ Object

Return the specified obj (which must support the #collect method) as a conjunction. Each item is converted to a String if it is not already (using #to_s) unless a block is given, in which case it is called once for each object in the array, and the stringified return value from the block is used instead. Returning nil causes that particular element to be omitted from the resulting conjunction. The following options can be used to control the makeup of the returned conjunction String:

:separator

Specify one or more characters to separate items in the resulting list. Defaults to ', '.

:altsep

An alternate separator to use if any of the resulting conjunction’s clauses contain the :separator character/s. Defaults to '; '.

:penultimate

Flag that indicates whether or not to join the last clause onto the rest of the conjunction using a penultimate :separator. E.g.,

%w{duck cow dog}.en.conjunction
# => "a duck, a cow, and a dog"
%w{duck cow dog}.en.conjunction( :penultimate => false )
# => "a duck, a cow and a dog"

Defaults to true.

:conjunctive

Sets the word used as the conjunctive (separating word) of the resulting string. Defaults to 'and'.

:combine

If set to true (the default), items which are identical (after surrounding spaces are stripped) will be combined in the resulting conjunction. E.g.,

%w{goose cow goose dog}.en.conjunction
# => "two geese, a cow, and a dog"
%w{goose cow goose dog}.en.conjunction( :combine => false )
# => "a goose, a cow, a goose, and a dog"
:casefold

If set to true (the default), then items are compared case-insensitively when combining them. This has no effect if :combine is false.

:generalize

If set to true, then quantities of combined items are turned into general descriptions instead of exact amounts.

ary = %w{goose pig dog horse goose reindeer goose dog horse}
ary.en.conjunction
# => "three geese, two dogs, two horses, a pig, and a reindeer"
ary.en.conjunction( :generalize => true )
# => "several geese, several dogs, several horses, a pig, and a reindeer"

See the #quantify method for specifics on how quantities are generalized. Generalization defaults to false, and has no effect if :combine is false.

:quantsort

If set to true (the default), items which are combined in the resulting conjunction will be listed in order of amount, with greater quantities sorted first. If :quantsort is false, combined items will appear where the first instance of them occurred in the list. This sort is also the fallback for identical quantities (i.e., items of the same quantity will be listed in the order they appeared in the source list).



1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
# File 'lib/linguistics/en.rb', line 1490

# Return the items of +obj+ (anything that responds to #collect) joined
# into an English conjunction such as "two geese, a cow, and a dog".
#
# Each item is stringified with #to_s, or passed to the block if one is
# given (a nil block result drops the item). +args+ is merged over
# ConjunctionDefaults; the keys read here are :separator, :altsep,
# :penultimate, :conjunctive, :combine, :casefold, :generalize and
# :quantsort.
#
# With fewer than two phrases the result is just a( phrases[0] ).
def conjunction( obj, args={} )
	config = ConjunctionDefaults.merge( args )

	# Transform items in the obj to phrases
	phrases =
		if block_given?
			obj.collect {|item| yield(item) }.compact
		else
			obj.collect {|item| item.to_s }
		end

	# No need for a conjunction if there's only one thing
	return a(phrases[0]) if phrases.length < 2

	# Set up a Proc to derive a collector key from a phrase depending on the
	# configuration
	keyfunc =
		if config[:casefold]
			proc {|key| key.downcase.strip}
		else
			proc {|key| key.strip}
		end

	# Count and delete phrases that hash the same when the keyfunc munges
	# them into the same thing if we're combining (:combine => true).
	collector = {}
	if config[:combine]

		phrases.each_index do |i|
			# Deleting entries shortens the list under the iterator, so stop
			# once the index runs past the (truncated) end.
			break if phrases[i].nil?

			# Make the key using the configured key function
			phrase = keyfunc[ phrases[i] ]

			# If the collector already has this key, increment its count,
			# eliminate the duplicate from the phrase list, and redo the loop
			# so the element that slid into slot i gets examined too.
			if collector.key?( phrase )
				collector[ phrase ] += 1
				phrases.delete_at( i )
				redo
			end

			collector[ phrase ] = 1
		end
	else
		# If we're not combining, just make everything have a count of 1.
		phrases.uniq.each {|key| collector[ keyfunc[key] ] = 1}
	end

	# If sort-by-quantity is turned on, sort the phrases first by how many
	# there are (most-first), and then by the order they were specified in.
	if config[:quantsort] && config[:combine]
		origorder = {}
		phrases.each_with_index {|phrase,i| origorder[ keyfunc[phrase] ] ||= i }
		phrases.sort! {|x,y|
			(collector[ keyfunc[y] ] <=> collector[ keyfunc[x] ]).nonzero? ||
			(origorder[ keyfunc[x] ] <=> origorder[ keyfunc[y] ])
		}
	end

	# Set up a filtering function that adds either an indefinite article, an
	# indefinite quantifier, or a definite quantifier to each phrase
	# depending on the configuration and the count of phrases in the
	# collector.
	filter =
		if config[:generalize]
			proc {|phrase, count| quantify(phrase, count) }
		else
			proc {|phrase, count|
				if count > 1
					"%s %s" % [
						# :TODO: Make this threshold settable
						count < 10 ? count.en.numwords : count.to_s,
						plural(phrase, count)
					]
				else
					a( phrase )
				end
			}
		end

	# Now use the configured filter to turn each phrase into its final
	# form.
	phrases.collect! {|phrase| filter[phrase, collector[ keyfunc[phrase] ]] }

	# Prepend the conjunctive to the last element unless it's empty or
	# there's only one element
	phrases[-1].insert( 0, config[:conjunctive] + " " ) unless
		config[:conjunctive].strip.empty? or
		phrases.length < 2

	# Concatenate the last two elements if there's no penultimate separator,
	# and pick a separator based on how many phrases there are and whether
	# or not there's already an instance of it in the phrases. Combining may
	# have reduced the list to a single phrase, in which case there is no
	# second-to-last element to append to, so the merge is skipped.
	phrase_count = phrases.length
	phrases[-2] << " " << phrases.pop unless
		config[:penultimate] or
		phrases.length < 2
	sep = config[:separator]
	if phrase_count <= 2
		sep = ' '
	elsif phrases.find {|str| str.include?(config[:separator]) }
		sep = config[:altsep]
	end

	return phrases.join( sep )
end

.debug_msg(*msgs) ⇒ Object

Debugging output



663
664
665
# File 'lib/linguistics/en.rb', line 663

# Write the given messages, joined by single spaces, to $stderr -- but only
# when Ruby's global $DEBUG flag is on. Otherwise do nothing.
def debug_msg( *msgs ) # :nodoc:
	return unless $DEBUG
	$stderr.puts msgs.join(" ")
end

.def_lprintf_formatter(name, meth) ⇒ Object

Add the specified method (which can be either a Method object or a Symbol for looking up a method) as the lprintf formatter registered under name.



108
109
110
111
# File 'lib/linguistics/en.rb', line 108

# Register +meth+ in the lprintf_formatters table under the key +name+.
# Anything that is not already a Method object is looked up on self via
# #method first. Returns the stored Method.
def self::def_lprintf_formatter( name, meth )
	formatter = meth.is_a?( Method ) ? meth : self.method( meth )
	self.lprintf_formatters[ name ] = formatter
end

.def_synset_function(meth) ⇒ Object

Make a function that calls the method meth on the synset of an input word.



121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/linguistics/en/wordnet.rb', line 121

# Define a singleton method named +meth+ on self. The generated method
# takes (word, pos, sense), with sense falling back to 1, looks up the
# synset via #synset, and returns the result of sending +meth+ to it, or
# nil when no synset was found. Calling it without a word raises
# ArgumentError.
def def_synset_function( meth )
	singleton = class << self; self; end

	singleton.send( :define_method, meth ) do |*args|
		word, pos, sense = *args
		unless word
			raise ArgumentError, "wrong number of arguments (0 for 1)"
		end

		syn = synset( word.to_s, pos, sense || 1 )
		syn.nil? ? nil : syn.send( meth )
	end
end

.english_to_camel_case(string) ⇒ Object

Turns an English language string into a CamelCase word.



1609
1610
1611
# File 'lib/linguistics/en.rb', line 1609

# Collapse each run of whitespace that is followed by a lower-case ASCII
# letter, replacing the run and the letter with the upper-cased letter
# ("camel case" => "camelCase"). Everything else is left as it is.
def english_to_camel_case( string )
	text = string.to_s
	text.gsub( /\s+([a-z])/ ) { Regexp.last_match( 1 ).upcase }
end

Returns true if LinkParser was loaded okay

Returns:

  • (Boolean)


75
# File 'lib/linguistics/en/linkparser.rb', line 75

# Reader for the @has_link_parser flag.
def has_link_parser?
	@has_link_parser
end

.has_wordnet?Boolean

Returns true if WordNet was loaded okay

Returns:

  • (Boolean)


101
# File 'lib/linguistics/en/wordnet.rb', line 101

# Reader for the @has_wordnet flag.
def has_wordnet?
	@has_wordnet
end

.indef_article(word, count) ⇒ Object

Returns the given word with a prepended indefinite article, unless count is non-nil and not singular.



940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
# File 'lib/linguistics/en.rb', line 940

# Return +word+ preceded by "a" or "an". When a count is supplied (or
# Linguistics::num provides one) and it does not match PL_count_one, the
# result is "<count> <word>" instead.
#
# The article is picked by the first pattern below that matches; the order
# of the tests is significant.
def indef_article( word, count )
	count ||= Linguistics::num
	if count && /^(#{PL_count_one})$/i !~ count.to_s
		return "#{count} #{word}"
	end

	# (User-defined variants are not consulted here.)
	article =
		case word
		# Special cases
		when /^(#{A_explicit_an})/i			then 'an'

		# Abbreviations
		when /^(#{A_abbrev})/x				then 'an'
		when /^[aefhilmnorsx][.-]/i			then 'an'
		when /^[a-z][.-]/i					then 'a'

		# Consonants
		when /^[^aeiouy]/i					then 'a'

		# Special vowel-forms
		when /^e[uw]/i						then 'a'
		when /^onc?e\b/i					then 'a'
		when /^uni([^nmd]|mo)/i				then 'a'
		when /^u[bcfhjkqrst][aeiou]/i		then 'a'

		# Vowels
		when /^[aeiou]/i					then 'an'

		# y... (before certain consonants implies (unnaturalized) "i.." sound)
		when /^(#{A_y_cons})/i				then 'an'

		# Otherwise, guess "a"
		else 'a'
		end

	"#{article} #{word}"
end

.infinitive(word) ⇒ Object

Return the infinitive form of the given word



1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
# File 'lib/linguistics/en/infinitive.rb', line 1048

# Work out the infinitive form of +word+ and return it wrapped in an
# Infinitive built from (word1, word2, suffix, rule).
#
# Words listed in IrregularInfinitives are looked up directly and tagged
# with the rule 'irregular'. Anything else is matched against the suffix
# rules in InfSuffixRules, tried in InfSuffixRuleOrder order; the first
# rule whose suffix the word ends with decides how the candidates word1 and
# word2 are built. When no rule applies, word1, word2 and rule stay empty
# strings and suffix is nil.
def infinitive( word )
	word = word.to_s
	word1 = word2 = suffix = rule = newword = ''

	if IrregularInfinitives.key?( word )
		word1	= IrregularInfinitives[ word ]
		rule	= 'irregular'
	else
		# Build up prefixes[suffix] as an array of prefixes, from longest to shortest.
		prefix, suffix = nil
		prefixes = Hash::new {|hsh,key| hsh[key] = []}

		# Build the hash of prefixes for the word: for every split of the
		# word into prefix + suffix, record the prefix extended by ever
		# shorter leading pieces of the suffix, ending with the bare prefix.
		# E.g. for "tipped", prefixes["ed"] is ["tippe", "tipp"].
		1.upto( word.length ) {|i|
			prefix = word[0, i]
			suffix = word[i..-1]

			(suffix.length - 1).downto( 0 ) {|j|
				newword = prefix + suffix[0, j]
				prefixes[ suffix ].push( newword )
			}
		}

		$stderr.puts "prefixes: %p" % prefixes if $DEBUG

		# Now check for rules covering the prefixes for this word, picking
		# the first one if one was found. The array intersection keeps the
		# ordering of InfSuffixRuleOrder, so the earliest-ordered rule wins.
		if (( suffix = ((InfSuffixRuleOrder & prefixes.keys).first) ))
			rule = InfSuffixRules[ suffix ][:rule]
			shortestPrefix = InfSuffixRules[ suffix ][:word1]
			$stderr.puts "Using rule %p (%p) for suffix %p" % 
				[ rule, shortestPrefix, suffix ] if $DEBUG

			# The rule's :word1 code selects how the candidates are formed.
			case shortestPrefix
			when 0
				# Longest prefix, and the next-longest one.
				word1 = prefixes[ suffix ][ 0 ]
				word2 = prefixes[ suffix ][ 1 ]
				$stderr.puts "For sp = 0: word1: %p, word2: %p" %
					[ word1, word2 ] if $DEBUG

			when -1
				# Shortest prefix + :suffix1 only.
				word1 = prefixes[ suffix ].last +
					InfSuffixRules[ suffix ][:suffix1]
				word2 = ''
				$stderr.puts "For sp = -1: word1: %p, word2: %p" %
					[ word1, word2 ] if $DEBUG

			when -2
				# Shortest prefix + :suffix1, and the bare shortest prefix.
				word1 = prefixes[ suffix ].last +
					InfSuffixRules[ suffix ][:suffix1]
				word2 = prefixes[ suffix ].last
				$stderr.puts "For sp = -2: word1: %p, word2: %p" %
					[ word1, word2 ] if $DEBUG

			when -3
				# Shortest prefix + :suffix1, and shortest prefix + :suffix2.
				word1 = prefixes[ suffix ].last +
					InfSuffixRules[ suffix ][:suffix1]
				word2 = prefixes[ suffix ].last +
					InfSuffixRules[ suffix ][:suffix2]
				$stderr.puts "For sp = -3: word1: %p, word2: %p" %
					[ word1, word2 ] if $DEBUG

			when -4
				# The original word, unchanged.
				word1 = word
				word2 = ''
				$stderr.puts "For sp = -4: word1: %p, word2: %p" %
					[ word1, word2 ] if $DEBUG

			else
				raise IndexError,
					"Couldn't find rule for shortest prefix %p" %
					shortestPrefix
			end

			# Rules 12b and 15: Strip off 'ed' or 'ing'.
			if rule == '12b' or rule == '15'
				# Do we have a monosyllable of this form:
				# o 0+ Consonants
				# o 1+ Vowel
				# o 2 Non-wx (the same letter doubled)
				# Eg: tipped => tipp?
				# Then return tip and tipp.
				# Eg: swimming => swimm?
				# Then return swim and swimm.

				if /^([^aeiou]*[aeiou]+)([^wx])\2$/ =~ word2
					word1 = $1 + $2
					word2 = $1 + $2 + $2
				end
			end
		end
	end

	return Infinitive::new( word1, word2, suffix, rule )
end

.language(unused = nil) ⇒ Object

Return the name of the language this module is for.



1099
1100
1101
# File 'lib/linguistics/en.rb', line 1099

# Name of the language this module handles. The optional argument is
# accepted and ignored.
def language( unused=nil )
	return "English"
end

.lp_dictObject

The instance of LinkParser used for all Linguistics LinkParser functions.



83
84
85
86
87
88
89
90
91
# File 'lib/linguistics/en/linkparser.rb', line 83

# Return the memoized LinkParser::Dictionary (created with verbosity 0 on
# first use). Raises NotImplementedError, quoting the stored error's
# message, when @lp_error is set.
def lp_dict
	raise NotImplementedError,
		"LinkParser functions are not loaded: %s" % @lp_error.message if @lp_error

	@lp_dict ||= LinkParser::Dictionary.new( :verbosity => 0 )
end

.lp_errorObject

If #has_link_parser? returns false, this can be called to fetch the exception which was raised when trying to load LinkParser.



79
# File 'lib/linguistics/en/linkparser.rb', line 79

# Reader for @lp_error.
def lp_error
	@lp_error
end

.lprintf(fmt, *args) ⇒ Object

Format the given fmt string by replacing %-escaped sequences with the result of performing a specified operation on the corresponding argument, ala Kernel.sprintf.

%PL

Plural.

%A, %AN

Prepend indefinite article.

%NO

Zero-quantified phrase.

%NUMWORDS

Convert a number into the corresponding words.

%CONJUNCT

Conjunction.



1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
# File 'lib/linguistics/en.rb', line 1684

# Expand every %NAME escape (capital letters and underscores) in +fmt+ by
# calling the formatter registered under :NAME in lprintf_formatters with
# the next unused argument. An escape with no registered formatter raises
# a RuntimeError.
def lprintf( fmt, *args )
	fmt.to_s.gsub( /%([A-Z_]+)/ ) do
		op = $1.to_s.upcase.to_sym
		raise "no such formatter %p" % op unless
			self.lprintf_formatters.key?( op )

		self.lprintf_formatters[ op ].call( args.shift )
	end
end

.matchgroup(*parts) ⇒ Object

Wrap one or more parts in a non-capturing alteration Regexp



95
96
97
98
# File 'lib/linguistics/en.rb', line 95

# Build the source text of a non-capturing alternation group: the
# (flattened) +parts+ joined with "|" and wrapped in "(?:" ... ")".
def self::matchgroup( *parts )
	alternatives = parts.flatten.join("|")
	return "(?:" + alternatives + ")"
end

.no(phrase, count = nil) ⇒ Object

Translate zero-quantified phrase to “no phrase.plural”



1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
# File 'lib/linguistics/en.rb', line 1182

# Quantify +phrase+ with +count+, rendering a zero count as "no":
# a count matching PL_count_zero gives "no <plural of phrase>", any other
# count gives "<count> <phrase pluralized for that count>". Whitespace
# around the phrase is preserved. +count+ falls back to Linguistics::num
# and then to 0.
#
# When the pattern finds no word in the phrase (e.g. an empty string),
# +phrase+ is returned unchanged, as the other phrase helpers do, rather
# than failing while concatenating nil pieces.
def no( phrase, count=nil )
	md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
	pre, word, post = md.to_a[1,3]
	return phrase if word.nil? or word.empty?

	count ||= Linguistics::num || 0

	if /^#{PL_count_zero}$/ =~ count.to_s
		"#{pre}no " + plural( word, 0 ) + post
	else
		"#{pre}#{count} " + plural( word, count ) + post
	end
end

.normalize_count(count, default = 2) ⇒ Object

Normalize a count to either 1 or 2 (singular or plural)



669
670
671
672
673
674
675
676
677
678
# File 'lib/linguistics/en.rb', line 669

# Reduce +count+ to 1 (singular) or +default+ (plural unless overridden).
# A nil count yields +default+. The result is 1 when the count matches
# PL_count_one, or when classical mode is on and it matches PL_count_zero.
def normalize_count( count, default=2 )
	return default if count.nil? # Default to plural

	text = count.to_s
	singular = /^(#{PL_count_one})$/i =~ text
	singular ||= Linguistics::classical? && /^(#{PL_count_zero})$/ =~ text

	singular ? 1 : default
end

.number_to_words(num, config) ⇒ Object

Return the specified number num as an array of number phrases.



1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
# File 'lib/linguistics/en.rb', line 1045

# Return the number +num+ as an Array of number phrases.
#
# +config+ is a Hash read for :zero (returned alone when +num+ is zero),
# :group (digits per group; 0 selects ordinary hundreds/tens/units
# chunking) and :and (handed to to_hundreds).
#
# In grouped mode the digit string is cut into groups of up to :group
# digits and each group is rendered by the NumberToWordsFunctions entry
# indexed by the number of digits actually present in the group.
def number_to_words( num, config )
	return [config[:zero]] if num.to_i.zero?
	chunks = []

	# Break into word-groups if groups is set
	if config[:group].nonzero?

		# Build a Regexp with <config[:group]> number of digits. Any past
		# the first are optional.
		re = Regexp::new( "(\\d)" + ("(\\d)?" * (config[:group] - 1)) )

		# Scan the string, and call the word-chunk function that deals with
		# chunks of the found number of digits.
		num.to_s.scan( re ) {|digits|
			debug_msg "   digits = #{digits.inspect}"

			# Unmatched optional groups come back as nil; drop them. The
			# count of what remains picks the function (Array#nitems, which
			# used to provide this count, no longer exists in Ruby 1.9+).
			numerals = digits.flatten.compact.collect {|i| i.to_i}
			fn = NumberToWordsFunctions[ numerals.length ]
			debug_msg "   numerals = #{numerals.inspect}"
			chunks.push fn.call( config[:zero], *numerals ).strip
		}
	else
		phrase = num.to_s
		phrase.sub!( /\A\s*0+/, '' )
		mill = 0

		# Match backward from the end of the digits in the string, turning
		# chunks of three, of two, and of one into words. Each pass removes
		# the digits it consumed and +mill+ counts the groups of three
		# already handled.
		mill += 1 while
			phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) {
				words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill,
									 config[:and] )
				chunks.unshift words.strip.squeeze(' ') unless words.nil?
				''
			}

		phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) {
			chunks.unshift to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ')
			''
		}
		phrase.sub!( /(\d)(?=\D*\Z)/ ) {
			chunks.unshift to_units( $1.to_i, mill ).strip.squeeze(' ')
			''
		}
	end

	return chunks
end

.numwords(number, hashargs = {}) ⇒ Object

Return the specified number as english words. One or more configuration values may be passed to control the returned String:

:group

Controls how many numbers at a time are grouped together. Valid values are 0 (normal grouping), 1 (single-digit grouping, e.g., “one, two, three, four”), 2 (double-digit grouping, e.g., “twelve, thirty-four”), or 3 (triple-digit grouping, e.g., “one twenty-three, four”).

:comma

Set the character/s used to separate word groups. Defaults to ", ".

:and

Set the word and/or characters used where ' and ' (the default) is normally used. Setting :and to ' ', for example, will cause 2556 to be returned as “two-thousand, five hundred fifty-six” instead of “two-thousand, five hundred and fifty-six”.

:zero

Set the word used to represent the numeral 0 in the result. 'zero' is the default.

:decimal

Set the translation of any decimal points in the number; the default is 'point'.

:asArray

If set to a true value, the number will be returned as an array of word groups instead of a String.



1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
# File 'lib/linguistics/en.rb', line 1242

# Return +number+ spelled out in English words.
#
# +hashargs+ is merged over NumwordDefaults; the keys read here are
# :group (0..3, anything else raises a RuntimeError), :comma, :and,
# :decimal and :asArray. The result is a String, or a flat Array of word
# groups when :asArray is true.
#
# A leading "+" or "-" becomes the word "plus" or "minus", and a trailing
# ordinal suffix (st/nd/rd/th) makes the last word of the whole-number part
# an ordinal.
def numwords( number, hashargs={} )
	# Work on a private copy: #to_s hands back the caller's own object when
	# +number+ is already a String, and the in-place edits below would
	# otherwise alter it (or raise if it is frozen).
	num = number.to_s.dup
	config = NumwordDefaults.merge( hashargs )
	raise "Bad chunking option: #{config[:group]}" unless
		config[:group].between?( 0, 3 )

	# Array of number parts: first is everything to the left of the first
	# decimal, followed by any groups of decimal-delimited numbers after that
	parts = []

	# Wordify any sign prefix
	sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''

	# Strip any ordinal suffixes
	ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )

	# Split the number into chunks delimited by '.'
	chunks =
		if config[:decimal].empty?
			[ num ]
		elsif config[:group].nonzero?
			num.split( /\./ )
		else
			num.split( /\./, 2 )
		end

	# Wordify each chunk, pushing arrays into the parts array
	chunks.each_with_index {|chunk,section|
		chunk.gsub!( /\D+/, '' )

		# A whole-number part without any digits contributes an empty array.
		if chunk.empty? && section.zero?
			parts.push []
			next
		end

		# The second and succeeding parts of a non-grouped number are read
		# out digit by digit; everything else uses the configured grouping.
		if config[:group].zero? && section.nonzero?
			parts.push number_to_words( chunk, config.merge(:group => 1) )
		else
			parts.push number_to_words( chunk, config )
		end
	}

	debug_msg "Parts => #{parts.inspect}"

	# Turn the last word of the whole-number part back into an ordinal if
	# the original number came in that way.
	if ord && !parts[0].empty?
		parts[0][-1] = ordinal( parts[0].last )
	end

	# If the caller's expecting an Array return, just flatten and return the
	# parts array.
	if config[:asArray]
		parts[0].unshift( sign ) unless sign.empty?
		return parts.flatten
	end

	# Catenate each sub-parts array into a whole number part and one or more
	# post-decimal parts. If grouping is turned on, all sub-parts get joined
	# with commas, otherwise just the whole-number part is.
	if config[:group].zero?
		if parts[0].length > 1

			# Join all but the last part together with commas
			wholenum = parts[0][0...-1].join( config[:comma] )

			# If the last part is just a single word, append it to the
			# wholenum part with an 'and'. This is to get things like 'three
			# thousand and three' instead of 'three thousand, three'.
			if /^\s*(\S+)\s*$/ =~ parts[0].last
				wholenum += config[:and] + parts[0].last
			else
				wholenum += config[:comma] + parts[0].last
			end
		else
			wholenum = parts[0][0]
		end
		decimals = parts[1..-1].collect {|part| part.join(" ")}

		debug_msg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"

		# Join with the configured decimal; if it's empty, just join with
		# spaces.
		joiner = config[:decimal].empty? ? " " : " #{config[:decimal]} "
		words = ([ wholenum ] + decimals).join( joiner ).strip

		# Put the sign word in front as a word of its own ("minus three",
		# not "minusthree").
		return [ sign, words ].reject {|str| str.empty? }.join( " " )
	else
		# NOTE(review): the sign word is not included in grouped output;
		# left as found -- confirm whether that is intended.
		return parts.compact.
			separate( config[:decimal] ).
			delete_if {|el| el.empty?}.
			join( config[:comma] ).
			strip
	end
end

.ordinal(number) ⇒ Object

Transform the given number into an ordinal word. The number object can be either an Integer or a String.



1355
1356
1357
1358
1359
1360
1361
1362
1363
# File 'lib/linguistics/en.rb', line 1355

# Transform +number+ into an ordinal.
#
# An Integer gets its numeric suffix appended ("1" => "1st"), looked up in
# Nth by the last two digits and then by the last digit. Anything else is
# stringified and has its final number word replaced (or a suffix added)
# according to OrdinalSuffixes / Ordinals.
def ordinal( number )
	case number
	when Integer
		# Look the suffix up by magnitude: Ruby's % takes the sign of the
		# divisor, so -2 % 10 is 8 and would select the suffix for 8.
		magnitude = number.abs
		number.to_s + (Nth[ magnitude % 100 ] || Nth[ magnitude % 10 ])

	else
		number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
	end
end

.ordinate(number) ⇒ Object

Transform the given number into an ordinate word.



1368
1369
1370
# File 'lib/linguistics/en.rb', line 1368

# Spell +number+ out with Linguistics::EN.numwords, then turn the result
# into an ordinal with Linguistics::EN.ordinal.
def ordinate( number )
	words = Linguistics::EN.numwords( number )
	Linguistics::EN.ordinal( words )
end

.plural(phrase, count = nil) ⇒ Object

Return the plural of the given phrase if count indicates it should be plural.



1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
# File 'lib/linguistics/en.rb', line 1106

# Pluralize +phrase+ for the given +count+, preserving surrounding
# whitespace. Numeric phrases are spelled out with numwords first. The
# word is offered to the special-adjective, special-verb and noun
# pluralizers in that order; the first non-false result is passed through
# postprocess. A phrase in which the pattern finds no word is returned
# as it is.
def plural( phrase, count=nil )
	phrase = numwords( phrase ) if phrase.is_a?( Numeric )

	lead, word, trail =
		phrase.to_s.match( /\A(\s*)(.+?)(\s*)\Z/ ).to_a[1,3]
	return phrase if word.nil? or word.empty?

	inflected = pluralize_special_adjective( word, count )
	inflected ||= pluralize_special_verb( word, count )
	inflected ||= pluralize_noun( word, count )

	lead + postprocess( word, inflected ) + trail
end

.plural_adjective(phrase, count = nil) ⇒ Object Also known as: plural_adj

Return the plural of the given adjectival phrase if count indicates it should be plural.



1153
1154
1155
1156
1157
1158
1159
1160
1161
# File 'lib/linguistics/en.rb', line 1153

# Pluralize the adjectival +phrase+ for +count+, preserving surrounding
# whitespace. Words the special-adjective pluralizer does not handle are
# passed to postprocess unchanged. A phrase in which the pattern finds no
# word is returned as it is.
def plural_adjective( phrase, count=nil )
	lead, word, trail =
		phrase.to_s.match( /\A(\s*)(.+?)(\s*)\Z/ ).to_a[1,3]
	return phrase if word.nil? or word.empty?

	inflected = pluralize_special_adjective( word, count ) || word
	lead + postprocess( word, inflected ) + trail
end

.plural_noun(phrase, count = nil) ⇒ Object

Return the plural of the given noun phrase if count indicates it should be plural.



1125
1126
1127
1128
1129
1130
1131
1132
# File 'lib/linguistics/en.rb', line 1125

# Pluralize the noun +phrase+ for +count+ via pluralize_noun and
# postprocess, preserving surrounding whitespace. A phrase in which the
# pattern finds no word is returned as it is.
def plural_noun( phrase, count=nil )
	lead, word, trail =
		phrase.to_s.match( /\A(\s*)(.+?)(\s*)\Z/ ).to_a[1,3]
	return phrase if word.nil? or word.empty?

	inflected = pluralize_noun( word, count )
	lead + postprocess( word, inflected ) + trail
end

.plural_verb(phrase, count = nil) ⇒ Object

Return the plural of the given verb phrase if count indicates it should be plural.



1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
# File 'lib/linguistics/en.rb', line 1138

# Pluralize the verb +phrase+ for +count+, preserving surrounding
# whitespace. The special-verb pluralizer is tried first, then the general
# one; the result goes through postprocess. A phrase in which the pattern
# finds no word is returned as it is.
def plural_verb( phrase, count=nil )
	lead, word, trail =
		phrase.to_s.match( /\A(\s*)(.+?)(\s*)\Z/ ).to_a[1,3]
	return phrase if word.nil? or word.empty?

	inflected = pluralize_special_verb( word, count )
	inflected ||= pluralize_general_verb( word, count )

	lead + postprocess( word, inflected ) + trail
end

.pluralize_general_verb(word, count) ⇒ Object

Pluralize regular verbs



878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
# File 'lib/linguistics/en.rb', line 878

# Pluralize a regular verb. The count (falling back to Linguistics::num)
# is normalized first; a singular count returns +word+ untouched. Verbs
# matching PL_v_ambiguous_pres are mapped through PL_v_ambiguous_pres_h
# with any trailing words re-attached; every other word comes back
# unchanged.
def pluralize_general_verb( word, count )
	count = normalize_count( count || Linguistics::num )

	return word if /^(#{PL_count_one})$/i =~ count.to_s

	case word

	# Ambiguous present tenses (simple and compound)
	when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
		verb, rest = $1, $2
		PL_v_ambiguous_pres_h[ verb.downcase ] + rest

	# Ambiguous preterite and perfect tenses
	when /^(#{PL_v_ambiguous_non_pres})((\s.*)?)$/i then word

	# Otherwise, 1st or 2nd person is uninflected
	else word
	end
end

.pluralize_noun(word, count = nil) ⇒ Object

Pluralize nouns



705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
# File 'lib/linguistics/en.rb', line 705

# Return the plural form of the noun +word+ for the given +count+.
#
# A nil +count+ falls back to Linguistics::num, and the count is passed
# through normalize_count; if the result equals 1 the word is returned
# unchanged. Otherwise the word is run through an ordered list of rules
# (uninflected nouns, compounds, pronouns, irregular plurals, imported
# words, then the regular English suffix rules) and the first rule that
# matches decides the result, so the order of the +when+ clauses matters.
#
# Some rules return lookup-table values directly (for example the
# PL_pron_nom_h entry for a nominative pronoun).
# NOTE(review): table values appear to be able to carry "modern|classical"
# alternatives separated by '|'; this method does not resolve them itself --
# confirm that callers always pass the result through postprocess.
def pluralize_noun( word, count=nil )
	value = nil
	count ||= Linguistics::num
	count = normalize_count( count )

	return word if count == 1

	# Handle user-defined nouns
	#if value = ud_match( word, PL_sb_user_defined )
	#	return value
	#end

	# Handle empty word and uninflected plurals (the herd-animal list is
	# only treated as uninflected in classical mode)
	case word
	when ''
		return word
	when /^(#{PL_sb_uninflected})$/i
		return word
	else
		if Linguistics::classical? &&
		   /^(#{PL_sb_uninflected_herd})$/i =~ word
			return word
		end
	end

	# Handle compounds ("Governor General", "mother-in-law", "aide-de-camp", etc.)
	# The patterns below are wrapped in non-capturing groups, so $1, $2 and $3
	# presumably come from capture groups inside the interpolated constants
	# -- TODO confirm against their definitions.
	case word
	when /^(?:#{PL_sb_postfix_adj})$/i
		value = $2
		return pluralize_noun( $1, 2 ) + value

	when /^(?:#{PL_sb_prep_dual_compound})$/i
		value = [ $2, $3 ] 
		# Note: the trailing part is pluralized with the default count, not
		# an explicit 2 like the leading part.
		return pluralize_noun( $1, 2 ) + value[0] + pluralize_noun( value[1] )

	when /^(?:#{PL_sb_prep_compound})$/i
		value = $2 
		return pluralize_noun( $1, 2 ) + value

	# Handle pronouns (a preposition followed by an accusative pronoun, then
	# bare nominative and accusative pronouns)
	when /^((?:#{PL_prep})\s+)(#{PL_pron_acc})$/i
		return $1 + PL_pron_acc_h[ $2.downcase ]

	when /^(#{PL_pron_nom})$/i
		return PL_pron_nom_h[ word.downcase ]

	when /^(#{PL_pron_acc})$/i
		return PL_pron_acc_h[ $1.downcase ]

	# Handle isolated irregular plurals; anything before the irregular word
	# (matched at a word boundary) is kept as a prefix
	when /(.*)\b(#{PL_sb_irregular})$/i
		return $1 + PL_sb_irregular_h[ $2.downcase ]

	# Words ending in ...man that simply take ...s
	when /(#{PL_sb_U_man_mans})$/i
		return "#{$1}s"

	# Handle families of irregular plurals
	when /(.*)man$/i ;					return "#{$1}men"
	when /(.*[ml])ouse$/i ;				return "#{$1}ice"
	when /(.*)goose$/i ;				return "#{$1}geese"
	when /(.*)tooth$/i ;				return "#{$1}teeth"
	when /(.*)foot$/i ;					return "#{$1}feet"

	# Handle unassimilated imports
	when /(.*)ceps$/i ;					return word
	when /(.*)zoon$/i ;					return "#{$1}zoa"
	when /(.*[csx])is$/i ;				return "#{$1}es"
	when /(#{PL_sb_U_ex_ices})ex$/i;	return "#{$1}ices"
	when /(#{PL_sb_U_ix_ices})ix$/i;	return "#{$1}ices"
	when /(#{PL_sb_U_um_a})um$/i ;		return "#{$1}a"
	when /(#{PL_sb_U_us_i})us$/i ;		return "#{$1}i"
	when /(#{PL_sb_U_on_a})on$/i ;		return "#{$1}a"
	when /(#{PL_sb_U_a_ae})$/i ;		return "#{$1}e"
	end

	# Handle incompletely assimilated imports (classical mode only)
	if Linguistics::classical?
		case word
		when /(.*)trix$/i ;				return "#{$1}trices"
		when /(.*)eau$/i ;				return "#{$1}eaux"
		when /(.*)ieu$/i ;				return "#{$1}ieux"
		when /(.{2,}[yia])nx$/i ;		return "#{$1}nges"
		when /(#{PL_sb_C_en_ina})en$/i; return "#{$1}ina"
		when /(#{PL_sb_C_ex_ices})ex$/i;	return "#{$1}ices"
		when /(#{PL_sb_C_ix_ices})ix$/i;	return "#{$1}ices"
		when /(#{PL_sb_C_um_a})um$/i ;	return "#{$1}a"
		when /(#{PL_sb_C_us_i})us$/i ;	return "#{$1}i"
		when /(#{PL_sb_C_us_us})$/i ;	return "#{$1}"
		when /(#{PL_sb_C_a_ae})$/i ;	return "#{$1}e"
		when /(#{PL_sb_C_a_ata})a$/i ;	return "#{$1}ata"
		when /(#{PL_sb_C_o_i})o$/i ;	return "#{$1}i"
		when /(#{PL_sb_C_on_a})on$/i ;	return "#{$1}a"
		when /#{PL_sb_C_im}$/i ;		return "#{word}im"
		when /#{PL_sb_C_i}$/i ;			return "#{word}i"
		end
	end


	# Handle singular nouns ending in ...s or other sibilants
	case word
	when /^(#{PL_sb_singular_s})$/i;	return "#{$1}es"
	# Case-sensitive on purpose: only capitalized words ending in ...s
	when /^([A-Z].*s)$/;				return "#{$1}es"
	when /(.*)([cs]h|[zx])$/i ;			return "#{$1}#{$2}es"
	# when /(.*)(us)$/i ;				return "#{$1}#{$2}es"

	# Handle ...f -> ...ves
	when /(.*[eao])lf$/i ;				return "#{$1}lves"; 
	when /(.*[^d])eaf$/i ;				return "#{$1}eaves"
	when /(.*[nlw])ife$/i ;				return "#{$1}ives"
	when /(.*)arf$/i ;					return "#{$1}arves"

	# Handle ...y (vowel + y and capitalized words just take ...s)
	when /(.*[aeiou])y$/i ;				return "#{$1}ys"
	when /([A-Z].*y)$/ ;				return "#{$1}s"
	when /(.*)y$/i ;					return "#{$1}ies"

	# Handle ...o
	when /#{PL_sb_U_o_os}$/i ;			return "#{word}s"
	when /[aeiou]o$/i ;					return "#{word}s"
	when /o$/i ;						return "#{word}es"

	# Otherwise just add ...s
	else
		return "#{word}s"
	end
end

.pluralize_special_adjective(word, count) ⇒ Object

Handle special adjectives



902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
# File 'lib/linguistics/en.rb', line 902

# Inflect the adjectives that need special treatment.
#
# A nil +count+ falls back to Linguistics::num and is passed through
# normalize_count; when the normalized count matches PL_count_one the
# +word+ is returned as given. Otherwise the result is the PL_adj_special_h
# or PL_adj_poss_h entry for a known adjective, the possessive of the
# pluralized noun for words ending in an apostrophe (with or without a
# trailing "s"), or nil when the word is not recognised.
def pluralize_special_adjective( word, count )
	count = normalize_count( count || Linguistics::num )
	return word if count.to_s =~ /^(#{PL_count_one})$/i

	# Handle user-defined adjectives
	#if value = ud_match( word, PL_adj_user_defined )
	#	return value
	#end

	case word
	when /^(#{PL_adj_special})$/i
		# Known cases
		PL_adj_special_h[ $1.downcase ]
	when /^(#{PL_adj_poss})$/i
		# Possessive adjectives
		PL_adj_poss_h[ $1.downcase ]
	when /^(.*)'s?$/
		# Possessive nouns: pluralize the owner, then pick the apostrophe
		# form that fits the plural's ending.
		owners = plural_noun( $1 )
		suffix = ( /s$/ =~ owners ) ? "'" : "'s"
		"#{owners}#{suffix}"
	end
end

.pluralize_special_verb(word, count) ⇒ Object

Pluralize special verbs



835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
# File 'lib/linguistics/en.rb', line 835

# Inflect verbs that need special treatment, returning nil for anything that
# should be left to the general verb rules.
#
# A nil +count+ falls back to Linguistics::num and is passed through
# normalize_count; nil is returned when the normalized count matches
# PL_count_one. The +when+ clauses are tried in order and the first match
# wins.
def pluralize_special_verb( word, count )
	count = normalize_count( count || Linguistics::num )
	return nil if count.to_s =~ /^(#{PL_count_one})$/i

	# Handle user-defined verbs
	#if value = ud_match( word, PL_v_user_defined )
	#	return value
	#end

	case word
	when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
		# Irregular present tense (simple and compound): swap the verb and
		# keep the rest of the phrase.
		verb, rest = $1, $2
		PL_v_irregular_pres_h[ verb.downcase ] + rest
	when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
		# Irregular future, preterite and perfect tenses do not change
		word
	when /^(#{PL_v_special_s})$/, /\s/
		# Special-cased words and anything containing whitespace
		nil

	# Standard 3rd person: chop the ...(e)s off single words
	when /^(.*)([cs]h|[x]|zz|ss)es$/i then $1 + $2
	when /^(..+)ies$/i                then $1 + "y"
	when /^(.+)oes$/i                 then $1 + "o"
	when /^(.*[^s])s$/i               then $1

	# Anything else is a regular verb, handled elsewhere (nil)
	end
end

.postprocess(original, inflected) ⇒ Object

Do normal/classical switching and match capitalization in inflected by examining the original input.



683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
# File 'lib/linguistics/en.rb', line 683

# Do normal/classical switching and match capitalization in +inflected+ by
# examining the +original+ input.
#
# +inflected+ may hold two alternatives separated by '|'; the part after the
# bar is kept when Linguistics::classical? is true, the part before it
# otherwise. The capitalization of +original+ is then mirrored:
#
# * "I" and lowercase originals leave the result as it is,
# * an all-uppercase original upcases the whole result,
# * an original with a leading capital upcases only the first character.
#
# Returns a new String. +inflected+ itself is never modified: callers pass in
# values that come straight out of the inflection lookup tables (for example
# the pronoun tables), so editing the argument in place would permanently
# alter those tables and would raise on frozen strings.
def postprocess( original, inflected )
	result = inflected.dup
	result.sub!( /([^|]+)\|(.+)/ ) {
		Linguistics::classical? ? $2 : $1
	}

	case original
	when "I"
		result
	when /^[A-Z]+$/
		result.upcase
	when /^[A-Z]/
		# Can't use #capitalize, as it will downcase the rest of the string,
		# too.
		result[0,1] = result[0,1].upcase
		result
	else
		result
	end
end

.present_participle(word) ⇒ Object Also known as: part_pres

Return the present participle (the "-ing" form) of the given word.



1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
# File 'lib/linguistics/en.rb', line 1197

# Return the present participle (the "-ing" form) of +word+.
#
# The word is stringified and first run through plural_verb with a count of
# 2. Exactly one of the following ending adjustments is then applied -- the
# first one that matches -- before "ing" is appended:
#
# * ...ie becomes ...y (tie -> tying)
# * ...ue drops the e (argue -> arguing)
# * ...ae / ...ue / ...ye drop the e (eye -> eying)
# * a trailing i is dropped
# * any other trailing e (not preceded by another e) is dropped
# * words ending in ...er are left alone
# * a final consonant-vowel-consonant pattern doubles the last consonant
#   (run -> running)
def present_participle( word )
	# Work on a copy: plural_verb can hand back its argument unchanged, and
	# the in-place substitutions below must not reach the caller's string.
	plural = plural_verb( word.to_s, 2 ).dup

	plural.sub!( /ie$/, 'y' ) or
		plural.sub!( /ue$/, 'u' ) or
		# '\1' is the backreference; '$1' would insert a literal "$1"
		plural.sub!( /([auy])e$/, '\1' ) or
		plural.sub!( /i$/, '' ) or
		plural.sub!( /([^e])e$/, "\\1" ) or
		/er$/.match( plural ) or
		plural.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" )

	return "#{plural}ing"
end

.proper_noun(string) ⇒ Object

Returns the proper noun form of a string by capitalizing most of the words.

Examples:

English.proper_noun("bosnia and herzegovina") ->
  "Bosnia and Herzegovina"
English.proper_noun("macedonia, the former yugoslav republic of") ->
  "Macedonia, the Former Yugoslav Republic of"
English.proper_noun("virgin islands, u.s.") ->
  "Virgin Islands, U.S."


1662
1663
1664
1665
1666
1667
1668
# File 'lib/linguistics/en.rb', line 1662

# Build the proper-noun form of +string+.
#
# The string is split on runs of spaces and periods (the separators are
# kept). Every piece that starts with a lowercase ASCII letter is
# capitalized, except the words "and", "the" and "of"; all other pieces are
# passed through unchanged.
def proper_noun( string )
	pieces = string.split( /([ .]+)/ )

	cased = pieces.map do |piece|
		if /^[a-z]/.match( piece ) && !%w{and the of}.include?( piece )
			piece.capitalize
		else
			piece
		end
	end

	cased.join
end

.quantify(phrase, number = 0, args = {}) ⇒ Object

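Return a phrase describing the number of objects named by phrase in general terms (for example "several", "a number of" or "hundreds of thousands of") instead of as an exact figure. The args hash accepts the following option:

:joinword

Sets the word (and any surrounding spaces) used to separate the quantity from the noun in the resulting string. Defaults to ' of '.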



1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
# File 'lib/linguistics/en.rb', line 1381

# Describe roughly how many of +phrase+ there are instead of giving the
# exact +number+.
#
# +number+ is converted with #to_i. Zero and one are delegated to #no and
# #a; counts inside SeveralRange, NumberRange, NumerousRange and ManyRange
# get a fixed prefix in front of the pluralized phrase. Anything else is
# described by order of magnitude ("hundreds of thousands of ..."), with the
# pieces joined by the :joinword entry of +args+ merged over
# QuantifyDefaults.
def quantify( phrase, number=0, args={} )
	num = number.to_i
	config = QuantifyDefaults.merge( args )

	return no( phrase ) if num.zero?
	return a( phrase ) if num == 1

	vague =
		case num
		when SeveralRange  then "several "
		when NumberRange   then "a number of "
		when NumerousRange then "numerous "
		when ManyRange     then "many "
		end
	return vague + plural( phrase, num ) if vague

	# Anything bigger than the ManyRange gets described like
	# "hundreds of thousands of..." or "millions of..."
	# depending, of course, on how many there are.
	magnitude = Math::log10( num ).to_i
	thousands, subthousands = magnitude.divmod( 3 )

	# "hundreds" / "tens" within the current group of thousands, if any
	stword = { 2 => "hundreds", 1 => "tens" }[ subthousands ]

	# "thousands", "millions", ... or nothing below a thousand
	thword = plural( to_thousands(thousands).strip )
	thword = nil if thword.empty?

	pieces = [ stword, thword, plural(phrase, number) ]
	pieces.compact.join( config[:joinword] )
end

.sentence(obj) ⇒ Object

Return a LinkParser::Sentence for the stringified obj.



104
105
106
# File 'lib/linguistics/en/linkparser.rb', line 104

# Parse the stringified +obj+ with the dictionary returned by
# Linguistics::EN::lp_dict and return whatever its #parse method produces.
def sentence( obj )
	dictionary = Linguistics::EN::lp_dict
	dictionary.parse( obj.to_s )
end

.synset(word, pos = nil, sense = 1) ⇒ Object

Look up the synset associated with the given word or collocation in the WordNet lexicon and return a WordNet::Synset object.



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/linguistics/en/wordnet.rb', line 148

# Look up the synset associated with +word+ in the lexicon returned by
# Linguistics::EN::wn_lexicon.
#
# +pos+ restricts the lookup to one part of speech; when it is nil the parts
# of speech :noun, :verb, :adjective, :adverb and :other are tried in that
# order and the first truthy lookup result is returned. +sense+ is handed to
# the lexicon's #lookup_synsets unchanged. As a shortcut, an Integer passed
# in the +pos+ position is taken to be the sense number instead.
#
# Returns the lookup result, or nil if no lookup produced one.
def synset( word, pos=nil, sense=1 )
	lex = Linguistics::EN::wn_lexicon

	# Integer rather than Fixnum: Fixnum no longer exists in current Rubies,
	# and Integer also covers it on the older ones.
	if pos.is_a?( Integer )
		sense = pos
		pos = nil
	end
	postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
	syn = nil

	postries.each do |try_pos|
		syn = lex.lookup_synsets( word.to_s, try_pos, sense )
		break if syn
	end

	return syn
end

.synsets(word, pos = nil) ⇒ Object

Look up all the synsets associated with the given word or collocation in the WordNet lexicon and return an Array of WordNet::Synset objects. If pos is nil, return synsets for all parts of speech.



168
169
170
171
172
173
174
175
176
177
178
# File 'lib/linguistics/en/wordnet.rb', line 168

# Collect every synset the lexicon returned by Linguistics::EN::wn_lexicon
# knows for +word+.
#
# With a +pos+ only that part of speech is queried; otherwise :noun, :verb,
# :adjective, :adverb and :other are all queried. The individual lookup
# results are flattened into one Array with nils removed.
def synsets( word, pos=nil )
	lex = Linguistics::EN::wn_lexicon
	postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]

	found = postries.map { |try_pos| lex.lookup_synsets( word.to_s, try_pos ) }
	found.flatten.compact
end

.titlecase(string) ⇒ Object

Returns the given string as a title-cased phrase.



1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
# File 'lib/linguistics/en.rb', line 1621

# Return +string+ as a title-cased phrase.
#
# The string is split at word boundaries. The first and last tokens are
# always capitalized (with String#capitalize!, which also downcases the rest
# of the token). Every other word token gets its first character upcased
# unless it is listed in TitleCaseExceptions or is the second half of a
# contraction (the "s" in "cat's"). An empty string yields an empty string.
def titlecase( string ) # :nodoc:

	# Split on word-boundaries
	words = string.split( /\b/ )

	# An empty string has no first or last word to capitalize
	return words.join if words.empty?

	# Always capitalize the first and last words
	words.first.capitalize!
	words.last.capitalize!

	# Now scan the rest of the tokens, skipping non-words and capitalization
	# exceptions.
	words.each_with_index do |word, i|

		# Non-words
		next unless /^\w+$/.match( word )

		# Skip exception-words
		next if TitleCaseExceptions.include?( word )

		# Skip second parts of contractions. Only look back when two earlier
		# tokens really exist: negative indexes would wrap around to the end
		# of the array and compare against unrelated tokens.
		next if i >= 2 && words[i - 1] == "'" && /\w/.match( words[i - 2] )

		# Have to do it this way instead of capitalize! because that method
		# also downcases all other letters.
		word.gsub!( /^(\w)(.*)/ ) { $1.upcase + $2 }
	end

	return words.join
end

.to_hundreds(hundreds, tens = 0, units = 0, thousands = 0, joinword = " and ") ⇒ Object

Transform the specified number of hundreds-, tens-, and units-place numerals into a word phrase. If the number of thousands (thousands) is greater than 0, it will be used to determine where the decimal point is in relation to the hundreds-place number.



1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
# File 'lib/linguistics/en.rb', line 1014

# Spell out a three-digit group given as separate +hundreds+, +tens+ and
# +units+ digits, followed by the to_thousands word for +thousands+.
#
# +joinword+ goes between the hundreds and the rest ("two hundred and
# five"); an empty +joinword+ is replaced by a single space. Returns nil
# when all three digits are zero.
def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " )
	separator = joinword.empty? ? ' ' : joinword

	if hundreds.nonzero?
		phrase = to_units( hundreds ) + " hundred"
		# The join word is only wanted when something follows the hundreds
		phrase += separator if tens.nonzero? || units.nonzero?
		phrase + to_tens( tens, units ) + to_thousands( thousands )
	elsif tens.nonzero? || units.nonzero?
		to_tens( tens, units ) + to_thousands( thousands )
	end
end

.to_tens(tens, units, thousands = 0) ⇒ Object

Transform the specified number of tens- and units-place numerals into a word-phrase at the given number of thousands places.



1000
1001
1002
1003
1004
1005
1006
1007
# File 'lib/linguistics/en.rb', line 1000

# Spell out a two-digit group given as separate +tens+ and +units+ digits at
# the given number of +thousands+ places.
#
# A +tens+ digit of 1 is looked up in Teens by its +units+ digit and
# followed by the to_thousands word. Every other value combines the Tens
# entry with to_units, hyphenated when both digits are non-zero.
def to_tens( tens, units, thousands=0 )
	return Teens[ units ] + to_thousands( thousands ) if tens == 1

	tens_word = Tens[ tens ]
	hyphen = ( tens.nonzero? && units.nonzero? ) ? '-' : ''
	tens_word + hyphen + to_units( units, thousands )
end

.to_thousands(thousands = 0) ⇒ Object

Transform the specified number into one or more words like ‘thousand’, ‘million’, etc. Uses the thousands (American) system.



1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
# File 'lib/linguistics/en.rb', line 1030

# Build the magnitude word(s) for the given number of +thousands+ places
# from the Thousands table.
#
# The range 0..thousands is walked in steps of Thousands.length - 1. The
# first step contributes the table entry at +thousands+ modulo that step
# size; every further step contributes the last table entry. The collected
# entries are joined with single spaces.
def to_thousands( thousands=0 )
	cycle = Thousands.length - 1

	words = (0..thousands).step( cycle ).map do |offset|
		offset.zero? ? Thousands[ thousands % cycle ] : Thousands.last
	end

	words.join(" ")
end

.to_units(units, thousands = 0) ⇒ Object

Transform the specified number of units-place numerals into a word-phrase at the given number of thousands places.



993
994
995
# File 'lib/linguistics/en.rb', line 993

# Spell out a single +units+ digit using the Units table, followed by the
# to_thousands word for the given number of +thousands+ places.
def to_units( units, thousands=0 )
	unit_word = Units[ units ]
	unit_word + to_thousands( thousands )
end

.wn_error ⇒ Object

If #haveWordnet? returns false, this can be called to fetch the exception which was raised when WordNet was loaded.



105
# File 'lib/linguistics/en/wordnet.rb', line 105

# Reader for @wn_error, the exception that was raised when WordNet was
# loaded (nil when nothing was recorded).
def wn_error
	@wn_error
end

.wn_lexicon ⇒ Object

The instance of the WordNet::Lexicon used for all Linguistics WordNet functions.



109
110
111
112
113
114
115
116
117
# File 'lib/linguistics/en/wordnet.rb', line 109

# Return the WordNet::Lexicon shared by all Linguistics WordNet functions,
# creating it on first use and caching it in @wn_lexicon.
#
# Raises NotImplementedError, quoting the recorded error's message, when
# @wn_error is set.
def wn_lexicon
	failure = @wn_error
	raise NotImplementedError, "WordNet functions are not loaded: %s" % failure.message if failure

	@wn_lexicon ||= WordNet::Lexicon::new
end