Module: Linguistics::EN::Pluralization

Defined in:
lib/linguistics/en/pluralization.rb

Overview

Plural inflection methods for the English-language Linguistics module.

It provides plural inflection of all nouns, most verbs, and some adjectives. It also provides “classical” variants (for example, “brother” -> “brethren” and “dogma” -> “dogmata”) where appropriate.

Constant Summary collapse

PL_sb_irregular_s =

Plurals

{
	"ephemeris" => "ephemerides",
	"iris"      => "irises|irides",
	"clitoris"  => "clitorises|clitorides",
	"corpus"    => "corpuses|corpora",
	"opus"      => "opuses|opera",
	"genus"     => "genera",
	"mythos"    => "mythoi",
	"penis"     => "penises|penes",
	"testis"    => "testes",
}
PL_sb_irregular_h =
{
	"child"       => "children",
	"brother"     => "brothers|brethren",
	"loaf"        => "loaves",
	"hoof"        => "hoofs|hooves",
	"beef"        => "beefs|beeves",
	"money"       => "monies",
	"mongoose"    => "mongooses",
	"ox"          => "oxen",
	"cow"         => "cows|kine",
	"soliloquy"   => "soliloquies",
	"graffito"    => "graffiti",
	"prima donna" => "prima donnas|prime donne",
	"octopus"     => "octopuses|octopodes",
	"genie"       => "genies|genii",
	"ganglion"    => "ganglions|ganglia",
	"trilby"      => "trilbys",
	"turf"        => "turfs|turves",
}.update( PL_sb_irregular_s )
PL_sb_irregular =
matchgroup PL_sb_irregular_h.keys
PL_sb_C_a_ata =

Classical “..a” -> “..ata”

matchgroup %w[
	anathema bema carcinoma charisma diploma
	dogma drama edema enema enigma lemma
	lymphoma magma melisma miasma oedema
	sarcoma schema soma stigma stoma trauma
	gumma pragma
].collect {|word| word[0...-1]}
PL_sb_U_a_ae =

Unconditional “..a” -> “..ae”

matchgroup %w[
	alumna alga vertebra persona
]
PL_sb_C_a_ae =

Classical “..a” -> “..ae”

matchgroup [/.*umbra/ ] + %w[
	amoeba antenna formula hyperbola
	medusa nebula parabola abscissa
	hydra nova lacuna aurora
	flora fauna
]
PL_sb_C_en_ina =

Classical “..en” -> “..ina”

matchgroup %w[
	stamen foramen lumen
].collect {|word| word[0...-2] }
PL_sb_U_um_a =

Unconditional “..um” -> “..a”

matchgroup %w[
	bacterium agendum desideratum erratum
	stratum datum ovum extremum candelabrum
].collect {|word| word[0...-2] }
PL_sb_C_um_a =

Classical “..um” -> “..a”

# Stems of nouns whose classical plural replaces "..um" with "..a". The
# trailing "um" is stripped from every word before the list is handed to
# matchgroup.
#
# "encomium" and "millennium" are the correct spellings; the historical
# misspellings "enconium" and "millenium" are kept on the last line of the
# list so input that matched before still matches.
matchgroup %w[
	maximum minimum momentum optimum
	quantum cranium curriculum dictum
	phylum aquarium compendium emporium
	encomium gymnasium honorarium interregnum
	lustrum memorandum millennium rostrum
	spectrum speculum stadium trapezium
	ultimatum medium vacuum velum
	consortium
	enconium millenium
].collect {|word| word[0...-2]}
PL_sb_U_us_i =

Unconditional “..us” -> “..i”

matchgroup %w[
	alumnus alveolus bacillus bronchus
	locus nucleus stimulus meniscus
].collect {|word| word[0...-2]}
PL_sb_C_us_i =

Classical “..us” -> “..i”

matchgroup %w[
	focus   radius      genius
	incubus succubus    nimbus
	fungus  nucleolus   stylus
	torus   umbilicus   uterus
	hippopotamus
].collect {|word| word[0...-2]}
PL_sb_C_us_us =

Classical “..us” -> “..us” (assimilated 4th declension Latin nouns)

matchgroup %w[
	status apparatus prospectus sinus
	hiatus impetus plexus
]
PL_sb_U_on_a =

Unconditional “..on” -> “..a”

matchgroup %w[
	criterion perihelion aphelion
	phenomenon prolegomenon noumenon
	organon asyndeton hyperbaton
].collect {|word| word[0...-2]}
PL_sb_C_on_a =

Classical “..on” -> “..a”

matchgroup %w[
	oxymoron
].collect {|word| word[0...-2]}
PL_sb_C_o_i_a =

Classical “..o” -> “..i” (but normally -> “..os”)

%w[
	solo soprano basso alto
	contralto tempo piano
]
PL_sb_C_o_i =
matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
PL_sb_U_o_os =

Always “..o” -> “..os”

matchgroup( %w[
	albino archipelago armadillo
	commando crescendo fiasco
	ditto dynamo embryo
	ghetto guano inferno
	jumbo lumbago magneto
	manifesto medico octavo
	photo pro quarto
	canto lingo generalissimo
	stylo rhino
] | PL_sb_C_o_i_a )
PL_sb_U_ex_ices =

Unconditional “..[ei]x” -> “..ices”

matchgroup %w[
	codex murex silex
].collect {|word| word[0...-2]}
PL_sb_U_ix_ices =
matchgroup %w[
	radix helix
].collect {|word| word[0...-2]}
PL_sb_C_ex_ices =

Classical “..[ei]x” -> “..ices”

matchgroup %w[
	vortex vertex cortex latex
	pontifex apex index simplex
].collect {|word| word[0...-2]}
PL_sb_C_ix_ices =
matchgroup %w[
	appendix
].collect {|word| word[0...-2]}
PL_sb_C_i =

Arabic: “..” -> “..i”

matchgroup %w[
	afrit afreet efreet
]
PL_sb_C_im =

Hebrew: “..” -> “..im”

matchgroup %w[
	goy seraph cherub
]
PL_sb_U_man_mans =

Unconditional “..man” -> “..mans”

matchgroup %w[
	human
	Alabaman Bahaman Burman German
	Hiroshiman Liman Nakayaman Oklahoman
	Panaman Selman Sonaman Tacoman Yakiman
	Yokohaman Yuman
]
PL_sb_uninflected_s =
[
	# Pairs or groups subsumed to a singular...
	"breeches", "britches", "clippers", "gallows", "hijinks",
	"headquarters", "pliers", "scissors", "testes", "herpes",
	"pincers", "shears", "proceedings", "trousers",

	# Unassimilated Latin 4th declension
	"cantus", "coitus", "nexus",

	# Recent imports...
	"contretemps", "corps", "debris",
	/.*ois/,

	# Diseases
	/.*measles/, "mumps",

	# Miscellaneous others...
	"diabetes", "jackanapes", "series", "species", "rabies",
	"chassis", "innings", "news", "mews",
]
PL_sb_uninflected_herd =

Don’t inflect in classical mode, otherwise normal inflection

matchgroup %w[
	wildebeest swine eland bison buffalo
	elk moose rhinoceros
]
PL_sb_uninflected =
matchgroup(

	# Some fish and herd animals
	/.*fish/, "tuna", "salmon", "mackerel", "trout",
	"bream", /sea[- ]bass/, "carp", "cod", "flounder", "whiting",

	/.*deer/, /.*sheep/,

	# All nationals ending in -ese
	"Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
	"Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
	"Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
	"Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
	"Shavese", "Vermontese", "Wenchowese", "Yengeese",
	/.*[nrlm]ese/,

	# Some words ending in ...s (often pairs taken as a whole)
	PL_sb_uninflected_s,

	# Diseases
	/.*pox/,

	# Other oddities
	"graffiti", "djinn"
)
PL_sb_singular_s =

Singular words ending in …s (all inflect with …es)

matchgroup [ /.*ss/, /.*us/ ] +
%w[
	acropolis aegis alias arthritis asbestos atlas
	bathos bias bronchitis bursitis caddis cannabis
	canvas chaos cosmos dais digitalis encephalitis
	epidermis ethos eyas gas glottis hepatitis
	hubris ibis lens mantis marquis metropolis
	neuritis pathos pelvis polis rhinoceros
	sassafras tonsillitis trellis
]
PL_v_special_s =
matchgroup [
	PL_sb_singular_s,
	PL_sb_uninflected_s,
	PL_sb_irregular_s.keys,
	/(.*[csx])is/,
	/(.*)ceps/,
	/[A-Z].*s/,
]
PL_sb_postfix_adj =
'(' + {
	'general' => '(?!major|lieutenant|brigadier|adjutant)\S+',
	'martial' => "court",
}.collect {|key,val|
	"(?:#{val})(?=(?:-|\\s+)#{key})"
}.join("|") + ")(.*)"
PL_sb_military =
%r'major|lieutenant|brigadier|adjutant|quartermaster'
PL_sb_general =
%r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
PL_prep =
matchgroup %w[
	about above across after among around at athwart before behind
	below beneath beside besides between betwixt beyond but by
	during except for from in into near of off on onto out over
	since till to under until unto upon with
]
PL_sb_prep_dual_compound =
%r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
PL_sb_prep_compound =
%r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
PL_pron_nom_h =
{
	#   Nominative      Reflexive
	"i"     => "we",    "myself"   => "ourselves",
	"you"   => "you",   "yourself" => "yourselves",
	"she"   => "they",  "herself"  => "themselves",
	"he"    => "they",  "himself"  => "themselves",
	"it"    => "they",  "itself"   => "themselves",
	"they"  => "they",  "themself" => "themselves",

	#   Possessive
	"mine"   => "ours",
	"yours"  => "yours",
	"hers"   => "theirs",
	"his"    => "theirs",
	"its"    => "theirs",
	"theirs" => "theirs",
}
PL_pron_nom =
Regexp.new( PL_pron_nom_h.keys.join('|'), Regexp::IGNORECASE )
PL_pron_acc_h =
{
	#	Accusative Reflexive
	"me"    => "us",    "myself"   =>   "ourselves",
	"you"   => "you",   "yourself" =>   "yourselves",
	"her"   => "them",  "herself"  =>   "themselves",
	"him"   => "them",  "himself"  =>   "themselves",
	"it"    => "them",  "itself"   =>   "themselves",
	"them"  => "them",  "themself" =>   "themselves",
}
PL_pron_acc =
matchgroup PL_pron_acc_h.keys
# Lookup table of irregular verb forms: each key is a singular form and each
# value is the form it maps to when pluralized. The entries are laid out in
# columns by grammatical person, as labelled in the header comment below.
PL_v_irregular_pres_h =
{
	# 1st pers. sing.     2nd pers. sing.     3rd pers. singular
	#                     3rd pers. (indet.)
	"am"    => "are",     "are"   => "are",   "is"  => "are",
	"was"   => "were",    "were"  => "were",
	"have"  => "have",                        "has" => "have",
}
PL_v_irregular_pres =
matchgroup PL_v_irregular_pres_h.keys
PL_v_ambiguous_pres_h =
{
	#   1st pers. sing.     2nd pers. sing.     3rd pers. singular
	#                       3rd pers. (indet.)
	"act"   => "act",                           "acts"    => "act",
	"blame" => "blame",                         "blames"  => "blame",
	"can"   => "can",
	"must"  => "must",
	"fly"   => "fly",                           "flies"   => "fly",
	"copy"  => "copy",                          "copies"  => "copy",
	"drink" => "drink",                         "drinks"  => "drink",
	"fight" => "fight",                         "fights"  => "fight",
	"fire"  => "fire",                          "fires"   => "fire",
	"like"  => "like",                          "likes"   => "like",
	"look"  => "look",                          "looks"   => "look",
	"make"  => "make",                          "makes"   => "make",
	"reach" => "reach",                         "reaches" => "reach",
	"run"   => "run",                           "runs"    => "run",
	"sink"  => "sink",                          "sinks"   => "sink",
	"sleep" => "sleep",                         "sleeps"  => "sleep",
	"view"  => "view",                          "views"   => "view",
}
PL_v_ambiguous_pres =
matchgroup PL_v_ambiguous_pres_h.keys
PL_v_irregular_non_pres =
matchgroup %w[
	did had ate made put
	spent fought sank gave sought
	shall could ought should
]
PL_v_ambiguous_non_pres =
matchgroup %w[
	thought saw bent will might cut
]
PL_count_zero =
matchgroup %w[
	0 no zero nil
]
PL_count_one =
matchgroup %w[
	1 a an one each every this that
]
PL_adj_special_h =
{
	"a"    => "some",   "an"   =>  "some",
	"this" => "these",  "that" => "those",
}
PL_adj_special =
matchgroup PL_adj_special_h.keys
PL_adj_poss_h =
{
	"my"    => "our",
	"your"  => "your",
	"its"   => "their",
	"her"   => "their",
	"his"   => "their",
	"their" => "their",
}
PL_adj_poss =
matchgroup PL_adj_poss_h.keys

Instance Method Summary collapse

Instance Method Details

#plural(count = 2) ⇒ Object

Return the plural of the given phrase if count indicates it should be plural.



399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
# File 'lib/linguistics/en/pluralization.rb', line 399

# Return the plural of the receiver's phrase if +count+ indicates it should
# be plural.
#
# Receivers that respond to #to_int are first turned into words via
# #numwords; everything else is stringified with #to_s. Surrounding
# whitespace is set aside and re-attached around the inflected text. A
# phrase the splitting pattern cannot match is returned untouched.
def plural( count=2 )
	phrase = self.respond_to?( :to_int ) ? self.numwords : self.to_s
	self.log.debug "Pluralizing %p" % [ phrase ]

	# Separate leading whitespace, the text to inflect, and trailing
	# whitespace; give up early when there is nothing to inflect.
	md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
	return phrase unless md
	leading, body, trailing = md.captures

	# Try each inflector in turn; the first one to produce a result wins.
	inflected   = pluralize_special_adjective( body, count )
	inflected ||= pluralize_special_verb( body, count )
	inflected ||= pluralize_noun( body, count )

	return leading + postprocess( body, inflected ) + trailing
end

#plural_adjective(count = 2) ⇒ Object Also known as: plural_adj

Return the plural of the given adjectival phrase if count indicates it should be plural.



461
462
463
464
465
466
467
468
469
470
471
# File 'lib/linguistics/en/pluralization.rb', line 461

# Return the plural of the receiver's adjectival phrase if +count+ indicates
# it should be plural.
#
# Leading and trailing whitespace is set aside and re-attached around the
# result. A phrase the splitting pattern cannot match (for example an empty
# string, or text with an interior newline) is returned unchanged.
def plural_adjective( count=2 )
	phrase = self.to_s

	# Split into leading whitespace, the text to inflect, and trailing
	# whitespace. The match is nil when there is nothing to inflect, so bail
	# out before touching the captures.
	md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
	return phrase unless md
	pre, word, post = md.captures

	# Keep the word itself when pluralize_special_adjective yields nothing.
	plural = postprocess( word, pluralize_special_adjective(word, count) || word )

	return pre + plural + post
end

#plural_noun(count = 2) ⇒ Object

Return the plural of the given noun phrase if count indicates it should be plural.



429
430
431
432
433
434
435
436
437
438
439
# File 'lib/linguistics/en/pluralization.rb', line 429

# Return the plural of the receiver's noun phrase if +count+ indicates it
# should be plural.
#
# Leading and trailing whitespace is set aside and re-attached around the
# result. A phrase the splitting pattern cannot match (for example an empty
# string, or text with an interior newline) is returned unchanged.
def plural_noun( count=2 )
	phrase = self.to_s

	# Split into leading whitespace, the text to inflect, and trailing
	# whitespace. The match is nil when there is nothing to inflect, so bail
	# out before touching the captures.
	md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
	return phrase unless md
	pre, word, post = md.captures

	plural = postprocess( word, pluralize_noun(word, count) )

	return pre + plural + post
end

#plural_verb(count = 2) ⇒ Object

Return the plural of the given verb phrase if count indicates it should be plural.



444
445
446
447
448
449
450
451
452
453
454
455
456
# File 'lib/linguistics/en/pluralization.rb', line 444

# Return the plural of the receiver's verb phrase if +count+ indicates it
# should be plural.
#
# Leading and trailing whitespace is set aside and re-attached around the
# result. A phrase the splitting pattern cannot match (for example an empty
# string, or text with an interior newline) is returned unchanged.
def plural_verb( count=2 )
	phrase = self.to_s

	# Split into leading whitespace, the text to inflect, and trailing
	# whitespace. The match is nil when there is nothing to inflect, so bail
	# out before touching the captures.
	md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
	return phrase unless md
	pre, word, post = md.captures

	# Special-case verbs take priority; otherwise use the general verb rules.
	plural = postprocess( word,
		pluralize_special_verb(word, count) ||
		pluralize_general_verb(word, count) )

	return pre + plural + post
end