Module: Linguistics::EN::Pluralization

Defined in:: lib/linguistics/en/pluralization.rb

Overview

Plural inflection methods for the English-language Linguistics module.

It provides conversion to the plural forms of all nouns, most verbs, and some adjectives. It also provides “classical” variants (for example: “brother” -> “brethren”, “dogma” -> “dogmata”, etc.) where appropriate.

Constant Summary collapse

PL_sb_irregular_s = Plurals

{
  "ephemeris" => "ephemerides",
  "iris"      => "irises|irides",
  "clitoris"  => "clitorises|clitorides",
  "corpus"    => "corpuses|corpora",
  "opus"      => "opuses|opera",
  "genus"     => "genera",
  "mythos"    => "mythoi",
  "penis"     => "penises|penes",
  "testis"    => "testes",
}

PL_sb_irregular_h =

{
  "child"       => "children",
  "brother"     => "brothers|brethren",
  "loaf"        => "loaves",
  "hoof"        => "hoofs|hooves",
  "beef"        => "beefs|beeves",
  "money"       => "monies",
  "mongoose"    => "mongooses",
  "ox"          => "oxen",
  "cow"         => "cows|kine",
  "soliloquy"   => "soliloquies",
  "graffito"    => "graffiti",
  "prima donna" => "prima donnas|prime donne",
  "octopus"     => "octopuses|octopodes",
  "genie"       => "genies|genii",
  "ganglion"    => "ganglions|ganglia",
  "trilby"      => "trilbys",
  "turf"        => "turfs|turves",
}.update( PL_sb_irregular_s )

PL_sb_irregular =

matchgroup PL_sb_irregular_h.keys

PL_sb_C_a_ata = Classical “..a” -> “..ata”

matchgroup %w[
  anathema bema carcinoma charisma diploma
  dogma drama edema enema enigma lemma
  lymphoma magma melisma miasma oedema
  sarcoma schema soma stigma stoma trauma
  gumma pragma
].collect {|word| word[0...-1]}

PL_sb_U_a_ae = Unconditional “..a” -> “..ae”

matchgroup %w[
  alumna alga vertebra persona
]

PL_sb_C_a_ae = Classical “..a” -> “..ae”

matchgroup [/.*umbra/ ] + %w[
  amoeba antenna formula hyperbola
  medusa nebula parabola abscissa
  hydra nova lacuna aurora
  flora fauna
]

PL_sb_C_en_ina = Classical “..en” -> “..ina”

matchgroup %w[
  stamen foramen lumen
].collect {|word| word[0...-2] }

PL_sb_U_um_a = Unconditional “..um” -> “..a”

matchgroup %w[
  bacterium agendum desideratum erratum
  stratum datum ovum extremum candelabrum
].collect {|word| word[0...-2] }

PL_sb_C_um_a = Classical “..um” -> “..a”

# Stems (the word minus its final "um") of nouns that take "..a" as their
# classical plural. The list carries both the correct spellings "encomium"
# and "millennium" and the historical misspellings "enconium" and
# "millenium", so correctly spelled input is recognised while input that
# relied on the old entries keeps working.
matchgroup %w[
  maximum minimum momentum optimum
  quantum cranium curriculum dictum
  phylum aquarium compendium emporium
  encomium enconium gymnasium honorarium
  interregnum lustrum memorandum millennium
  millenium rostrum
  spectrum speculum stadium trapezium
  ultimatum medium vacuum velum
  consortium
].collect {|word| word[0...-2]}

PL_sb_U_us_i = Unconditional “..us” -> “..i”

matchgroup %w[
  alumnus alveolus bacillus bronchus
  locus nucleus stimulus meniscus
].collect {|word| word[0...-2]}

PL_sb_C_us_i = Classical “..us” -> “..i”

matchgroup %w[
  focus   radius      genius
  incubus succubus    nimbus
  fungus  nucleolus   stylus
  torus   umbilicus   uterus
  hippopotamus
].collect {|word| word[0...-2]}

PL_sb_C_us_us = Classical “..us” -> “..us” (assimilated 4th declension Latin nouns)

matchgroup %w[
  status apparatus prospectus sinus
  hiatus impetus plexus
]

PL_sb_U_on_a = Unconditional “..on” -> “..a”

matchgroup %w[
  criterion perihelion aphelion
  phenomenon prolegomenon noumenon
  organon asyndeton hyperbaton
].collect {|word| word[0...-2]}

PL_sb_C_on_a = Classical “..on” -> “..a”

matchgroup %w[
  oxymoron
].collect {|word| word[0...-2]}

PL_sb_C_o_i_a = Classical “..o” -> “..i” (but normally -> “..os”)

%w[
  solo soprano basso alto
  contralto tempo piano
]

PL_sb_C_o_i =

matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}

PL_sb_U_o_os = Always “..o” -> “..os”

matchgroup( %w[
  albino archipelago armadillo
  commando crescendo fiasco
  ditto dynamo embryo
  ghetto guano inferno
  jumbo lumbago magneto
  manifesto medico octavo
  photo pro quarto
  canto lingo generalissimo
  stylo rhino
] | PL_sb_C_o_i_a )

PL_sb_U_ex_ices = Unconditional “..[ei]x” -> “..ices”

matchgroup %w[
  codex murex silex
].collect {|word| word[0...-2]}

PL_sb_U_ix_ices =

matchgroup %w[
  radix helix
].collect {|word| word[0...-2]}

PL_sb_C_ex_ices = Classical “..[ei]x” -> “..ices”

matchgroup %w[
  vortex vertex cortex latex
  pontifex apex index simplex
].collect {|word| word[0...-2]}

PL_sb_C_ix_ices =

matchgroup %w[
  appendix
].collect {|word| word[0...-2]}

PL_sb_C_i = Arabic: “..” -> “..i”

matchgroup %w[
  afrit afreet efreet
]

PL_sb_C_im = Hebrew: “..” -> “..im”

matchgroup %w[
  goy seraph cherub
]

PL_sb_U_man_mans = Unconditional “..man” -> “..mans”

matchgroup %w[
  human
  Alabaman Bahaman Burman German
  Hiroshiman Liman Nakayaman Oklahoman
  Panaman Selman Sonaman Tacoman Yakiman
  Yokohaman Yuman
]

PL_sb_uninflected_s =

[
  # Pairs or groups subsumed to a singular...
  "breeches", "britches", "clippers", "gallows", "hijinks",
  "headquarters", "pliers", "scissors", "testes", "herpes",
  "pincers", "shears", "proceedings", "trousers",

  # Unassimilated Latin 4th declension
  "cantus", "coitus", "nexus",

  # Recent imports...
  "contretemps", "corps", "debris",
  /.*ois/,

  # Diseases
  /.*measles/, "mumps",

  # Miscellaneous others...
  "diabetes", "jackanapes", "series", "species", "rabies",
  "chassis", "innings", "news", "mews",
]

PL_sb_uninflected_herd = Don’t inflect in classical mode, otherwise normal inflection

matchgroup %w[
  wildebeest swine eland bison buffalo
  elk moose rhinoceros
]

PL_sb_uninflected =

matchgroup(

  # Some fish and herd animals
  /.*fish/, "tuna", "salmon", "mackerel", "trout",
  "bream", /sea[- ]bass/, "carp", "cod", "flounder", "whiting",

  /.*deer/, /.*sheep/,

  # All nationals ending in -ese
  "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
  "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
  "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
  "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
  "Shavese", "Vermontese", "Wenchowese", "Yengeese",
  /.*[nrlm]ese/,

  # Some words ending in ...s (often pairs taken as a whole)
  PL_sb_uninflected_s,

  # Diseases
  /.*pox/,

  # Other oddities
  "graffiti", "djinn"
)

PL_sb_singular_s = Singular words ending in …s (all inflect with …es)

matchgroup [ /.*ss/, /.*us/ ] +
%w[
  acropolis aegis alias arthritis asbestos atlas
  bathos bias bronchitis bursitis caddis cannabis
  canvas chaos cosmos dais digitalis encephalitis
  epidermis ethos eyas gas glottis hepatitis
  hubris ibis lens mantis marquis metropolis
  neuritis pathos pelvis polis rhinoceros
  sassafras tonsillitis trellis
]

PL_v_special_s =

matchgroup [
  PL_sb_singular_s,
  PL_sb_uninflected_s,
  PL_sb_irregular_s.keys,
  /(.*[csx])is/,
  /(.*)ceps/,
  /[A-Z].*s/,
]

PL_sb_postfix_adj =

'(' + {
  'general' => '(?!major|lieutenant|brigadier|adjutant)\S+',
  'martial' => "court",
}.collect {|key,val|
  "(?:#{val})(?=(?:-|\\s+)#{key})"
}.join("|") + ")(.*)"

PL_sb_military =

%r'major|lieutenant|brigadier|adjutant|quartermaster'

PL_sb_general =

%r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'

PL_prep =

matchgroup %w[
  about above across after among around at athwart before behind
  below beneath beside besides between betwixt beyond but by
  during except for from in into near of off on onto out over
  since till to under until unto upon with
]

PL_sb_prep_dual_compound =

%r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'

PL_sb_prep_compound =

%r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'

PL_pron_nom_h =

{
  #   Nominative      Reflexive
  "i"     => "we",    "myself"   => "ourselves",
  "you"   => "you",   "yourself" => "yourselves",
  "she"   => "they",  "herself"  => "themselves",
  "he"    => "they",  "himself"  => "themselves",
  "it"    => "they",  "itself"   => "themselves",
  "they"  => "they",  "themself" => "themselves",

  #   Possessive
  "mine"   => "ours",
  "yours"  => "yours",
  "hers"   => "theirs",
  "his"    => "theirs",
  "its"    => "theirs",
  "theirs" => "theirs",
}

PL_pron_nom =

Regexp.new( PL_pron_nom_h.keys.join('|'), Regexp::IGNORECASE )

PL_pron_acc_h =

{
  #  Accusative Reflexive
  "me"    => "us",    "myself"   =>   "ourselves",
  "you"   => "you",   "yourself" =>   "yourselves",
  "her"   => "them",  "herself"  =>   "themselves",
  "him"   => "them",  "himself"  =>   "themselves",
  "it"    => "them",  "itself"   =>   "themselves",
  "them"  => "them",  "themself" =>   "themselves",
}

PL_pron_acc =

matchgroup PL_pron_acc_h.keys

PL_v_irregular_pres_h =

{
  # 1st pers. sing.     2nd pers. sing.     3rd pers. singular
  #                     3rd pers. (indet.)
  "am"    => "are",     "are"   => "are",   "is"  => "are",
  "was"   => "were",    "were"  => "were",
  "have"  => "have",                        "has" => "have",
}

PL_v_irregular_pres =

matchgroup PL_v_irregular_pres_h.keys

PL_v_ambiguous_pres_h =

{
  #   1st pers. sing.     2nd pers. sing.     3rd pers. singular
  #                       3rd pers. (indet.)
  "act"   => "act",                           "acts"    => "act",
  "blame" => "blame",                         "blames"  => "blame",
  "can"   => "can",
  "must"  => "must",
  "fly"   => "fly",                           "flies"   => "fly",
  "copy"  => "copy",                          "copies"  => "copy",
  "drink" => "drink",                         "drinks"  => "drink",
  "fight" => "fight",                         "fights"  => "fight",
  "fire"  => "fire",                          "fires"   => "fire",
  "like"  => "like",                          "likes"   => "like",
  "look"  => "look",                          "looks"   => "look",
  "make"  => "make",                          "makes"   => "make",
  "reach" => "reach",                         "reaches" => "reach",
  "run"   => "run",                           "runs"    => "run",
  "sink"  => "sink",                          "sinks"   => "sink",
  "sleep" => "sleep",                         "sleeps"  => "sleep",
  "view"  => "view",                          "views"   => "view",
}

PL_v_ambiguous_pres =

matchgroup PL_v_ambiguous_pres_h.keys

PL_v_irregular_non_pres =

matchgroup %w[
  did had ate made put
  spent fought sank gave sought
  shall could ought should
]

PL_v_ambiguous_non_pres =

matchgroup %w[
  thought saw bent will might cut
]

PL_count_zero =

matchgroup %w[
  0 no zero nil
]

PL_count_one =

matchgroup %w[
  1 a an one each every this that
]

PL_adj_special_h =

{
  "a"    => "some",   "an"   =>  "some",
  "this" => "these",  "that" => "those",
}

PL_adj_special =

matchgroup PL_adj_special_h.keys

PL_adj_poss_h =

{
  "my"    => "our",
  "your"  => "your",
  "its"   => "their",
  "her"   => "their",
  "his"   => "their",
  "their" => "their",
}

PL_adj_poss =

matchgroup PL_adj_poss_h.keys

Instance Method Summary collapse

#plural(count = 2) ⇒ Object

Return the plural of the given phrase if count indicates it should be plural.
#plural_adjective(count = 2) ⇒ Object (also: #plural_adj)

Return the plural of the given adjectival phrase if count indicates it should be plural.
#plural_noun(count = 2) ⇒ Object

Return the plural of the given noun phrase if count indicates it should be plural.
#plural_verb(count = 2) ⇒ Object

Return the plural of the given verb phrase if count indicates it should be plural.

Instance Method Details

#plural(count = 2) ⇒ `Object`

Return the plural of the given phrase if count indicates it should be plural.

# File 'lib/linguistics/en/pluralization.rb', line 399

# Return the plural of the receiver's phrase if +count+ indicates it should
# be plural.
#
# A receiver that responds to +to_int+ is first converted with #numwords;
# anything else is converted with #to_s. Surrounding whitespace is split off
# before inflection and re-attached to the result. The core text is tried as
# a special adjective, then as a special verb, and finally as a noun; the
# first non-nil result is handed to #postprocess along with the original
# text. A phrase the whitespace pattern cannot match is returned as-is.
def plural( count=2 )
  phrase = self.respond_to?( :to_int ) ? self.numwords : self.to_s
  self.log.debug "Pluralizing %p" % [ phrase ]

  # Separate leading whitespace, the text to inflect, and trailing whitespace.
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  return phrase unless md

  leading, core, trailing = md.captures

  inflected = postprocess(
    core,
    pluralize_special_adjective(core, count) ||
      pluralize_special_verb(core, count) ||
      pluralize_noun(core, count)
  )

  return leading + inflected + trailing
end

#plural_adjective(count = 2) ⇒ `Object` Also known as: plural_adj

Return the plural of the given adjectival phrase if count indicates it should be plural.

# File 'lib/linguistics/en/pluralization.rb', line 461

# Return the plural of the receiver's adjectival phrase (its #to_s) if
# +count+ indicates it should be plural.
#
# Surrounding whitespace is split off before inflection and re-attached to
# the result. When #pluralize_special_adjective returns nil the word itself
# is used; either way the value is passed through #postprocess together with
# the original word.
#
# A phrase the whitespace pattern cannot match -- an empty string, or text
# with an embedded newline -- is returned unchanged instead of raising
# NoMethodError on a nil match.
def plural_adjective( count=2 )
  phrase = self.to_s

  # '.' does not cross newlines and '.+?' needs at least one character, so
  # the match can legitimately fail; bail out before touching the captures.
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
  return phrase unless md

  pre, word, post = md.captures

  plural = postprocess( word, pluralize_special_adjective(word, count) || word )

  return pre + plural + post
end

#plural_noun(count = 2) ⇒ `Object`

Return the plural of the given noun phrase if count indicates it should be plural.

# File 'lib/linguistics/en/pluralization.rb', line 429

# Return the plural of the receiver's noun phrase (its #to_s) if +count+
# indicates it should be plural.
#
# Surrounding whitespace is split off before inflection and re-attached to
# the result. The inflected form comes from #pluralize_noun and is passed
# through #postprocess together with the original word.
#
# A phrase the whitespace pattern cannot match -- an empty string, or text
# with an embedded newline -- is returned unchanged instead of raising
# NoMethodError on a nil match.
def plural_noun( count=2 )
  phrase = self.to_s

  # '.' does not cross newlines and '.+?' needs at least one character, so
  # the match can legitimately fail; bail out before touching the captures.
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
  return phrase unless md

  pre, word, post = md.captures

  plural = postprocess( word, pluralize_noun(word, count) )

  return pre + plural + post
end

#plural_verb(count = 2) ⇒ `Object`

Return the plural of the given verb phrase if count indicates it should be plural.

# File 'lib/linguistics/en/pluralization.rb', line 444

# Return the plural of the receiver's verb phrase (its #to_s) if +count+
# indicates it should be plural.
#
# Surrounding whitespace is split off before inflection and re-attached to
# the result. #pluralize_special_verb is tried first, and
# #pluralize_general_verb is used when it returns nil; the chosen form is
# passed through #postprocess together with the original word.
#
# A phrase the whitespace pattern cannot match -- an empty string, or text
# with an embedded newline -- is returned unchanged instead of raising
# NoMethodError on a nil match.
def plural_verb( count=2 )
  phrase = self.to_s

  # '.' does not cross newlines and '.+?' needs at least one character, so
  # the match can legitimately fail; bail out before touching the captures.
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase )
  return phrase unless md

  pre, word, post = md.captures

  plural = postprocess( word,
    pluralize_special_verb(word, count) ||
    pluralize_general_verb(word, count) )

  return pre + plural + post
end

Module: Linguistics::EN::Pluralization

Overview

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#plural(count = 2) ⇒ Object

#plural_adjective(count = 2) ⇒ Object Also known as: plural_adj

#plural_noun(count = 2) ⇒ Object

#plural_verb(count = 2) ⇒ Object

#plural(count = 2) ⇒ `Object`

#plural_adjective(count = 2) ⇒ `Object` Also known as: plural_adj

#plural_noun(count = 2) ⇒ `Object`

#plural_verb(count = 2) ⇒ `Object`