Module: Linguistics::EN

Defined in: lib/linguistics/en.rb,
lib/linguistics/en/wordnet.rb,
lib/linguistics/en/infinitive.rb,
lib/linguistics/en/linkparser.rb

Overview

This module contains English-language linguistics functions accessible from the Linguistics module, or as a standalone function library.

Defined Under Namespace

Classes: Infinitive

Constant Summary

Version = CVS version tag

/([\d\.]+)/.match( %q{$Revision: 1.8 $} )[1]

Rcsid = CVS revision tag

%q$Id: en.rb,v 1.8 2003/09/14 10:47:12 deveiant Exp $

PL_sb_irregular_s = Plurals

{
  "ephemeris"  => "ephemerides",
  "iris"   => "irises|irides",
  "clitoris" => "clitorises|clitorides",
  "corpus" => "corpuses|corpora",
  "opus"   => "opuses|opera",
  "genus"    => "genera",
  "mythos" => "mythoi",
  "penis"    => "penises|penes",
  "testis" => "testes",
}

# Irregular noun plurals keyed by singular form. Where two plural forms are
# listed they are separated by "|". The entries of PL_sb_irregular_s are
# folded in after the literal ones.
PL_sb_irregular_h = {
  "child"       => "children",
  "brother"     => "brothers|brethren",
  "loaf"        => "loaves",
  "hoof"        => "hoofs|hooves",
  "beef"        => "beefs|beeves",
  "money"       => "monies",
  "mongoose"    => "mongooses",
  "ox"          => "oxen",
  "cow"         => "cows|kine",
  "soliloquy"   => "soliloquies",
  "graffito"    => "graffiti",
  "prima donna" => "prima donnas|prime donne",
  "octopus"     => "octopuses|octopodes",
  "genie"       => "genies|genii",
  "ganglion"    => "ganglions|ganglia",
  "trilby"      => "trilbys",
  "turf"        => "turfs|turves",
}.merge(PL_sb_irregular_s)

# Result of matchgroup over every singular form (key) of PL_sb_irregular_h.
# NOTE(review): matchgroup is defined outside this view; presumably it builds
# a regexp alternation -- confirm.
PL_sb_irregular = matchgroup(PL_sb_irregular_h.keys)

PL_sb_C_a_ata = Classical “..a” -> “..ata”

matchgroup %w[
  anathema bema carcinoma charisma diploma
  dogma drama edema enema enigma lemma
  lymphoma magma melisma miasma oedema
  sarcoma schema soma stigma stoma trauma
  gumma pragma
].collect {|word| word[0...-1]}

PL_sb_U_a_ae = Unconditional “..a” -> “..ae”

matchgroup %w[
  alumna alga vertebra persona
]

PL_sb_C_a_ae = Classical “..a” -> “..ae”

matchgroup %w[
  amoeba antenna formula hyperbola
  medusa nebula parabola abscissa
  hydra nova lacuna aurora .*umbra
  flora fauna
]

PL_sb_C_en_ina = Classical “..en” -> “..ina”

matchgroup %w[
  stamen foramen lumen
].collect {|word| word[0...-2] }

PL_sb_U_um_a = Unconditional “..um” -> “..a”

matchgroup %w[
  bacterium  agendum  desideratum  erratum
  stratum  datum  ovum   extremum
  candelabrum
].collect {|word| word[0...-2] }

PL_sb_C_um_a = Classical “..um” -> “..a”

# Nouns ending in "um"; only the stem (the word minus its last two letters)
# is handed to matchgroup. The entries "encomium" and "millennium" were
# previously misspelled ("enconium", "millenium"), so the correctly spelled
# nouns never matched this list.
matchgroup %w[
  maximum  minimum  momentum optimum
  quantum  cranium  curriculum dictum
  phylum aquarium  compendium emporium
  encomium gymnasium honorarium  interregnum
  lustrum  memorandum  millennium  rostrum
  spectrum speculum  stadium  trapezium
  ultimatum  medium vacuum  velum
  consortium
].collect {|word| word[0...-2]}

PL_sb_U_us_i = Unconditional “..us” -> “..i”

matchgroup %w[
  alumnus  alveolus bacillus  bronchus
  locus  nucleus  stimulus meniscus
].collect {|word| word[0...-2]}

PL_sb_C_us_i = Classical “..us” -> “..i”

matchgroup %w[
  focus  radius genius
  incubus  succubus nimbus
  fungus nucleolus stylus
  torus  umbilicus  uterus
  hippopotamus
].collect {|word| word[0...-2]}

PL_sb_C_us_us = Classical “..us” -> “..us” (assimilated 4th declension Latin nouns)

matchgroup %w[
  status apparatus prospectus sinus
  hiatus impetus plexus
]

PL_sb_U_on_a = Unconditional “..on” -> “..a”

matchgroup %w[
  criterion  perihelion aphelion
  phenomenon prolegomenon  noumenon
  organon  asyndeton  hyperbaton
].collect {|word| word[0...-2]}

PL_sb_C_on_a = Classical “..on” -> “..a”

matchgroup %w[
  oxymoron
].collect {|word| word[0...-2]}

PL_sb_C_o_i_a = Classical “..o” -> “..i” (but normally -> “..os”)

%w[
  solo   soprano  basso  alto
  contralto  tempo  piano
]

# Each PL_sb_C_o_i_a word with its final letter removed, handed to matchgroup
# (defined outside this view).
PL_sb_C_o_i = matchgroup(PL_sb_C_o_i_a.map { |noun| noun[0...-1] })

PL_sb_U_o_os = Always “..o” -> “..os”

matchgroup( %w[
  albino archipelago armadillo
  commando crescendo fiasco
  ditto  dynamo embryo
  ghetto guano inferno
  jumbo  lumbago  magneto
  manifesto  medico octavo
  photo  pro    quarto  
  canto  lingo  generalissimo
  stylo  rhino
] | PL_sb_C_o_i_a )

PL_sb_U_ex_ices = Unconditional “..[ei]x” -> “..ices”

matchgroup %w[
  codex  murex  silex
].collect {|word| word[0...-2]}

# Stems of the listed "..ix" nouns (each word minus its last two letters),
# handed to matchgroup (defined outside this view).
PL_sb_U_ix_ices = matchgroup(%w[radix helix].map { |noun| noun[0...-2] })

PL_sb_C_ex_ices = Classical “..[ei]x” -> “..ices”

matchgroup %w[
  vortex vertex  cortex latex
  pontifex apex    index simplex
].collect {|word| word[0...-2]}

# Stem of "appendix" (the word minus its last two letters), handed to
# matchgroup (defined outside this view).
PL_sb_C_ix_ices = matchgroup(%w[appendix].map { |noun| noun[0...-2] })

PL_sb_C_i = Arabic: “..” -> “..i”

matchgroup %w[
  afrit  afreet efreet
]

PL_sb_C_im = Hebrew: “..” -> “..im”

matchgroup %w[
  goy    seraph  cherub
]

PL_sb_U_man_mans = Unconditional “..man” -> “..mans”

matchgroup %w[
  human
  Alabaman Bahaman Burman German
  Hiroshiman Liman Nakayaman Oklahoman
  Panaman Selman Sonaman Tacoman Yakiman
  Yokohaman Yuman
]

# Words ending in "s" collected under the "uninflected" heading. Two entries
# (".*ois", ".*measles") are pattern fragments rather than literal words.
PL_sb_uninflected_s = (
  # Pairs or groups subsumed to a singular...
  %w[
    breeches britches clippers gallows hijinks
    headquarters pliers scissors testes herpes
    pincers shears proceedings trousers
  ] +

  # Unassimilated Latin 4th declension
  %w[cantus coitus nexus] +

  # Recent imports...
  %w[contretemps corps debris .*ois] +

  # Diseases
  %w[.*measles mumps] +

  # Miscellaneous others...
  %w[diabetes jackanapes series species rabies chassis innings news mews]
)

PL_sb_uninflected_herd = Don’t inflect in classical mode, otherwise normal inflection

matchgroup %w[
  wildebeest swine eland bison buffalo
  elk moose rhinoceros
]

# Words and pattern fragments collected under the "uninflected" heading,
# handed to matchgroup (defined outside this view). PL_sb_uninflected_s is
# passed as a nested array element, exactly as before.
PL_sb_uninflected = matchgroup([
  # Some fish and herd animals
  '.*fish', 'tuna', 'salmon', 'mackerel', 'trout', 'bream',
  'sea[- ]bass', 'carp', 'cod', 'flounder', 'whiting',
  '.*deer', '.*sheep',

  # All nationals ending in -ese
  'Portuguese', 'Amoyese', 'Borghese', 'Congoese', 'Faroese', 'Foochowese',
  'Genevese', 'Genoese', 'Gilbertese', 'Hottentotese', 'Kiplingese',
  'Kongoese', 'Lucchese', 'Maltese', 'Nankingese', 'Niasese', 'Pekingese',
  'Piedmontese', 'Pistoiese', 'Sarawakese', 'Shavese', 'Vermontese',
  'Wenchowese', 'Yengeese',
  '.*[nrlm]ese',

  # Some words ending in ...s (often pairs taken as a whole)
  PL_sb_uninflected_s,

  # Diseases
  '.*pox',

  # Other oddities
  'graffiti', 'djinn',
])

PL_sb_singular_s = Singular words ending in …s (all inflect with …es)

matchgroup %w[
  .*ss
  acropolis aegis alias arthritis asbestos atlas
  bathos bias bronchitis bursitis caddis cannabis
  canvas chaos cosmos dais digitalis encephalitis
  epidermis ethos eyas gas glottis hepatitis
  hubris ibis lens mantis marquis metropolis
  neuritis pathos pelvis polis rhinoceros
  sassafras tonsillitis trellis .*us
]

# Combination of the singular-s, uninflected-s and irregular-s noun lists
# with three extra pattern fragments, handed to matchgroup (defined outside
# this view).
PL_v_special_s = matchgroup([
  PL_sb_singular_s,
  PL_sb_uninflected_s,
  PL_sb_irregular_s.keys,
  "(.*[csx])is",
  "(.*)ceps",
  "[A-Z].*s",
])

# Builds a pattern source string of the form "(<alt>|<alt>)(.*)". Each hash
# key is a postfix adjective and each value lists patterns for the word in
# front of it; an alternative is that word followed by a lookahead for a
# hyphen or whitespace and then the adjective.
# NOTE(review): matchgroup is defined outside this view; the `+` below implies
# it returns a String -- confirm.
PL_sb_postfix_adj =

'(' + {

  # A run of non-space characters that does not begin with one of the listed
  # military ranks.
  'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
  'martial' => ["court"],

}.collect {|key,val|
  matchgroup( matchgroup(val) + "(?=(?:-|\\s+)#{key})" )
}.join("|") + ")(.*)"

# Alternation of military rank words; its source is interpolated into
# PL_sb_general.
PL_sb_military = /major|lieutenant|brigadier|adjutant|quartermaster/

# Regexp for "<word> general" compounds. The PL_sb_military alternation is
# interpolated into a negative lookahead.
#   group 1: lazily matched text that must not start with one of those ranks
#   group 2: the separator plus the word "general"
#   group 3: the separator itself (a hyphen or whitespace)
PL_sb_general =

%r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'

# Preposition word list handed to matchgroup (defined outside this view).
# The result is interpolated into PL_sb_prep_dual_compound and
# PL_sb_prep_compound.
PL_prep = matchgroup(%w[
  about above across after among around
  at athwart before behind below beneath
  beside besides between betwixt beyond but
  by during except for from in
  into near of off on onto
  out over since till to under
  until unto upon with
])

# Regexp for compounds of the shape "<head> <prep> a <tail>", where each
# separator is a hyphen or whitespace.
#   group 1: lazily matched head
#   group 2: separator + preposition (PL_prep, "de" or "du") + separator
#   group 3: everything after the literal "a" and its separator
# NOTE(review): PL_prep is interpolated as-is; its exact form depends on what
# matchgroup returns -- confirm.
PL_sb_prep_dual_compound =

%r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'

# Regexp for compounds of the shape "<head> <prep> [<tail>]", where each
# separator is a hyphen or whitespace.
#   group 1: lazily matched head
#   group 2: everything from the first separator onwards
#   group 3: the first separator
#   group 4: the preposition (PL_prep, "de" or "du")
#   group 5: optional remainder, made of a separator (group 6) and tail (group 7)
# NOTE(review): PL_prep is interpolated as-is; its exact form depends on what
# matchgroup returns -- confirm.
PL_sb_prep_compound =

%r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'

# Singular => plural table for nominative, reflexive and possessive pronouns.
# Key order is kept exactly as before because PL_pron_nom is built from
# this hash's keys.
PL_pron_nom_h = {
  # Nominative, each followed by its reflexive
  "i"        => "we",
  "myself"   => "ourselves",
  "you"      => "you",
  "yourself" => "yourselves",
  "she"      => "they",
  "herself"  => "themselves",
  "he"       => "they",
  "himself"  => "themselves",
  "it"       => "they",
  "itself"   => "themselves",
  "they"     => "they",
  "themself" => "themselves",

  # Possessive
  "mine"     => "ours",
  "yours"    => "yours",
  "hers"     => "theirs",
  "his"      => "theirs",
  "its"      => "theirs",
  "theirs"   => "theirs",
}

# Result of matchgroup (defined outside this view) over the keys of
# PL_pron_nom_h.
PL_pron_nom = matchgroup(PL_pron_nom_h.keys)

# Singular => plural table for accusative and reflexive pronouns. Key order
# is kept exactly as before because PL_pron_acc is built from this hash's
# keys.
PL_pron_acc_h = {
  # Accusative, each followed by its reflexive
  "me"       => "us",
  "myself"   => "ourselves",
  "you"      => "you",
  "yourself" => "yourselves",
  "her"      => "them",
  "herself"  => "themselves",
  "him"      => "them",
  "himself"  => "themselves",
  "it"       => "them",
  "itself"   => "themselves",
  "them"     => "them",
  "themself" => "themselves",
}

# Result of matchgroup (defined outside this view) over the keys of
# PL_pron_acc_h.
PL_pron_acc = matchgroup(PL_pron_acc_h.keys)

# Irregular verb forms of "to be" and "to have" mapped to their plural
# counterparts. The table used to be laid out in 1st/2nd/3rd person columns,
# which repeated the keys "was" and "have" inside one hash literal (Ruby
# reports such repeats as duplicated keys). Each key is now listed once; the
# resulting hash, including its key order, is unchanged.
PL_v_irregular_pres_h = {
  # "to be", present
  "am"   => "are",
  "are"  => "are",
  "is"   => "are",

  # "to be", past
  "was"  => "were",
  "were" => "were",

  # "to have"
  "have" => "have",
  "has"  => "have",
}

# Result of matchgroup (defined outside this view) over the keys of
# PL_v_irregular_pres_h.
PL_v_irregular_pres = matchgroup(PL_v_irregular_pres_h.keys)

# Present-tense verb forms mapped to their plural form: the base form maps to
# itself and the third-person singular maps to the base form. The table used
# to repeat every base form as a key (1st and 2nd person columns), and "can"
# and "must" three times, which Ruby reports as duplicated hash keys. Each
# key is now listed once; the resulting hash, including its key order, is
# unchanged.
PL_v_ambiguous_pres_h = {
  # base form          3rd pers. singular
  "act"   => "act",    "acts"    => "act",
  "blame" => "blame",  "blames"  => "blame",
  "can"   => "can",
  "must"  => "must",
  "fly"   => "fly",    "flies"   => "fly",
  "copy"  => "copy",   "copies"  => "copy",
  "drink" => "drink",  "drinks"  => "drink",
  "fight" => "fight",  "fights"  => "fight",
  "fire"  => "fire",   "fires"   => "fire",
  "like"  => "like",   "likes"   => "like",
  "look"  => "look",   "looks"   => "look",
  "make"  => "make",   "makes"   => "make",
  "reach" => "reach",  "reaches" => "reach",
  "run"   => "run",    "runs"    => "run",
  "sink"  => "sink",   "sinks"   => "sink",
  "sleep" => "sleep",  "sleeps"  => "sleep",
  "view"  => "view",   "views"   => "view",
}

# Result of matchgroup (defined outside this view) over the keys of
# PL_v_ambiguous_pres_h.
PL_v_ambiguous_pres = matchgroup(PL_v_ambiguous_pres_h.keys)

# Verb forms listed under the "irregular, non-present" name, handed to
# matchgroup (defined outside this view).
PL_v_irregular_non_pres = matchgroup(%w[
  did had ate made put spent fought
  sank gave sought shall could ought should
])

# Verb forms listed under the "ambiguous, non-present" name, handed to
# matchgroup (defined outside this view).
PL_v_ambiguous_non_pres = matchgroup(%w[thought saw bent will might cut])

# Count words listed under the "zero" name, handed to matchgroup (defined
# outside this view).
PL_count_zero = matchgroup(%w[0 no zero nil])

# Count words listed under the "one" name, handed to matchgroup (defined
# outside this view).
PL_count_one = matchgroup(%w[1 a an one each every this that])

# Singular => plural table for articles and demonstratives.
PL_adj_special_h = {
  "a"    => "some",
  "an"   => "some",
  "this" => "these",
  "that" => "those",
}

# Result of matchgroup (defined outside this view) over the keys of
# PL_adj_special_h.
PL_adj_special = matchgroup(PL_adj_special_h.keys)

# Singular => plural table for possessive adjectives.
PL_adj_poss_h = {
  "my"  => "our",    "your"  => "your",
  "its" => "their",  "her"   => "their",
  "his" => "their",  "their" => "their",
}

# Result of matchgroup (defined outside this view) over the keys of
# PL_adj_poss_h.
PL_adj_poss = matchgroup(PL_adj_poss_h.keys)

Nth = Numerical inflections

{
  0 => 'th',
  1 => 'st',
  2 => 'nd',
  3 => 'rd',
  4 => 'th',
  5 => 'th',
  6 => 'th',
  7 => 'th',
  8 => 'th',
  9 => 'th',
  11 => 'th',
  12 => 'th',
  13 => 'th',
}

Ordinals = Ordinal word parts

{}

# The keys of Ordinals joined with "|"; the string always ends with a
# trailing "|".
OrdinalSuffixes = "#{Ordinals.keys.join('|')}|"

Units = Numeral names

[''] + %w[one two three four five six seven eight nine]

# Names of the numbers 10 through 19, indexed by their units digit.
Teens = %w[
  ten      eleven
  twelve   thirteen
  fourteen fifteen
  sixteen  seventeen
  eighteen nineteen
]

# Names of the multiples of ten, indexed by tens digit; indexes 0 and 1 are
# empty-string placeholders.
Tens = ['', ''].concat(%w[twenty thirty forty fifty sixty seventy eighty ninety])

# Scale words, each with a leading space: a lone space, " thousand", then
# every "-illion" name from " million" up to " vigintillion".
Thousands = [' ', ' thousand'].concat(
  %w[
    m b tr quadr quint sext sept oct non dec
    undec duodec tredec quattuordec quindec
    sexdec septemdec octodec novemdec vigint
  ].map { |stem| " #{stem}illion" }
)

NumberToWordsFunctions = A collection of functions for transforming digits into word phrases. Indexed by the number of digits being transformed; e.g., NumberToWordsFunctions[2] is the function for transforming double-digit numbers.

# Array of procs indexed by digit count. Every proc takes the word to use for
# zero as its first argument, followed by one Integer per digit.
# NOTE(review): to_units and to_tens are defined outside this view.
[
  # Index 0: no digits at all is an error; the message shows the arguments.
  proc {|*args| raise "No digits (#{args.inspect})"},

  # Single-digits
  # A non-zero digit goes through to_units; zero yields the zero word
  # followed by a space.
  proc {|zero,x|
    (x.nonzero? ? to_units(x) : "#{zero} ")
  },

  # Double-digits
  # A non-zero leading digit goes through to_tens. With a zero leading digit
  # the zero word is emitted for it, followed by the single-digit rendering of
  # y; two zero digits yield the zero word twice, joined by a space.
  proc {|zero,x,y|
    if x.nonzero?
      to_tens( x, y )
    elsif y.nonzero?
      "#{zero} " + NumberToWordsFunctions[1].call( zero, y )
    else
      ([zero] * 2).join(" ")
    end
  },

  # Triple-digits
  # Concatenates the single-digit rendering of x with the double-digit
  # rendering of y and z.
  proc {|zero,x,y,z|
    NumberToWordsFunctions[1].call(zero,x) + 
    NumberToWordsFunctions[2].call(zero,y,z)
  }
]

A_abbrev = This pattern matches strings of capitals starting with a “vowel-sound” consonant followed by another consonant, and which are not likely to be real words (oh, all right then, it’s just magic!)

%{
  (?! FJO | [HLMNS]Y.  | RY[EO] | SQU
    | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
  [FHLMNRSX][A-Z]
}

A_y_cons = This pattern codes the beginnings of all English words beginning with a ‘y’ followed by a consonant. Any other y-consonant prefix therefore implies an abbreviation.

'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)'

A_explicit_an = Exceptions to exceptions

matchgroup( "euler", "hour(?!i)", "heir", "honest", "hono" )

NumwordDefaults = Default configuration arguments for the #numwords function

{
  :group   => 0,
  :comma   => ', ',
  :and   => ' and ',
  :zero    => 'zero',
  :decimal => 'point',
  :asArray => false,
}

SeveralRange = Default ranges for #quantify

2..5

# Inclusive range 6 through 19.
# NOTE(review): presumably one of the default count bands used by #quantify,
# like SeveralRange -- confirm.
NumberRange = (6..19)

# Inclusive range 20 through 45.
# NOTE(review): presumably one of the default count bands used by #quantify,
# like SeveralRange -- confirm.
NumerousRange = (20..45)

# Inclusive range 46 through 99.
# NOTE(review): presumably one of the default count bands used by #quantify,
# like SeveralRange -- confirm.
ManyRange = (46..99)

QuantifyDefaults = Default configuration arguments for the #quantify function

{
  :joinword  => " of ",
}

ConjunctionDefaults = Default configuration arguments for the #conjunction (junction, what’s your) function.

{
  :separator   => ', ',
  :altsep      => '; ',
  :penultimate => true,
  :conjunctive => 'and',
  :combine   => true,
  :casefold    => true,
  :generalize    => false,
  :quantsort   => true,
}

IrregularInfinitives = Irregular words => infinitive forms

{
  'abided'     => 'abide',
  'abode'        => 'abide',
  'am'       => 'be',
  'are'        => 'be',
  'arisen'     => 'arise',
  'arose'        => 'arise',
  'ate'        => 'eat',
  'awaked'     => 'awake',
  'awoke'        => 'awake',
  'bade'       => 'bid',
  'beaten'     => 'beat',
  'became'     => 'become',
  'been'       => 'be',
  'befallen'     => 'befall',
  'befell'     => 'befall',
  'began'        => 'begin',
  'begat'        => 'beget',
  'begot'        => 'beget',
  'begotten'     => 'beget',
  'begun'        => 'begin',
  'beheld'     => 'behold',
  'bent'       => 'bend',
  'bereaved'     => 'bereave',
  'bereft'     => 'bereave',
  'beseeched'      => 'beseech',
  'besought'     => 'beseech',
  'bespoke'      => 'bespeak',
  'bespoken'     => 'bespeak',
  'bestrewed'      => 'bestrew',
  'bestrewn'     => 'bestrew',
  'bestrid'      => 'bestride',
  'bestridden'   => 'bestride',
  'bestrode'     => 'bestride',
  'betaken'      => 'betake',
  'bethought'      => 'bethink',
  'betook'     => 'betake',
  'betted'     => 'bet',
  'bidden'     => 'bid',
  'bided'        => 'bide',
  'bit'        => 'bite',
  'bitten'     => 'bite',
  'bled'       => 'bleed',
  'blended'      => 'blend',
  'blent'        => 'blend',
  'blessed'      => 'bless',
  'blest'        => 'bless',
  'blew'       => 'blow',
  'blown'        => 'blow',
  'bode'       => 'bide',
  'bore'       => 'bear',
  'born'       => 'bear',
  'borne'        => 'bear',
  'bought'     => 'buy',
  'bound'        => 'bind',
  'bred'       => 'breed',
  'broadcasted'    => 'broadcast',
  'broke'        => 'break',
  'broken'     => 'break',
  'brought'      => 'bring',
  'browbeaten'   => 'browbeat',
  'built'        => 'build',
  'burned'     => 'burn',
  'burnt'        => 'burn',
  'came'       => 'come',
  'caught'     => 'catch',
  'chid'       => 'chide',
  'chidden'      => 'chide',
  'chided'     => 'chide',
  'chose'        => 'choose',
  'chosen'     => 'choose',
  'clad'       => 'clothe',
  'clave'        => 'cleave',
  'cleaved'      => 'cleave',
  'cleft'        => 'cleave',
  'clothed'      => 'clothe',
  'clove'        => 'cleave',
  'cloven'     => 'cleave',
  'clung'        => 'cling',
  'costed'     => 'cost',
  'could'        => 'can',
  'crept'        => 'creep',
  'crew'       => 'crow',
  'crowed'     => 'crow',
  'dealt'        => 'deal',
  'did'        => 'do',
  'done'       => 'do',
  'dove'       => 'dive',
  'drank'        => 'drink',
  'drawn'        => 'draw',
  'dreamed'      => 'dream',
  'dreamt'     => 'dream',
  'drew'       => 'draw',
  'driven'     => 'drive',
  'drove'        => 'drive',
  'drunk'        => 'drink',
  'dug'        => 'dig',
  'dwelled'      => 'dwell',
  'dwelt'        => 'dwell',
  'eaten'        => 'eat',
  'fallen'     => 'fall',
  'fed'        => 'feed',
  'fell'       => 'fall',
  'felt'       => 'feel',
  'fled'       => 'flee',
  'flew'       => 'fly',
  'flown'        => 'fly',
  'flung'        => 'fling',
  'forbad'     => 'forbid',
  'forbade'      => 'forbid',
  'forbidden'      => 'forbid',
  'forbore'      => 'forbear',
  'forborne'     => 'forbear',
  'fordid'     => 'fordo',
  'fordone'      => 'fordo',
  'forecasted'   => 'forecast',
  'foregone'     => 'forego',
  'foreknew'     => 'foreknow',
  'foreknown'      => 'foreknow',
  'foreran'      => 'forerun',
  'foresaw'      => 'foresee',
  'foreshowed'   => 'foreshow',
  'foreshown'      => 'foreshow',
  'foretold'     => 'foretell',
  'forewent'     => 'forego',
  'forgave'      => 'forgive',
  'forgiven'     => 'forgive',
  'forgot'     => 'forget',
  'forgotten'      => 'forget',
  'forsaken'     => 'forsake',
  'forseen'      => 'foresee',
  'forsook'      => 'forsake',
  'forswore'     => 'forswear',
  'forsworn'     => 'forswear',
  'fought'     => 'fight',
  'found'        => 'find',
  'froze'        => 'freeze',
  'frozen'     => 'freeze',
  'gainsaid'     => 'gainsay',
  'gave'       => 'give',
  'gilded'     => 'gild',
  'gilt'       => 'gild',
  'girded'     => 'gird',
  'girt'       => 'gird',
  'given'        => 'give',
  'gone'       => 'go',
  'got'        => 'get',
  'gotten'     => 'get',
  'graved'     => 'grave',
  'graven'     => 'grave',
  'grew'       => 'grow',
  'ground'     => 'grind',
  'grown'        => 'grow',
  'had'        => 'have',
  'hamstringed'    => 'hamstring',
  'hamstrung'      => 'hamstring',
  'hanged'     => 'hang',
  'heard'        => 'hear',
  'heaved'     => 'heave',
  'held'       => 'hold',
  'hewed'        => 'hew',
  'hewn'       => 'hew',
  'hid'        => 'hide',
  'hidden'     => 'hide',
  'hove'       => 'heave',
  'hung'       => 'hang',
  'inlaid'     => 'inlay',
  'is'       => 'be',
  'kept'       => 'keep',
  'kneeled'      => 'kneel',
  'knelt'        => 'kneel',
  'knew'       => 'know',
  'knitted'      => 'knit',
  'known'        => 'know',
  'laded'        => 'lade',
  'laden'        => 'lade',
  'laid'       => 'lay',
  'lain'       => 'lie',
  'lay'        => 'lie',
  'leaned'     => 'lean',
  'leant'        => 'lean',
  'leaped'     => 'leap',
  'leapt'        => 'leap',
  'learned'      => 'learn',
  'learnt'     => 'learn',
  'led'        => 'lead',
  'left'       => 'leave',
  'lent'       => 'lend',
  'lighted'      => 'light',
  'lit'        => 'light',
  'lost'       => 'lose',
  'made'       => 'make',
  'meant'        => 'mean',
  'melted'     => 'melt',
  'met'        => 'meet',
  'might'        => 'may',
  'misdealt'     => 'misdeal',
  'misgave'      => 'misgive',
  'misgiven'     => 'misgive',
  'mislaid'      => 'mislay',
  'misled'     => 'mislead',
  'mistaken'     => 'mistake',
  'mistook'      => 'mistake',
  'misunderstood'    => 'misunderstand',
  'molten'     => 'melt',
  'mowed'        => 'mow',
  'mown'       => 'mow',
  'outate'     => 'outeat',
  'outbade'      => 'outbid',
  'outbidden'      => 'outbid',
  'outbred'      => 'outbreed',
  'outdid'     => 'outdo',
  'outdone'      => 'outdo',
  'outeaten'     => 'outeat',
  'outfought'      => 'outfight',
  'outgone'      => 'outgo',
  'outgrew'      => 'outgrow',
  'outgrown'     => 'outgrow',
  'outlaid'      => 'outlay',
  'outran'     => 'outrun',
  'outridden'      => 'outride',
  'outrode'      => 'outride',
  'outsat'     => 'outsit',
  'outshone'     => 'outshine',
  'outshot'      => 'outshoot',
  'outsold'      => 'outsell',
  'outspent'     => 'outspend',
  'outthrew'     => 'outthrow',
  'outthrown'      => 'outthrow',
  'outwent'      => 'outgo',
  'outwore'      => 'outwear',
  'outworn'      => 'outwear',
  'overate'      => 'overeat',
  'overbade'     => 'overbid',
  'overbidden'   => 'overbid',
  'overblew'     => 'overblow',
  'overblown'      => 'overblow',
  'overbore'     => 'overbear',
  'overborn'     => 'overbear',
  'overborne'      => 'overbear',
  'overbought'   => 'overbuy',
  'overbuilt'      => 'overbuild',
  'overcame'     => 'overcome',
  'overdid'      => 'overdo',
  'overdone'     => 'overdo',
  'overdrawn'      => 'overdraw',
  'overdrew'     => 'overdraw',
  'overdriven'   => 'overdrive',
  'overdrove'      => 'overdrive',
  'overeaten'      => 'overeat',
  'overfed'      => 'overfeed',
  'overflew'     => 'overfly',
  'overflown'      => 'overfly',
  'overgrew'     => 'overgrow',
  'overgrown'      => 'overgrow',
  'overhanged'   => 'overhang',
  'overheard'      => 'overhear',
  'overhung'     => 'overhang',
  'overladed'      => 'overlade',
  'overladen'      => 'overlade',
  'overlaid'     => 'overlay',
  'overlain'     => 'overlie',
  'overlay'      => 'overlie',
  'overleaped'   => 'overleap',
  'overleapt'      => 'overleap',
  'overpaid'     => 'overpay',
  'overran'      => 'overrun',
  'overridden'   => 'override',
  'overrode'     => 'override',
  'oversaw'      => 'oversee',
  'overseen'     => 'oversee',
  'oversewed'      => 'oversew',
  'oversewn'     => 'oversew',
  'overshot'     => 'overshoot',
  'overslept'      => 'oversleep',
  'overspent'      => 'overspend',
  'overtaken'      => 'overtake',
  'overthrew'      => 'overthrow',
  'overthrown'   => 'overthrow',
  'overtook'     => 'overtake',
  'overwinded'   => 'overwind',
  'overwound'      => 'overwind',
  'overwritten'    => 'overwrite',
  'overwrote'      => 'overwrite',
  'paid'       => 'pay',
  'partaken'     => 'partake',
  'partook'      => 'partake',
  'prechose'     => 'prechoose',
  'prechosen'      => 'prechoose',
  'proved'     => 'prove',
  'proven'     => 'prove',
  'quitted'      => 'quit',
  'ran'        => 'run',
  'rang'       => 'ring',
  'reaved'     => 'reave',
  'rebuilt'      => 'rebuild',
  'reeved'     => 'reeve',
  'reft'       => 'reave',
  'relaid'     => 'relay',
  'rent'       => 'rend',
  'repaid'     => 'repay',
  'retold'     => 'retell',
  'ridded'     => 'rid',
  'ridden'     => 'ride',
  'risen'        => 'rise',
  'rived'        => 'rive',
  'riven'        => 'rive',
  'rode'       => 'ride',
  'rose'       => 'rise',
  'rove'       => 'reeve',
  'rung'       => 'ring',
  'said'       => 'say',
  'sang'       => 'sing',
  'sank'       => 'sink',
  'sat'        => 'sit',
  'saw'        => 'see',
  'sawed'        => 'saw',
  'sawn'       => 'saw',
  'seen'       => 'see',
  'sent'       => 'send',
  'sewed'        => 'sew',
  'sewn'       => 'sew',
  'shaken'     => 'shake',
  'shaved'     => 'shave',
  'shaven'     => 'shave',
  'sheared'      => 'shear',
  'shined'     => 'shine',
  'shod'       => 'shoe',
  'shoed'        => 'shoe',
  'shone'        => 'shine',
  'shook'        => 'shake',
  'shorn'        => 'shear',
  'shot'       => 'shoot',
  'showed'     => 'show',
  'shown'        => 'show',
  'shrank'     => 'shrink',
  'shredded'     => 'shred',
  'shrived'      => 'shrive',
  'shriven'      => 'shrive',
  'shrove'     => 'shrive',
  'shrunk'     => 'shrink',
  'shrunken'     => 'shrink',
  'slain'        => 'slay',
  'slept'        => 'sleep',
  'slew'       => 'slay',
  'slid'       => 'slide',
  'slidden'      => 'slide',
  'slung'        => 'sling',
  'slunk'        => 'slink',
  'smelled'      => 'smell',
  'smelt'        => 'smell',
  'smitten'      => 'smite',
  'smote'        => 'smite',
  'snuck'        => 'sneak',
  'sold'       => 'sell',
  'sought'     => 'seek',
  'sowed'        => 'sow',
  'sown'       => 'sow',
  'span'       => 'spin',
  'spat'       => 'spit',
  'sped'       => 'speed',
  'speeded'      => 'speed',
  'spelled'      => 'spell',
  'spelt'        => 'spell',
  'spent'        => 'spend',
  'spilled'      => 'spill',
  'spilt'        => 'spill',
  'spoiled'      => 'spoil',
  'spoilt'     => 'spoil',
  'spoke'        => 'speak',
  'spoken'     => 'speak',
  'sprang'     => 'spring',
  'sprung'     => 'spring',
  'spun'       => 'spin',
  'stank'        => 'stink',
  'staved'     => 'stave',
  'stole'        => 'steal',
  'stolen'     => 'steal',
  'stood'        => 'stand',
  'stove'        => 'stave',
  'strewed'      => 'strew',
  'strewn'     => 'strew',
  'stricken'     => 'strike',
  'strid'        => 'stride',
  'stridden'     => 'stride',
  'strived'      => 'strive',
  'striven'      => 'strive',
  'strode'     => 'stride',
  'strove'     => 'strive',
  'struck'     => 'strike',
  'strung'     => 'string',
  'stuck'        => 'stick',
  'stung'        => 'sting',
  'stunk'        => 'stink',
  'sung'       => 'sing',
  'sunk'       => 'sink',
  'sunken'     => 'sink',
  'swam'       => 'swim',
  'sweated'      => 'sweat',
  'swelled'      => 'swell',
  'swept'        => 'sweep',
  'swollen'      => 'swell',
  'swore'        => 'swear',
  'sworn'        => 'swear',
  'swum'       => 'swim',
  'swung'        => 'swing',
  'taken'        => 'take',
  'taught'     => 'teach',
  'thought'      => 'think',
  'threw'        => 'throw',
  'thrived'      => 'thrive',
  'thriven'      => 'thrive',
  'throve'     => 'thrive',
  'thrown'     => 'throw',
  'told'       => 'tell',
  'took'       => 'take',
  'tore'       => 'tear',
  'torn'       => 'tear',
  'trod'       => 'tread',
  'trodden'      => 'tread',
  'unbent'     => 'unbend',
  'unbound'      => 'unbind',
  'unbuilt'      => 'unbuild',
  'underbought'    => 'underbuy',
  'underfed'     => 'underfeed',
  'undergone'      => 'undergo',
  'underlaid'      => 'underlay',
  'underlain'      => 'underlie',
  'underlay'     => 'underlie',
  'underpaid'      => 'underpay',
  'underran'     => 'underrun',
  'undershot'      => 'undershoot',
  'undersold'      => 'undersell',
  'understood'   => 'understand',
  'undertaken'   => 'undertake',
  'undertook'      => 'undertake',
  'underwent'      => 'undergo',
  'underwritten'   => 'underwrite',
  'underwrote'   => 'underwrite',
  'undid'        => 'undo',
  'undone'     => 'undo',
  'undrawn'      => 'undraw',
  'undrew'     => 'undraw',
  'unfroze'      => 'unfreeze',
  'unfrozen'     => 'unfreeze',
  'ungirded'     => 'ungird',
  'ungirt'     => 'ungird',
  'unhanged'     => 'unhang',
  'unhung'     => 'unhang',
  'unknitted'      => 'unknit',
  'unladed'      => 'unlade',
  'unladen'      => 'unlade',
  'unlaid'     => 'unlay',
  'unlearned'      => 'unlearn',
  'unlearnt'     => 'unlearn',
  'unmade'     => 'unmake',
  'unreeved'     => 'unreeve',
  'unrove'     => 'unreeve',
  'unsaid'     => 'unsay',
  'unslung'      => 'unsling',
  'unspoke'      => 'unspeak',
  'unspoken'     => 'unspeak',
  'unstrung'     => 'unstring',
  'unstuck'      => 'unstick',
  'unswore'      => 'unswear',
  'unsworn'      => 'unswear',
  'untaught'     => 'unteach',
  'unthought'      => 'unthink',
  'untrod'     => 'untread',
  'untrodden'      => 'untread',
  'unwinded'     => 'unwind',
  'unwound'      => 'unwind',
  'unwove'     => 'unweave',
  'unwoven'      => 'unweave',
  'upbuilt'      => 'upbuild',
  'upheld'     => 'uphold',
  'uprisen'      => 'uprise',
  'uprose'     => 'uprise',
  'upswept'      => 'upsweep',
  'upswung'      => 'upswing',
  'waked'        => 'wake',
  'was'        => 'be',
  'waylaid'      => 'waylay',
  'wedded'     => 'wed',
  'went'       => 'go',
  'wept'       => 'weep',
  'were'       => 'be',
  'wetted'     => 'wet',
  'winded'     => 'wind',
  'wist'       => 'wit',
  'wot'        => 'wit',
  'withdrawn'      => 'withdraw',
  'withdrew'     => 'withdraw',
  'withheld'     => 'withhold',
  'withstood'      => 'withstand',
  'woke'       => 'wake',
  'woken'        => 'wake',
  'won'        => 'win',
  'wore'       => 'wear',
  'worked'     => 'work',
  'worn'       => 'wear',
  'wound'        => 'wind',
  'wove'       => 'weave',
  'woven'        => 'weave',
  'written'      => 'write',
  'wrote'        => 'write',
  'wrought'      => 'work',
  'wrung'        => 'wring',
}

InfSuffixRules = Mapping of word suffixes to infinitive rules.

{
  # '<suffix>' => {
  #  :order => <sort order>,
  #  :rule  => <rule number>,

  # :word1 == 0 => Use 0, the index of the longest prefix
  #  within @{$prefix{$self->{'suffix'} } }, below.

  # :word1 == 1 => Use 1, the index of the 2nd longest prefix
  #  within @{$prefix{$self->{'suffix'} } }, below.

  # :word1 == -1 => Use the index of the shortest prefix
  #  within @{$prefix{$self->{'suffix'} } }, below + a letter.

  # :word1 == -2 => Use the index of the shortest prefix
  #  within @{$prefix{$self->{'suffix'} } }, below + a letter,
  #  and use the shortest prefix as well.

  # :word1 == -3 => Use the index of the shortest prefix
  #  within @{$prefix{$self->{'suffix'} } }, below + meter,
  #  and use the shortest prefix + metre as well.

  # :word1 == -4 => Use the original string.
  'hes' => {
    :order    => 1011,
    :rule   => '1',
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ses' => {
    :order    => 1021,
    :rule   => '2',
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'xes' => {
    :order    => 1031,
    :rule   => '3',
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'zes' => {
    :order    => 1041,
    :rule   => '4',
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'iless' => {
    :order    => 1051,
    :rule   => '43a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'less' => {
    :order    => 1052,
    :rule   => '43b',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'iness' => {
    :order    => 1053,
    :rule   => '44a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'ness' => {
    :order    => 1054,
    :rule   => '44b',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  "'s" => {
    :order    => 1055,
    :rule   => '7',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ies' => {
    :order    => 1056,
    :rule   => '13a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'es' => {
    :order    => 1057,
    :rule   => '13b',
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ss' => {
    :order    => 1061,
    :rule   => '6a',
    :word1    => -4,  # Original string.
    :suffix1  => '',
    :suffix2  => '',
  },
  's'   => {
    :order    => 1062,
    :rule   => '6b',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ater' => {
    :order    => 1081,
    :rule   => '8',
    :word1    => -4,  # Original string.
    :suffix1  => '',
    :suffix2  => '',
  },
  'cter' => {
    :order    => 1091,
    :rule   => '9',
    :word1    => -4,  # Original string.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ier' => {
    :order    => 1101,
    :rule   => '10',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'er' => {
    :order    => 1111,
    :rule   => '11',
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ied' => {
    :order    => 1121,
    :rule   => '12a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'ed' => {
    :order    => 1122,
    :rule   => '12b',  # There is extra code for 12b below.
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'iest' => {
    :order    => 1141,
    :rule   => '14a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'est' => {
    :order    => 1142,
    :rule   => '14b',
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'blity' => {
    :order    => 1143,
    :rule   => '21',
    :word1    => -4,  # Original string.
    :suffix1  => '',
    :suffix2  => '',
  },
  'bility' => {
    :order    => 1144,
    :rule   => '22',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ble',
    :suffix2  => '',
  },
  'fiable' => {
    :order    => 1145,
    :rule   => '23',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'fy',
    :suffix2  => '',
  },
  'logist' => {
    :order    => 1146,
    :rule   => '24',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'logy',
    :suffix2  => '',
  },
  'ing' => {
    :order    => 1151,
    :rule   => '15', # There is extra code for 15 below.
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'ist' => {
    :order    => 1161,
    :rule   => '16',
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'ism' => {
    :order    => 1171,
    :rule   => '17',
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'ity' => {
    :order    => 1181,
    :rule   => '18',
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'ize' => {
    :order    => 1191,
    :rule   => '19',
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'cable' => {
    :order    => 1201,
    :rule   => '20a',
    :word1    => -4,  # Original string.
    :suffix1  => '',
    :suffix2  => '',
  },
  'gable' => {
    :order    => 1202,
    :rule   => '20b',
    :word1    => -4,  # Original string.
    :suffix1  => '',
    :suffix2  => '',
  },
  'able' => {
    :order    => 1203,
    :rule   => '20c',
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'graphic' => {
    :order    => 1251,
    :rule   => '25',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'graphy',
    :suffix2  => '',
  },
  'istic' => {
    :order    => 1261,
    :rule   => '26',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ist',
    :suffix2  => '',
  },
  'itic' => {
    :order    => 1271,
    :rule   => '27',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ite',
    :suffix2  => '',
  },
  'like' => {
    :order    => 1281,
    :rule   => '28',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'logic' => {
    :order    => 1291,
    :rule   => '29',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'logy',
    :suffix2  => '',
  },
  'ment' => {
    :order    => 1301,
    :rule   => '30',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'mental' => {
    :order    => 1311,
    :rule   => '31',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ment',
    :suffix2  => '',
  },
  'metry' => {
    :order    => 1321,
    :rule   => '32',
    :word1    => -3,  # Shortest prefix + meter, and shortest perfix + metre.
    :suffix1  => 'meter',
    :suffix2  => 'metre',
  },
  'nce' => {
    :order    => 1331,
    :rule   => '33',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'nt',
    :suffix2  => '',
  },
  'ncy' => {
    :order    => 1341,
    :rule   => '34',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'nt',
    :suffix2  => '',
  },
  'ship' => {
    :order    => 1351,
    :rule   => '35',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ical' => {
    :order    => 1361,
    :rule   => '36',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ic',
    :suffix2  => '',
  },
  'ional' => {
    :order    => 1371,
    :rule   => '37',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ion',
    :suffix2  => '',
  },
  'bly' => {
    :order    => 1381,
    :rule   => '38',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ble',
    :suffix2  => '',
  },
  'ily' => {
    :order    => 1391,
    :rule   => '39',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'ly' => {
    :order    => 1401,
    :rule   => '40',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'iful' => {
    :order    => 1411,
    :rule   => '41a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'ful' => {
    :order    => 1412,
    :rule   => '41b',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ihood' => {
    :order    => 1421,
    :rule   => '42a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'hood' => {
    :order    => 1422,
    :rule   => '42b',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ification' => {
    :order    => 1451,
    :rule   => '45',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ify',
    :suffix2  => '',
  },
  'ization' => {
    :order    => 1461,
    :rule   => '46',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ize',
    :suffix2  => '',
  },
  'ction' => {
    :order    => 1471,
    :rule   => '47',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ct',
    :suffix2  => '',
  },
  'rtion' => {
    :order    => 1481,
    :rule   => '48',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'rt',
    :suffix2  => '',
  },
  'ation' => {
    :order    => 1491,
    :rule   => '49',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ate',
    :suffix2  => '',
  },
  'ator' => {
    :order    => 1501,
    :rule   => '50',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ate',
    :suffix2  => '',
  },
  'ctor' => {
    :order    => 1511,
    :rule   => '51',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ct',
    :suffix2  => '',
  },
  'ive' => {
    :order    => 1521,
    :rule   => '52',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ion',
    :suffix2  => '',
  },
  'onian' => {
    :order    => 1530,
    :rule   => '54',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'on',
    :suffix2  => '',
  },
  'an' => {
    :order    => 1531,
    :rule   => '53',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'a',
    :suffix2  => '',
  },
}

InfSuffixRuleOrder =

# Suffix keys of InfSuffixRules, arranged by ascending :order value.
InfSuffixRules.keys.sort_by do |suffix|
  InfSuffixRules[suffix][:order]
end

Class Method Summary collapse

.a(phrase, count = nil) ⇒ Object (also: an, A, AN)

Return the given phrase with the appropriate indefinite article (“a” or “an”) prepended.
.conjunction(obj, args = {}) ⇒ Object

Return the specified obj (which must support the #collect method) as a conjunction.
.debugMsg(*msgs) ⇒ Object

Debugging output.
.def_synset_function(meth) ⇒ Object

Make a function that calls the method meth on the synset of an input word.
.hasLinkParser? ⇒ Boolean

Returns true if LinkParser was loaded okay.
.hasWordnet? ⇒ Boolean

Returns true if WordNet was loaded okay.
.indef_article(word, count) ⇒ Object

Returns the given word with a prepended indefinite article, unless count is non-nil and not singular.
.infinitive(word) ⇒ Object

Return the infinitive form of the given word.
.language ⇒ Object

Return the name of the language this module is for.
.linkParse(sent) ⇒ Object (also: sentence)

Return a LinkParser::Sentence, with or without a sentence in it.
.linkParser ⇒ Object

The instance of LinkParser used for all Linguistics LinkParser functions.
.lpError ⇒ Object

If #hasLinkParser? returns false, this can be called to fetch the exception which was raised when trying to load LinkParser.
.matchgroup(*parts) ⇒ Object

Wrap one or more parts in a non-capturing alternation Regexp.
.no(phrase, count = nil) ⇒ Object (also: NO)

Translate zero-quantified phrase to “no phrase.plural”.
.normalizeCount(count, default = 2) ⇒ Object

Normalize a count to either 1 or 2 (singular or plural).
.number_to_words(num, config) ⇒ Object

Return the specified number num as an array of number phrases.
.numwords(number, hashargs = {}) ⇒ Object

Return the specified number as english words.
.ordinal(number) ⇒ Object (also: ORD)

Transform the given number into an ordinal word.
.plural(phrase, count = nil) ⇒ Object (also: PL)

Return the plural of the given phrase if count indicates it should be plural.
.plural_adjective(phrase, count = nil) ⇒ Object (also: plural_adj, PL_ADJ)

Return the plural of the given adjectival phrase if count indicates it should be plural.
.plural_noun(phrase, count = nil) ⇒ Object (also: PL_N)

Return the plural of the given noun phrase if count indicates it should be plural.
.plural_verb(phrase, count = nil) ⇒ Object (also: PL_V)

Return the plural of the given verb phrase if count indicates it should be plural.
.pluralize_general_verb(word, count) ⇒ Object

Pluralize regular verbs.
.pluralize_noun(word, count = nil) ⇒ Object

Pluralize nouns.
.pluralize_special_adjective(word, count) ⇒ Object

Handle special adjectives.
.pluralize_special_verb(word, count) ⇒ Object

Pluralize special verbs.
.postprocess(original, inflected) ⇒ Object

Do normal/classical switching and match capitalization in inflected by examining the original input.
.present_participle(word) ⇒ Object (also: part_pres, PART_PRES)

Participles.
.quantify(phrase, number = 0, args = {}) ⇒ Object
:joinword

Sets the word (and any surrounding spaces) used as the word separating the quantity from the noun in the resulting string.
.synset(word, pos = nil, sense = 1) ⇒ Object

Look up the synset associated with the given word or collocation in the WordNet lexicon and return a WordNet::Synset object.
.synsets(word, pos = nil) ⇒ Object

Look up all the synsets associated with the given word or collocation in the WordNet lexicon and return an Array of WordNet::Synset objects.
.to_hundreds(hundreds, tens = 0, units = 0, thousands = 0, joinword = " and ") ⇒ Object

Transform the specified number of hundreds-, tens-, and units-place numerals into a word phrase.
.to_tens(tens, units, thousands = 0) ⇒ Object

Transform the specified number of tens- and units-place numerals into a word-phrase at the given number of thousands places.
.to_thousands(thousands = 0) ⇒ Object

Transform the specified number into one or more words like ‘thousand’, ‘million’, etc.
.to_units(units, thousands = 0) ⇒ Object

Transform the specified number of units-place numerals into a word-phrase at the given number of thousands places.
.wnError ⇒ Object

If #hasWordnet? returns false, this can be called to fetch the exception which was raised when WordNet was loaded.
.wnLexicon ⇒ Object

The instance of the WordNet::Lexicon used for all Linguistics WordNet functions.

Class Method Details

.a(phrase, count = nil) ⇒ `Object` Also known as: an, A, AN

Return the given phrase with the appropriate indefinite article (“a” or “an”) prepended.

# File 'lib/linguistics/en.rb', line 1152

# Prepend the appropriate indefinite article to +phrase+ (as chosen by
# indef_article), keeping any leading and trailing whitespace in place.
# +count+ is passed through to indef_article unchanged. A phrase with no
# matchable content is returned as given.
def a( phrase, count=nil )
  match = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  lead, word, trail = (match ? match.captures : [])
  return phrase if word.nil? || word.empty?

  lead + indef_article( word, count ) + trail
end

.conjunction(obj, args = {}) ⇒ `Object`

Return the specified obj (which must support the #collect method) as a conjunction. Each item is converted to a String if it is not already (using #to_s) unless a block is given, in which case it is called once for each object in the array, and the stringified return value from the block is used instead. Returning nil causes that particular element to be omitted from the resulting conjunction. The following options can be used to control the makeup of the returned conjunction String:

:separator

Specify one or more characters to separate items in the resulting list. Defaults to ', '.

:altsep

An alternate separator to use if any of the resulting conjunction’s clauses contain the :separator character/s. Defaults to '; '.

:penultimate

Flag that indicates whether or not to join the last clause onto the rest of the conjunction using a penultimate :separator. E.g.,

%w{duck cow dog}.en.conjunction
# => "a duck, a cow, and a dog"
%w{duck cow dog}.en.conjunction( :penultimate => false )
# => "a duck, a cow and a dog"

Default to true.

:conjunctive

Sets the word used as the conjunctive (separating word) of the resulting string. Default to 'and'.

:combine

If set to true (the default), items which are identical (after surrounding spaces are stripped) will be combined in the resulting conjunction. E.g.,

%w{goose cow goose dog}.en.conjunction
# => "two geese, a cow, and a dog"
%w{goose cow goose dog}.en.conjunction( :combine => false )
# => "a goose, a cow, a goose, and a dog"

:casefold

If set to true (the default), then items are compared case-insensitively when combining them. This has no effect if :combine is false.

:generalize

If set to true, then quantities of combined items are turned into general descriptions instead of exact amounts.

ary = %w{goose pig dog horse goose reindeer goose dog horse}
ary.en.conjunction
# => "three geese, two dogs, two horses, a pig, and a reindeer"
ary.en.conjunction( :generalize => true )
# => "several geese, several dogs, several horses, a pig, and a reindeer"

See the #quantify method for specifics on how quantities are generalized. Generalization defaults to false, and has no effect if :combine is false.

:quantsort

If set to true (the default), items which are combined in the resulting conjunction will be listed in order of amount, with greater quantities sorted first. If :quantsort is false, combined items will appear where the first instance of them occurred in the list. This sort is also the fallback for identical quantities (i.e., items of the same quantity will be listed in the order they appeared in the source list).

# File 'lib/linguistics/en.rb', line 1462

# Return the items of +obj+ (anything that responds to #collect) joined
# into a conjunction such as "a duck, a cow, and a dog".
#
# Each item is stringified with #to_s. If a block is given it is called
# once per item and its stringified result is used instead; a nil result
# drops the item. +args+ is merged over ConjunctionDefaults and may set
# :separator, :altsep, :penultimate, :conjunctive, :combine, :casefold,
# :generalize and :quantsort.
#
# With fewer than two phrases the result of #a on the first phrase is
# returned directly.
def conjunction( obj, args={} )
  config = ConjunctionDefaults.dup.update( args )

  # Transform items in the obj to phrases. Block results are stringified
  # as well so that the key functions below can rely on String methods.
  phrases =
    if block_given?
      obj.collect {|item| yield(item) }.compact.collect {|res| res.to_s }
    else
      obj.collect {|item| item.to_s }
    end

  # No need for a conjunction if there's only one thing
  return a(phrases[0]) if phrases.length < 2

  # Set up a Proc to derive a collector key from a phrase depending on the
  # configuration
  keyfunc =
    if config[:casefold]
      proc {|key| key.downcase.strip}
    else
      proc {|key| key.strip}
    end

  # Count phrases that munge to the same key and keep only the first
  # occurrence of each if we're combining (:combine => true).
  collector = {}
  if config[:combine]
    phrases = phrases.select {|phrase|
      key = keyfunc[ phrase ]
      collector[ key ] = (collector[ key ] || 0) + 1
      collector[ key ] == 1
    }
  else
    # If we're not combining, just make everything have a count of 1.
    phrases.uniq.each {|phrase| collector[ keyfunc[phrase] ] = 1}
  end

  # If sort-by-quantity is turned on, sort the phrases first by how many
  # there are (most-first), and then by the order they were specified in.
  if config[:quantsort] && config[:combine]
    origorder = {}
    phrases.each_with_index {|phrase,i| origorder[ keyfunc[phrase] ] ||= i }
    phrases = phrases.sort_by {|phrase|
      key = keyfunc[ phrase ]
      [ -collector[key], origorder[key] ]
    }
  end

  # Set up a filtering function that adds either an indefinite article, an
  # indefinite quantifier, or a definite quantifier to each phrase
  # depending on the configuration and the count of phrases in the
  # collector.
  filter =
    if config[:generalize]
      proc {|phrase, count| quantify(phrase, count) }
    else
      proc {|phrase, count|
        if count > 1
          "%s %s" % [
            # :TODO: Make this threshold settable
            count < 10 ? count.en.numwords : count.to_s,
            plural(phrase, count)
          ]
        else
          a( phrase )
        end
      }
    end

  # Now use the configured filter to turn each phrase into its final form.
  phrases = phrases.collect {|phrase|
    filter[ phrase, collector[ keyfunc[phrase] ] ]
  }

  # Prepend the conjunctive to the last element unless it's empty or
  # there's only one element. A new String is built so that a phrase
  # object handed back untouched by a filter is never modified in place.
  unless config[:conjunctive].strip.empty? or phrases.length < 2
    phrases[-1] = "#{config[:conjunctive]} #{phrases[-1]}"
  end

  # Remember how many clauses there are *before* the last two are merged:
  # the separator choice must be based on the real clause count, otherwise
  # three items without a penultimate separator would be joined by a bare
  # space.
  phrase_count = phrases.length

  # Catenate the last two elements if there's no penultimate separator.
  # Combining may have left a single phrase, in which case there is
  # nothing to merge.
  if !config[:penultimate] && phrases.length > 1
    final = phrases.pop
    phrases[-1] = "#{phrases[-1]} #{final}"
  end

  # Pick a separator based on how many phrases there are and whether or
  # not the separator already occurs literally in one of the phrases.
  sep =
    if phrase_count <= 2
      ' '
    elsif phrases.any? {|phrase| phrase.include?( config[:separator] ) }
      config[:altsep]
    else
      config[:separator]
    end

  return phrases.join( sep )
end

.debugMsg(*msgs) ⇒ `Object`

Debugging output



645
646
647

# File 'lib/linguistics/en.rb', line 645

# Write the given messages to $stderr as one line, joined by single
# spaces, but only when $DEBUG is set.
def debugMsg( *msgs ) # :nodoc:
  return unless $DEBUG
  $stderr.puts( msgs.join(" ") )
end

.def_synset_function(meth) ⇒ `Object`

Make a function that calls the method meth on the synset of an input word.

# File 'lib/linguistics/en/wordnet.rb', line 127

# Define a singleton method named +meth+ on the receiver. The generated
# method takes (word, pos = nil, sense = 1), looks up the synset for the
# stringified word via #synset and returns the result of sending +meth+
# to that synset, or nil if no synset was found. Calling it without a
# word raises ArgumentError.
def def_synset_function( meth )
  singleton = class << self; self; end

  singleton.send( :define_method, meth ) do |*args|
    word, pos, sense = args
    unless word
      raise ArgumentError, "wrong number of arguments (0 for 1)"
    end

    found = synset( word.to_s, pos, sense || 1 )
    found.nil? ? nil : found.send( meth )
  end
end

.hasLinkParser? ⇒ `Boolean`

Returns true if LinkParser was loaded okay

Returns:

(Boolean)

# File 'lib/linguistics/en/linkparser.rb', line 97

def hasLinkParser? ; @hasLinkParser ; end

.hasWordnet? ⇒ `Boolean`

Returns true if WordNet was loaded okay

Returns:

(Boolean)

# File 'lib/linguistics/en/wordnet.rb', line 107

def hasWordnet? ; @hasWordnet; end

.indef_article(word, count) ⇒ `Object`

Returns the given word with a prepended indefinite article, unless count is non-nil and not singular.

# File 'lib/linguistics/en.rb', line 922

# Return +word+ prefixed with "a" or "an". If +count+ (or, when it is nil,
# Linguistics::num) is set and does not match PL_count_one, the result is
# "<count> <word>" instead.
#
# The article is chosen by the first pattern that matches +word+; the
# order of the clauses below is significant.
def indef_article( word, count )
  count ||= Linguistics::num
  if count && /^(#{PL_count_one})$/i !~ count.to_s
    return "#{count} #{word}"
  end

  # Handle user-defined variants
  # return value if value = ud_match( word, A_a_user_defined )

  article =
    case word
    # Special cases, then abbreviations that take "an"
    when /^(#{A_explicit_an})/i, /^(#{A_abbrev})/x, /^[aefhilmnorsx][.-]/i
      'an'

    # Remaining single-letter abbreviations, then consonants
    when /^[a-z][.-]/i, /^[^aeiouy]/i
      'a'

    # Special vowel-forms
    when /^e[uw]/i, /^onc?e\b/i, /^uni([^nmd]|mo)/i, /^u[bcfhjkqrst][aeiou]/i
      'a'

    # Vowels, then y... (before certain consonants implies
    # (unnaturalized) "i.." sound)
    when /^[aeiou]/i, /^(#{A_y_cons})/i
      'an'

    # Otherwise, guess "a"
    else
      'a'
    end

  "#{article} #{word}"
end

.infinitive(word) ⇒ `Object`

Return the infinitive form of the given word

# File 'lib/linguistics/en/infinitive.rb', line 1050

# Build an Infinitive for +word+ (stringified with #to_s).
#
# Words found in IrregularInfinitives are resolved directly with the rule
# name 'irregular'. Otherwise the first suffix in InfSuffixRuleOrder that
# is a trailing substring of the word selects an entry of InfSuffixRules,
# whose :word1 code decides how the two candidate forms are derived:
#
#    0 :: the word minus its last character, and minus its last two
#   -1 :: the word minus the suffix, plus :suffix1
#   -2 :: the word minus the suffix plus :suffix1, and the bare stem
#   -3 :: the stem plus :suffix1, and the stem plus :suffix2
#   -4 :: the original word
#
# Any other code raises IndexError. If no suffix rule applies, both
# candidates and the rule stay empty strings and the suffix is nil.
#
# Returns Infinitive::new( word1, word2, suffix, rule ).
def infinitive( word )
  word = word.to_s
  word1 = word2 = suffix = rule = newword = ''

  if IrregularInfinitives.key?( word )
    word1 = IrregularInfinitives[ word ]
    rule  = 'irregular'
  else
    # Build up a hash that maps each suffix to an array of prefixes, from
    # longest to shortest.
    prefix, suffix = nil
    prefixes = Hash::new {|hsh,key| hsh[key] = []}

    # Build the hash of prefixes for the word: for every trailing
    # substring (the suffix), collect the word with 1..suffix.length
    # trailing characters removed, so the last entry is the word without
    # the suffix.
    1.upto( word.length ) {|i|
      prefix = word[0, i]
      suffix = word[i..-1]

      (suffix.length - 1).downto( 0 ) {|j|
        newword = prefix + suffix[0, j]
        prefixes[ suffix ].push( newword )
      }
    }

    $stderr.puts "prefixes: %p" % prefixes if $DEBUG

    # Now check for rules covering the prefixes for this word, picking
    # the first one if one was found.
    if (( suffix = ((InfSuffixRuleOrder & prefixes.keys).first) ))
      rule = InfSuffixRules[ suffix ][:rule]
      shortestPrefix = InfSuffixRules[ suffix ][:word1]
      $stderr.puts "Using rule %p (%p) for suffix %p" % 
        [ rule, shortestPrefix, suffix ] if $DEBUG

      case shortestPrefix
      when 0
        # The two longest prefixes
        word1 = prefixes[ suffix ][ 0 ]
        word2 = prefixes[ suffix ][ 1 ]
        $stderr.puts "For sp = 0: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      when -1
        # Shortest prefix plus the replacement suffix
        word1 = prefixes[ suffix ].last +
          InfSuffixRules[ suffix ][:suffix1]
        word2 = ''
        $stderr.puts "For sp = -1: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      when -2
        # Shortest prefix plus the replacement suffix, and the bare
        # shortest prefix
        word1 = prefixes[ suffix ].last +
          InfSuffixRules[ suffix ][:suffix1]
        word2 = prefixes[ suffix ].last
        $stderr.puts "For sp = -2: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      when -3
        # Shortest prefix with each of the two replacement suffixes
        word1 = prefixes[ suffix ].last +
          InfSuffixRules[ suffix ][:suffix1]
        word2 = prefixes[ suffix ].last +
          InfSuffixRules[ suffix ][:suffix2]
        $stderr.puts "For sp = -3: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      when -4
        # The original string
        word1 = word
        word2 = ''
        $stderr.puts "For sp = -4: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      else
        raise IndexError,
          "Couldn't find rule for shortest prefix %p" %
          shortestPrefix
      end

      # Rules 12b and 15: Strip off 'ed' or 'ing'.
      if rule == '12b' or rule == '15'
        # Do we have a monosyllable of this form:
        # o 0+ Consonants
        # o 1+ Vowel
        # o 2 Non-wx
        # Eg: tipped => tipp?
        # Then return tip and tipp.
        # Eg: swimming => swimm?
        # Then return swim and swimm.

        if /^([^aeiou]*[aeiou]+)([^wx])\2$/ =~ word2
          word1 = $1 + $2
          word2 = $1 + $2 + $2
        end
      end
    end
  end

  return Infinitive::new( word1, word2, suffix, rule )
end

.language ⇒ `Object`

Return the name of the language this module is for.



1085
1086
1087

# File 'lib/linguistics/en.rb', line 1085

# Name of the language this module provides functions for.
def language
  return "English"
end

.linkParse(sent) ⇒ `Object` Also known as: sentence

Return a LinkParser::Sentence, with or without a sentence in it.



136
137
138

# File 'lib/linguistics/en/linkparser.rb', line 136

# Stringify +sent+ and hand it to the shared parser returned by
# Linguistics::EN::linkParser, returning whatever its #parse returns.
def linkParse( sent )
  text = sent.to_s
  Linguistics::EN.linkParser.parse( text )
end

.linkParser ⇒ `Object`

The instance of LinkParser used for all Linguistics LinkParser functions.

# File 'lib/linguistics/en/linkparser.rb', line 105

# Return the shared LinkParser instance, creating it on first use.
#
# Raises NotImplementedError (carrying the message of the stored
# @lpError) if an error was recorded in @lpError. On first use
# LinkParser::Word is extended with Linguistics and gets the :en
# delegator proxy installed before the parser is built.
def linkParser
  if @lpError
    raise NotImplementedError,
      "LinkParser functions are not loaded: %s" % @lpError.message
  end

  # Reuse the parser built by an earlier call
  unless @lpParser.nil?
    return @lpParser
  end

  LinkParser::Word.extend( Linguistics )
  Linguistics.installDelegatorProxy( LinkParser::Word, :en )

  # Dictionary options answer '' for any key that is not set explicitly
  dictOpts = Hash.new( '' ).update(
    'datadir' => 'ElizaData',
    'dict'    => 'tiny.dict'
  )

  @lpParser = LinkParser.new( dictOpts, {} )
end

.lpError ⇒ `Object`

If #hasLinkParser? returns false, this can be called to fetch the exception which was raised when trying to load LinkParser.

# File 'lib/linguistics/en/linkparser.rb', line 101

def lpError ; @lpError ; end

.matchgroup(*parts) ⇒ `Object`

Wrap one or more parts in a non-capturing alternation Regexp

# File 'lib/linguistics/en.rb', line 121

# Join the given parts (nested Arrays are flattened) with "|" and wrap
# the result in a non-capturing group. Returns the pattern as a String.
def self::matchgroup( *parts )
  alternatives = parts.flatten.join( "|" )
  "(?:" + alternatives + ")"
end

.no(phrase, count = nil) ⇒ `Object` Also known as: NO

Translate zero-quantified phrase to “no phrase.plural”

# File 'lib/linguistics/en.rb', line 1166

# Quantify +phrase+ with +count+, using "no" for a zero count: the result
# is "no <plural>" when the count matches PL_count_zero and
# "<count> <plural>" otherwise. A nil +count+ falls back to
# Linguistics::num, then to 0. Whitespace around the phrase is preserved.
#
# A phrase with no matchable content is returned as given, which matches
# the other phrase-level functions instead of failing while concatenating
# a nil plural.
def no( phrase, count=nil )
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  pre, word, post = md.to_a[1,3]
  return phrase if word.nil? or word.empty?

  count ||= Linguistics::num || 0

  if /^#{PL_count_zero}$/ =~ count.to_s
    "#{pre}no " + plural( word, 0 ) + post
  else
    "#{pre}#{count} " + plural( word, count ) + post
  end
end

.normalizeCount(count, default = 2) ⇒ `Object`

Normalize a count to either 1 or 2 (singular or plural)

# File 'lib/linguistics/en.rb', line 651

# Reduce +count+ to 1 (singular) or +default+ (plural unless overridden).
# A nil count yields +default+. The count is singular when its string
# form matches PL_count_one (case-insensitively) or, in classical mode,
# when it matches PL_count_zero.
def normalizeCount( count, default=2 )
  return default if count.nil? # Default to plural

  text = count.to_s
  return 1 if /^(#{PL_count_one})$/i =~ text
  return 1 if Linguistics::classical? && /^(#{PL_count_zero})$/ =~ text

  default
end

.number_to_words(num, config) ⇒ `Object`

Return the specified number num as an array of number phrases.

# File 'lib/linguistics/en.rb', line 1027

# Return the number +num+ as an Array of number phrases.
#
# +config+ supplies :zero (returned alone when +num+ is zero), :group
# (digits per group; 0 selects normal thousands-based chunking) and :and
# (the joinword passed to to_hundreds). In grouped mode each run of up to
# :group digits is converted by the NumberToWordsFunctions entry for the
# number of digits actually found.
def number_to_words( num, config )
  return [config[:zero]] if num.to_i.zero?
  chunks = []

  # Break into word-groups if groups is set
  if config[:group].nonzero?

    # Build a Regexp with <config[:group]> number of digits. Any past
    # the first are optional.
    re = Regexp::new( "(\\d)" + ("(\\d)?" * (config[:group] - 1)) )

    # Scan the string, and call the word-chunk function that deals with
    # chunks of the found number of digits.
    num.to_s.scan( re ) {|digits|
      debugMsg "   digits = #{digits.inspect}"
      numerals = digits.flatten.compact.collect {|i| i.to_i}
      debugMsg "   numerals = #{numerals.inspect}"

      # Unmatched optional groups come back as nil, so the number of
      # digits found is the count of non-nil captures. (Array#nitems,
      # used for this previously, was removed in Ruby 1.9.)
      fn = NumberToWordsFunctions[ numerals.length ]
      chunks.push fn.call( config[:zero], *numerals ).strip
    }
  else
    # String#sub returns a new String, so the in-place substitutions
    # below never touch a String passed in by the caller.
    phrase = num.to_s.sub( /\A\s*0+/, '' )
    mill = 0

    # Match backward from the end of the digits in the string, turning
    # chunks of three, of two, and of one into words.
    mill += 1 while
      phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) {
        words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill,
                   config[:and] )
        chunks.unshift words.strip.squeeze(' ') unless words.nil?
        ''
      }

    phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) {
      chunks.unshift to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ')
      ''
    }
    phrase.sub!( /(\d)(?=\D*\Z)/ ) {
      chunks.unshift to_units( $1.to_i, mill ).strip.squeeze(' ')
      ''
    }
  end

  return chunks
end

.numwords(number, hashargs = {}) ⇒ `Object`

Return the specified number as english words. One or more configuration values may be passed to control the returned String:

:group: Controls how many numbers at a time are grouped together. Valid values are 0 (normal grouping), 1 (single-digit grouping, e.g., “one, two, three, four”), 2 (double-digit grouping, e.g., “twelve, thirty-four”), or 3 (triple-digit grouping, e.g., “one twenty-three, four”).
:comma: Set the character/s used to separate word groups. Defaults to “, ”.
:and: Set the word and/or characters used where ‘ and ’ (the default) is normally used. Setting :and to ‘ ’, for example, will cause 2556 to be returned as “two-thousand, five hundred fifty-six” instead of “two-thousand, five hundred and fifty-six”.
:zero: Set the word used to represent the numeral 0 in the result. ‘zero’ is the default.
:decimal: Set the translation of any decimal points in the number; the default is ‘point’.
:asArray: If set to a true value, the number will be returned as an array of word groups instead of a String.

# File 'lib/linguistics/en.rb', line 1224

# Return +number+ as English words. +hashargs+ is merged over
# NumwordDefaults and may set :group, :comma, :and, :zero, :decimal and
# :asArray.
#
# Returns a String, or a flat Array of word groups when :asArray is set.
# Raises RuntimeError if :group is not between 0 and 3.
def numwords( number, hashargs={} )
  # Work on a private copy: the substitutions below must never modify a
  # String handed in by the caller.
  num = number.to_s.dup
  config = NumwordDefaults.dup.update( hashargs )
  raise "Bad chunking option: #{config[:group]}" unless
    config[:group].between?( 0, 3 )

  # Array of number parts: first is everything to the left of the first
  # decimal, followed by any groups of decimal-delimited numbers after that
  parts = []

  # Wordify any sign prefix
  sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''

  # Strip any ordinal suffixes
  ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )

  # Split the number into chunks delimited by '.'
  chunks = if !config[:decimal].empty? then
         if config[:group].nonzero?
           num.split(/\./)
         else
           num.split(/\./, 2)
         end
       else
         [ num ]
       end

  # Splitting an empty (or all-'.') string yields no chunks at all; treat
  # that as an empty whole-number part so parts[0] always exists.
  chunks = [ '' ] if chunks.empty?

  # Wordify each chunk, pushing arrays into the parts array
  chunks.each_with_index {|chunk,section|
    digits = chunk.gsub( /\D+/, '' )

    # If there's nothing in the whole-number part, just push an empty
    # array for it. (An empty later part is wordified as zero by
    # number_to_words.)
    if digits.empty? && section.zero?
      parts.push []
      next
    end

    # The second and succeeding parts of a non-group number are read out
    # digit by digit; everything else uses the configured grouping.
    if config[:group].zero? && section.nonzero?
      parts.push number_to_words( digits, config.dup.update(:group => 1) )
    else
      parts.push number_to_words( digits, config )
    end
  }

  debugMsg "Parts => #{parts.inspect}"

  # Turn the last word of the whole-number part back into an ordinal if
  # the original number came in that way.
  if ord && !parts[0].empty?
    parts[0][-1] = ordinal( parts[0].last )
  end

  # If the caller's expecting an Array return, just flatten and return the
  # parts array.
  if config[:asArray]
    unless sign.empty?
      parts[0].unshift( sign )
    end
    return parts.flatten
  end

  # Catenate each sub-parts array into a whole number part and one or more
  # post-decimal parts. If grouping is turned on, all sub-parts get joined
  # with commas, otherwise just the whole-number part is.
  if config[:group].zero?
    # Count the non-nil entries (Array#nitems, used for this previously,
    # was removed in Ruby 1.9).
    if parts[0].compact.length > 1

      # Join all but the last part together with commas
      wholenum = parts[0][0...-1].join( config[:comma] )

      # If the last part is just a single word, append it to the
      # wholenum part with the configured :and word. This is to get
      # things like 'three thousand and three' instead of 'three
      # thousand, three'.
      if /^\s*(\S+)\s*$/ =~ parts[0].last
        wholenum += config[:and] + parts[0].last
      else
        wholenum += config[:comma] + parts[0].last
      end
    else
      wholenum = parts[0][0]
    end
    decimals = parts[1..-1].collect {|part| part.join(" ")}

    debugMsg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"

    # Join with the configured decimal; if it's empty, just join with
    # spaces.
    joiner = config[:decimal].empty? ? " " : " #{config[:decimal]} "
    words = ([ wholenum ] + decimals).join( joiner ).strip

    # Put the sign word in front as a word of its own
    return [ sign, words ].reject {|str| str.empty? }.join( " " )
  else
    return parts.compact.
      separate( config[:decimal] ).
      delete_if {|el| el.empty?}.
      join( config[:comma] ).
      strip
  end
end

.ordinal(number) ⇒ `Object` Also known as: ORD

Transform the given number into an ordinal word. The number object can be either an Integer or a String.

# File 'lib/linguistics/en.rb', line 1336

# Transform +number+ into an ordinal.
#
# An Integer gets the suffix that Nth lists for its last two digits or,
# failing that, for its last digit (e.g. "3rd"). Anything else is
# stringified and the trailing part matching OrdinalSuffixes is replaced
# with its entry in Ordinals.
def ordinal( number )
  case number
  when Integer
    # Pick the suffix from the magnitude: Ruby's % never returns a
    # negative result for a positive divisor, so -1 % 10 is 9 and a
    # negative number would otherwise get the wrong suffix.
    magnitude = number.abs
    return number.to_s + (Nth[ magnitude % 100 ] || Nth[ magnitude % 10 ])

  else
    return number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
  end
end

.plural(phrase, count = nil) ⇒ `Object` Also known as: PL

Return the plural of the given phrase if count indicates it should be plural.

# File 'lib/linguistics/en.rb', line 1092

# Return the plural of +phrase+, trying the special-adjective rules first,
# then the special-verb rules, and finally the noun rules.
#
# phrase:: Anything responding to #to_s; leading and trailing whitespace is
#          preserved around the inflected text.
# count::  Passed through to the pluralize_* helpers.
#
# When the pattern finds nothing to inflect (for example an empty string),
# the caller's original +phrase+ object is returned untouched.
def plural( phrase, count=nil )
  captures = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a
  leading, core, trailing = captures.values_at( 1, 2, 3 )
  return phrase if core.nil? || core.empty?

  # The first helper that recognises the word wins; nouns are the fallback.
  candidate = pluralize_special_adjective( core, count ) ||
              pluralize_special_verb( core, count ) ||
              pluralize_noun( core, count )

  leading + postprocess( core, candidate ) + trailing
end

.plural_adjective(phrase, count = nil) ⇒ `Object` Also known as: plural_adj, PL_ADJ

Return the plural of the given adjectival phrase if count indicates it should be plural.

# File 'lib/linguistics/en.rb', line 1137

# Return the plural of the adjectival +phrase+. Words the special-adjective
# rules do not recognise are passed through postprocess as they are.
#
# phrase:: Anything responding to #to_s; surrounding whitespace is kept.
# count::  Passed through to pluralize_special_adjective.
#
# When the pattern finds nothing to inflect (for example an empty string),
# the caller's original +phrase+ object is returned untouched.
def plural_adjective( phrase, count=nil )
  captures = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a
  leading, core, trailing = captures.values_at( 1, 2, 3 )
  return phrase if core.nil? || core.empty?

  candidate = pluralize_special_adjective( core, count ) || core
  leading + postprocess( core, candidate ) + trailing
end

.plural_noun(phrase, count = nil) ⇒ `Object` Also known as: PL_N

Return the plural of the given noun phrase if count indicates it should be plural.

# File 'lib/linguistics/en.rb', line 1109

# Return the plural of the noun +phrase+ using the noun rules only.
#
# phrase:: Anything responding to #to_s; surrounding whitespace is kept.
# count::  Passed through to pluralize_noun.
#
# When the pattern finds nothing to inflect (for example an empty string),
# the caller's original +phrase+ object is returned untouched.
def plural_noun( phrase, count=nil )
  captures = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a
  leading, core, trailing = captures.values_at( 1, 2, 3 )
  return phrase if core.nil? || core.empty?

  candidate = pluralize_noun( core, count )
  leading + postprocess( core, candidate ) + trailing
end

.plural_verb(phrase, count = nil) ⇒ `Object` Also known as: PL_V

Return the plural of the given verb phrase if count indicates it should be plural.

# File 'lib/linguistics/en.rb', line 1122

# Return the plural of the verb +phrase+: the special-verb rules are tried
# first and the general-verb rules are the fallback.
#
# phrase:: Anything responding to #to_s; surrounding whitespace is kept.
# count::  Passed through to the pluralize_*_verb helpers.
#
# When the pattern finds nothing to inflect (for example an empty string),
# the caller's original +phrase+ object is returned untouched.
def plural_verb( phrase, count=nil )
  captures = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a
  leading, core, trailing = captures.values_at( 1, 2, 3 )
  return phrase if core.nil? || core.empty?

  candidate = pluralize_special_verb( core, count ) ||
              pluralize_general_verb( core, count )
  leading + postprocess( core, candidate ) + trailing
end

.pluralize_general_verb(word, count) ⇒ `Object`

Pluralize regular verbs

# File 'lib/linguistics/en.rb', line 860

# Pluralize a regular verb.
#
# word::  The verb, optionally followed by further words.
# count:: The count; Linguistics::num is used when it is nil, and the value
#         is normalized with normalizeCount.
#
# Returns +word+ itself for a singular count. Otherwise an ambiguous
# present-tense verb is replaced via PL_v_ambiguous_pres_h (any trailing
# words are kept), and every other verb is returned as-is.
def pluralize_general_verb( word, count )
  quantity = normalizeCount( count || Linguistics::num )
  return word if /^(#{PL_count_one})$/i =~ quantity.to_s

  case word
  # Ambiguous present tenses (simple and compound)
  when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
    PL_v_ambiguous_pres_h[ Regexp.last_match( 1 ).downcase ] +
      Regexp.last_match( 2 )

  # Ambiguous preterite and perfect tenses, like any 1st or 2nd person
  # form, are uninflected.
  else
    word
  end
end

.pluralize_noun(word, count = nil) ⇒ `Object`

Pluralize nouns

# File 'lib/linguistics/en.rb', line 687

# Return the plural form of the noun, pronoun or noun compound +word+.
#
# word::  The word to inflect, expected without surrounding whitespace.
# count:: The count; Linguistics::num is used when it is nil. The value is
#         passed through normalizeCount, and a normalized count of 1 returns
#         +word+ unchanged.
#
# The rules below are tried strictly in order and the first match returns, so
# the order of the +when+ clauses is significant. Values taken from the
# lookup tables may hold two alternatives separated by "|" (for example
# "brothers|brethren"); they are returned unresolved here, and plural_noun
# passes the result through postprocess, which picks one of them.
def pluralize_noun( word, count=nil )
  value = nil
  count ||= Linguistics::num
  count = normalizeCount( count )

  # A singular count needs no inflection
  return word if count == 1

  # Handle user-defined nouns
  #if value = ud_match( word, PL_sb_user_defined )
  #  return value
  #end

  # Handle empty word, singular count and uninflected plurals
  case word
  when ''
    return word
  when /^(#{PL_sb_uninflected})$/i
    return word
  else
    # The "herd" group is left uninflected only in classical mode
    if Linguistics::classical? &&
       /^(#{PL_sb_uninflected_herd})$/i =~ word
      return word
    end
  end

  # Handle compounds ("Governor General", "mother-in-law", "aide-de-camp", etc.)
  # The first captured part is pluralized recursively with an explicit count
  # of 2; the remaining captured text is appended afterwards.
  case word
  when /^(?:#{PL_sb_postfix_adj})$/i
    value = $2
    return pluralize_noun( $1, 2 ) + value

  when /^(?:#{PL_sb_prep_dual_compound})$/i
    value = [ $2, $3 ] 
    # Both the first and the last captured part are pluralized here; the last
    # one is pluralized with the default count.
    return pluralize_noun( $1, 2 ) + value[0] + pluralize_noun( value[1] )

  when /^(?:#{PL_sb_prep_compound})$/i
    value = $2 
    return pluralize_noun( $1, 2 ) + value

  # Handle pronouns
  # (a preposition followed by an accusative pronoun keeps the preposition)
  when /^((?:#{PL_prep})\s+)(#{PL_pron_acc})$/i
    return $1 + PL_pron_acc_h[ $2.downcase ]

  when /^(#{PL_pron_nom})$/i
    return PL_pron_nom_h[ word.downcase ]

  when /^(#{PL_pron_acc})$/i
    return PL_pron_acc_h[ $1.downcase ]

  # Handle isolated irregular plurals 
  # (whatever precedes the irregular word is kept as a prefix)
  when /(.*)\b(#{PL_sb_irregular})$/i
    return $1 + PL_sb_irregular_h[ $2.downcase ]

  # Words in PL_sb_U_man_mans just take an "s" and must be caught before the
  # general "...man" -> "...men" rule below
  when /(#{PL_sb_U_man_mans})$/i
    return "#{$1}s"

  # Handle families of irregular plurals
  when /(.*)man$/i ;         return "#{$1}men"
  when /(.*[ml])ouse$/i ;        return "#{$1}ice"
  when /(.*)goose$/i ;       return "#{$1}geese"
  when /(.*)tooth$/i ;       return "#{$1}teeth"
  when /(.*)foot$/i ;          return "#{$1}feet"

  # Handle unassimilated imports
  # ("...ceps" words are returned unchanged)
  when /(.*)ceps$/i ;          return word
  when /(.*)zoon$/i ;          return "#{$1}zoa"
  when /(.*[csx])is$/i ;       return "#{$1}es"
  when /(#{PL_sb_U_ex_ices})ex$/i; return "#{$1}ices"
  when /(#{PL_sb_U_ix_ices})ix$/i; return "#{$1}ices"
  when /(#{PL_sb_U_um_a})um$/i ;   return "#{$1}a"
  when /(#{PL_sb_U_us_i})us$/i ;   return "#{$1}i"
  when /(#{PL_sb_U_on_a})on$/i ;   return "#{$1}a"
  when /(#{PL_sb_U_a_ae})$/i ;   return "#{$1}e"
  end

  # Handle incompletely assimilated imports
  # (these classical plurals are only produced in classical mode)
  if Linguistics::classical?
    case word
    when /(.*)trix$/i ;       return "#{$1}trices"
    when /(.*)eau$/i ;        return "#{$1}eaux"
    when /(.*)ieu$/i ;        return "#{$1}ieux"
    when /(.{2,}[yia])nx$/i ;   return "#{$1}nges"
    when /(#{PL_sb_C_en_ina})en$/i; return "#{$1}ina"
    when /(#{PL_sb_C_ex_ices})ex$/i;  return "#{$1}ices"
    when /(#{PL_sb_C_ix_ices})ix$/i;  return "#{$1}ices"
    when /(#{PL_sb_C_um_a})um$/i ;  return "#{$1}a"
    when /(#{PL_sb_C_us_i})us$/i ;  return "#{$1}i"
    when /(#{PL_sb_C_us_us})$/i ; return "#{$1}"
    when /(#{PL_sb_C_a_ae})$/i ;  return "#{$1}e"
    when /(#{PL_sb_C_a_ata})a$/i ;  return "#{$1}ata"
    when /(#{PL_sb_C_o_i})o$/i ;  return "#{$1}i"
    when /(#{PL_sb_C_on_a})on$/i ;  return "#{$1}a"
    when /#{PL_sb_C_im}$/i ;    return "#{word}im"
    when /#{PL_sb_C_i}$/i ;     return "#{word}i"
    end
  end


  # Handle singular nouns ending in ...s or other sibilants
  case word
  when /^(#{PL_sb_singular_s})$/i; return "#{$1}es"
  # Case-sensitive: a capitalized word ending in "s" takes "es"
  when /^([A-Z].*s)$/;       return "#{$1}es"
  when /(.*)([cs]h|[zx])$/i ;      return "#{$1}#{$2}es"
  # when /(.*)(us)$/i ;        return "#{$1}#{$2}es"

  # Handle ...f -> ...ves
  when /(.*[eao])lf$/i ;       return "#{$1}lves"; 
  when /(.*[^d])eaf$/i ;       return "#{$1}eaves"
  when /(.*[nlw])ife$/i ;        return "#{$1}ives"
  when /(.*)arf$/i ;         return "#{$1}arves"

  # Handle ...y
  # (vowel + y and capitalized words just take "s"; otherwise y -> ies)
  when /(.*[aeiou])y$/i ;        return "#{$1}ys"
  when /([A-Z].*y)$/ ;       return "#{$1}s"
  when /(.*)y$/i ;         return "#{$1}ies"

  # Handle ...o
  # (listed exceptions and vowel + o take "s"; any other ...o takes "es")
  when /#{PL_sb_U_o_os}$/i ;     return "#{word}s"
  when /[aeiou]o$/i ;          return "#{word}s"
  when /o$/i ;           return "#{word}es"

  # Otherwise just add ...s
  else
    return "#{word}s"
  end
end

.pluralize_special_adjective(word, count) ⇒ `Object`

Handle special adjectives

# File 'lib/linguistics/en.rb', line 884

# Pluralize the adjectives that need special treatment: the words listed in
# PL_adj_special, the possessive adjectives in PL_adj_poss, and possessives
# written with an apostrophe.
#
# word::  The adjective to inflect.
# count:: The count; Linguistics::num is used when it is nil, and the value
#         is normalized with normalizeCount.
#
# Returns +word+ itself for a singular count, the inflected String when one
# of the rules applies, and nil when the word is not recognised.
def pluralize_special_adjective( word, count )
  quantity = normalizeCount( count || Linguistics::num )
  return word if /^(#{PL_count_one})$/i =~ quantity.to_s

  # Handle user-defined verbs
  #if value = ud_match( word, PL_adj_user_defined )
  #  return value
  #end

  case word
  # Known cases
  when /^(#{PL_adj_special})$/i
    PL_adj_special_h[ Regexp.last_match( 1 ).downcase ]

  # Possessive adjectives
  when /^(#{PL_adj_poss})$/i
    PL_adj_poss_h[ Regexp.last_match( 1 ).downcase ]

  # Apostrophe possessives: pluralize the owner, then re-attach the marker
  # ("'" after a plural ending in "s", "'s" otherwise)
  when /^(.*)'s?$/
    owner = plural_noun( Regexp.last_match( 1 ) )
    /s$/ =~ owner ? "#{owner}'" : "#{owner}'s"

  # Anything else is unknown here; the case falls through to nil.
  end
end

.pluralize_special_verb(word, count) ⇒ `Object`

Pluralize special verbs

# File 'lib/linguistics/en.rb', line 817

# Pluralize irregular verbs and third-person singular forms.
#
# word::  The verb, optionally followed by further words.
# count:: The count; Linguistics::num is used when it is nil, and the value
#         is normalized with normalizeCount.
#
# Returns nil for a singular count and for any word this method does not
# handle; otherwise returns the plural form. The clauses are tried in order,
# so earlier rules shadow later ones.
def pluralize_special_verb( word, count )
  quantity = normalizeCount( count || Linguistics::num )
  return nil if /^(#{PL_count_one})$/i =~ quantity.to_s

  # Handle user-defined verbs
  #if value = ud_match( word, PL_v_user_defined )
  #  return value
  #end

  case word
  # Irregular present tense (simple and compound): swap the verb and keep
  # whatever follows it.
  when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
    PL_v_irregular_pres_h[ Regexp.last_match( 1 ).downcase ] +
      Regexp.last_match( 2 )

  # Irregular future, preterite and perfect tenses stay as they are.
  when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
    word

  # Special cases, and anything that still contains whitespace.
  when /^(#{PL_v_special_s})$/, /\s/
    nil

  # Standard 3rd person: chop the ...(e)s off single words.
  when /^(.*)([cs]h|[x]|zz|ss)es$/i
    Regexp.last_match( 1 ) + Regexp.last_match( 2 )
  when /^(..+)ies$/i
    "#{Regexp.last_match( 1 )}y"
  when /^(.+)oes$/i
    "#{Regexp.last_match( 1 )}o"
  when /^(.*[^s])s$/i
    Regexp.last_match( 1 )

  # Otherwise a regular verb, handled elsewhere; the case falls through to nil.
  end
end

.postprocess(original, inflected) ⇒ `Object`

Do normal/classical switching and match capitalization in inflected by examining the original input.

# File 'lib/linguistics/en.rb', line 665

# Do normal/classical switching and match the capitalization of +original+
# in the inflected result.
#
# original::  The word as supplied by the caller; only its capitalization is
#             examined.
# inflected:: The inflected form, optionally written as two alternatives
#             separated by "|" (for example "brothers|brethren"). The part
#             after the "|" is used when Linguistics::classical? is true,
#             the part before it otherwise.
#
# Returns a new String; +inflected+ itself is never modified. Callers hand in
# values fetched straight from the lookup tables, so editing the argument in
# place would permanently alter those tables (and would raise on a frozen
# String).
def postprocess( original, inflected )
  result = inflected.dup
  result.sub!( /([^|]+)\|(.+)/ ) {
    Linguistics::classical? ? $2 : $1
  }

  case original
  when "I"
    # The pronoun "I" is always capitalized, so its plural is left alone.
    return result
  when /^[A-Z]+$/
    return result.upcase
  when /^[A-Z]/
    # Can't use #capitalize, as it will downcase the rest of the string,
    # too.
    result[0,1] = result[0,1].upcase
    return result
  else
    return result
  end
end

.present_participle(word) ⇒ `Object` Also known as: part_pres, PART_PRES

Return the present participle (the "-ing" form) of the given verb.

# File 'lib/linguistics/en.rb', line 1181

def present_participle( word )
       plural = plural_verb( word.to_s, 2 )
  
  plural.sub!( /ie$/, 'y' ) or
    plural.sub!( /ue$/, 'u' ) or
    plural.sub!( /([auy])e$/, '$1' ) or
    plural.sub!( /i$/, '' ) or
    plural.sub!( /([^e])e$/, "\\1" ) or
    /er$/.match( plural ) or
    plural.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, "\\1\\2" )

       return "#{plural}ing"
end

.quantify(phrase, number = 0, args = {}) ⇒ `Object`

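Return a phrase describing the specified number of objects in the given phrase in general terms (e.g. "several", "many", "hundreds of thousands of"). The following key is recognised in args:

:joinword: Sets the word (and any surrounding spaces) used as the word separating the quantity from the noun in the resulting string. Defaults to ' of '.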

# File 'lib/linguistics/en.rb', line 1356

# Describe +number+ of +phrase+ in approximate terms.
#
# phrase:: The thing being counted.
# number:: The quantity; converted with #to_i.
# args::   Options merged over QuantifyDefaults. :joinword is the text
#          placed between the magnitude words and the noun.
#
# 0 and 1 are delegated to #no and #a, counts inside SeveralRange,
# NumberRange, NumerousRange and ManyRange get a fixed quantifier, and
# anything else is described by its order of magnitude.
def quantify( phrase, number=0, args={} )
  num = number.to_i
  config = QuantifyDefaults.dup.update( args )

  case num
  when 0             then no( phrase )
  when 1             then a( phrase )
  when SeveralRange  then "several " + plural( phrase, num )
  when NumberRange   then "a number of " + plural( phrase, num )
  when NumerousRange then "numerous " + plural( phrase, num )
  when ManyRange     then "many " + plural( phrase, num )
  else
    # Anything bigger than the ManyRange gets described like
    # "hundreds of thousands of..." or "millions of...": split the power of
    # ten into whole groups of three digits plus a remainder.
    exponent = Math::log10( num ).to_i
    thousands, subthousands = exponent.divmod( 3 )

    # A remainder of 2 or 1 adds "hundreds" or "tens"; 0 adds nothing.
    stword = { 2 => "hundreds", 1 => "tens" }[ subthousands ]

    # "thousands", "millions", ... or nil when there is no such word.
    thword = plural( to_thousands(thousands).strip )
    thword = nil if thword.empty?

    [ stword, thword, plural(phrase, number) ].compact.join( config[:joinword] )
  end
end

.synset(word, pos = nil, sense = 1) ⇒ `Object`

Look up the synset associated with the given word or collocation in the WordNet lexicon and return a WordNet::Synset object.

# File 'lib/linguistics/en/wordnet.rb', line 154

# Look up the synset for +word+ in the WordNet lexicon.
#
# word::  The word or collocation; converted with #to_s.
# pos::   The part of speech to search. When nil, :noun, :verb, :adjective,
#         :adverb and :other are tried in that order. An Integer given here
#         is taken as the +sense+ instead, so synset( word, 2 ) works.
# sense:: The sense number handed to the lexicon (default 1).
#
# Returns the first truthy result of the lexicon's #lookupSynsets, or the
# last falsy result when no part of speech produced one.
def synset( word, pos=nil, sense=1 )
  lex = Linguistics::EN::wnLexicon

  # Integer rather than Fixnum: Fixnum was removed in Ruby 3.2, and every
  # Fixnum is an Integer on older versions.
  if pos.is_a?( Integer )
    sense = pos
    pos = nil
  end
  postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
  syn = nil

  # The block parameter gets its own name so it does not shadow +pos+.
  postries.each {|part|
    syn = lex.lookupSynsets( word.to_s, part, sense )
    break if syn
  }

  return syn
end

.synsets(word, pos = nil) ⇒ `Object`

Look up all the synsets associated with the given word or collocation in the WordNet lexicon and return an Array of WordNet::Synset objects. If pos is nil, return synsets for all parts of speech.

# File 'lib/linguistics/en/wordnet.rb', line 174

# Collect every synset for +word+ from the WordNet lexicon.
#
# word:: The word or collocation; converted with #to_s.
# pos::  The part of speech to search. When nil, :noun, :verb, :adjective,
#        :adverb and :other are all searched, in that order.
#
# Returns a flat Array of the lookup results with nils removed.
def synsets( word, pos=nil )
  lexicon = Linguistics::EN::wnLexicon
  candidates = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]

  found = candidates.map {|part| lexicon.lookupSynsets( word.to_s, part ) }
  found.flatten.compact
end

.to_hundreds(hundreds, tens = 0, units = 0, thousands = 0, joinword = " and ") ⇒ `Object`

Transform the specified number of hundreds-, tens-, and units-place numerals into a word phrase. If the number of thousands (thousands) is greater than 0, it will be used to determine where the decimal point is in relation to the hundreds-place number.

# File 'lib/linguistics/en.rb', line 996

# Spell out a three-digit group.
#
# hundreds:: The hundreds-place digit.
# tens::     The tens-place digit.
# units::    The units-place digit.
# thousands:: Handed to to_thousands to obtain the text appended after the
#             group.
# joinword:: The text placed between "hundred" and the tens/units part; an
#            empty joinword is replaced by a single space.
#
# Returns the phrase, or nil when all three digits are zero.
def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " )
  separator = joinword.empty? ? ' ' : joinword
  has_hundreds = hundreds.nonzero?
  has_remainder = tens.nonzero? || units.nonzero?

  if has_hundreds
    # The separator only appears when something follows "hundred".
    head = to_units( hundreds ) + " hundred" + (has_remainder ? separator : '')
    head + to_tens( tens, units ) + to_thousands( thousands )
  elsif has_remainder
    to_tens( tens, units ) + to_thousands( thousands )
  end
end

.to_tens(tens, units, thousands = 0) ⇒ `Object`

Transform the specified number of tens- and units-place numerals into a word-phrase at the given number of thousands places.

# File 'lib/linguistics/en.rb', line 982

# Spell out a two-digit group.
#
# tens::      The tens-place digit, used to index Tens.
# units::     The units-place digit.
# thousands:: Handed on to to_units or to_thousands to obtain the text
#             appended after the group.
#
# A tens digit of 1 is looked up in Teens; otherwise the Tens word and the
# units word are joined, with a hyphen only when both digits are non-zero.
def to_tens( tens, units, thousands=0 )
  return Teens[ units ] + to_thousands( thousands ) if tens == 1

  tens_word = Tens[ tens ]
  hyphen = tens.nonzero? && units.nonzero? ? '-' : ''
  tens_word + hyphen + to_units( units, thousands )
end

.to_thousands(thousands = 0) ⇒ `Object`

Transform the specified number into one or more words like ‘thousand’, ‘million’, etc. Uses the thousands (American) system.

# File 'lib/linguistics/en.rb', line 1012

# Build the magnitude word(s) for the given number of thousands-groups.
#
# thousands:: The index of the three-digit group.
#
# The Thousands table is walked from 0 up to +thousands+ in steps of one less
# than its length. The first step contributes the entry at +thousands+ modulo
# that step; every further step contributes the last entry of the table. The
# pieces are joined with a single space.
def to_thousands( thousands=0 )
  stride = Thousands.length - 1

  words = (0..thousands).step( stride ).map {|offset|
    offset.zero? ? Thousands[ thousands % stride ] : Thousands.last
  }

  words.join( " " )
end

.to_units(units, thousands = 0) ⇒ `Object`

Transform the specified number of units-place numerals into a word-phrase at the given number of thousands places.



# File 'lib/linguistics/en.rb', line 975

# Spell out a single digit.
#
# units::     The digit, used to index Units.
# thousands:: Handed to to_thousands to obtain the text appended after the
#             digit word.
def to_units( units, thousands=0 )
  digit_word = Units[ units ]
  magnitude = to_thousands( thousands )
  digit_word + magnitude
end

.wnError ⇒ `Object`

If #haveWordnet? returns false, this can be called to fetch the exception which was raised when WordNet was loaded.

# File 'lib/linguistics/en/wordnet.rb', line 111

def wnError ; @wnError; end

.wnLexicon ⇒ `Object`

The instance of the WordNet::Lexicon used for all Linguistics WordNet functions.

# File 'lib/linguistics/en/wordnet.rb', line 115

# Return the WordNet::Lexicon used by the WordNet functions, creating it on
# first use and reusing the same instance afterwards.
#
# Raises NotImplementedError, carrying the message of the stored error, when
# @wnError is set.
def wnLexicon
  failure = @wnError
  if failure
    raise NotImplementedError,
      format( "WordNet functions are not loaded: %s", failure.message )
  end

  @wnLexicon = WordNet::Lexicon::new unless @wnLexicon
  @wnLexicon
end

Module: Linguistics::EN

Overview

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.a(phrase, count = nil) ⇒ Object Also known as: an, A, AN

.conjunction(obj, args = {}) ⇒ Object

.debugMsg(*msgs) ⇒ Object

.def_synset_function(meth) ⇒ Object

.hasLinkParser? ⇒ Boolean

.hasWordnet? ⇒ Boolean

.indef_article(word, count) ⇒ Object

.infinitive(word) ⇒ Object

.language ⇒ Object

.linkParse(sent) ⇒ Object Also known as: sentence

.linkParser ⇒ Object

.lpError ⇒ Object

.matchgroup(*parts) ⇒ Object

.no(phrase, count = nil) ⇒ Object Also known as: NO

.normalizeCount(count, default = 2) ⇒ Object

.number_to_words(num, config) ⇒ Object

.numwords(number, hashargs = {}) ⇒ Object

.ordinal(number) ⇒ Object Also known as: ORD

.plural(phrase, count = nil) ⇒ Object Also known as: PL

.plural_adjective(phrase, count = nil) ⇒ Object Also known as: plural_adj, PL_ADJ

.plural_noun(phrase, count = nil) ⇒ Object Also known as: PL_N

.plural_verb(phrase, count = nil) ⇒ Object Also known as: PL_V

.pluralize_general_verb(word, count) ⇒ Object

.pluralize_noun(word, count = nil) ⇒ Object

.pluralize_special_adjective(word, count) ⇒ Object

.pluralize_special_verb(word, count) ⇒ Object

.postprocess(original, inflected) ⇒ Object

.present_participle(word) ⇒ Object Also known as: part_pres, PART_PRES

.quantify(phrase, number = 0, args = {}) ⇒ Object

.synset(word, pos = nil, sense = 1) ⇒ Object

.synsets(word, pos = nil) ⇒ Object

.to_hundreds(hundreds, tens = 0, units = 0, thousands = 0, joinword = " and ") ⇒ Object

.to_tens(tens, units, thousands = 0) ⇒ Object

.to_thousands(thousands = 0) ⇒ Object

.to_units(units, thousands = 0) ⇒ Object

.wnError ⇒ Object

.wnLexicon ⇒ Object