Module: Linguistics::EN

Defined in:
lib/linguistics/en.rb,
lib/linguistics/en/wordnet.rb,
lib/linguistics/en/infinitive.rb,
lib/linguistics/en/linkparser.rb

Overview

This module contains English-language linguistics functions accessible from the Linguistics module, or as a standalone function library.

Defined Under Namespace

Classes: Infinitive

Constant Summary collapse

Version =

CVS version tag

/([\d\.]+)/.match( %q{$Revision: 1.8 $} )[1]
Rcsid =

CVS revision tag

%q$Id: en.rb,v 1.8 2003/09/14 10:47:12 deveiant Exp $
PL_sb_irregular_s =

Plurals

{
  "ephemeris"  => "ephemerides",
  "iris"   => "irises|irides",
  "clitoris" => "clitorises|clitorides",
  "corpus" => "corpuses|corpora",
  "opus"   => "opuses|opera",
  "genus"    => "genera",
  "mythos" => "mythoi",
  "penis"    => "penises|penes",
  "testis" => "testes",
}
PL_sb_irregular_h =
{
  "child"    => "children",
  "brother"  => "brothers|brethren",
  "loaf"   => "loaves",
  "hoof"   => "hoofs|hooves",
  "beef"   => "beefs|beeves",
  "money"    => "monies",
  "mongoose" => "mongooses",
  "ox"   => "oxen",
  "cow"    => "cows|kine",
  "soliloquy"  => "soliloquies",
  "graffito" => "graffiti",
  "prima donna"  => "prima donnas|prime donne",
  "octopus"  => "octopuses|octopodes",
  "genie"    => "genies|genii",
  "ganglion" => "ganglions|ganglia",
  "trilby" => "trilbys",
  "turf"   => "turfs|turves",
}.update( PL_sb_irregular_s )
PL_sb_irregular =
matchgroup PL_sb_irregular_h.keys
PL_sb_C_a_ata =

Classical “..a” -> “..ata”

matchgroup %w[
  anathema bema carcinoma charisma diploma
  dogma drama edema enema enigma lemma
  lymphoma magma melisma miasma oedema
  sarcoma schema soma stigma stoma trauma
  gumma pragma
].collect {|word| word[0...-1]}
PL_sb_U_a_ae =

Unconditional “..a” -> “..ae”

matchgroup %w[
  alumna alga vertebra persona
]
PL_sb_C_a_ae =

Classical “..a” -> “..ae”

matchgroup %w[
  amoeba antenna formula hyperbola
  medusa nebula parabola abscissa
  hydra nova lacuna aurora .*umbra
  flora fauna
]
PL_sb_C_en_ina =

Classical “..en” -> “..ina”

matchgroup %w[
  stamen foramen lumen
].collect {|word| word[0...-2] }
PL_sb_U_um_a =

Unconditional “..um” -> “..a”

matchgroup %w[
  bacterium  agendum  desideratum  erratum
  stratum  datum  ovum   extremum
  candelabrum
].collect {|word| word[0...-2] }
PL_sb_C_um_a =

Classical “..um” -> “..a”

# Stems (the word minus its trailing "um") of nouns that take the classical
# "..a" plural. The stems are handed to matchgroup to build the pattern.
#
# "encomium" and "millennium" are the standard spellings; the misspelled
# "enconium" and "millenium" entries are kept alongside them so that input
# which matched before keeps matching.
matchgroup %w[
  maximum  minimum  momentum optimum
  quantum  cranium  curriculum dictum
  phylum aquarium  compendium emporium
  enconium encomium gymnasium honorarium
  interregnum lustrum  memorandum
  millenium  millennium  rostrum
  spectrum speculum  stadium  trapezium
  ultimatum  medium vacuum  velum
  consortium
].collect {|word| word[0...-2]}
PL_sb_U_us_i =

Unconditional “..us” -> “..i”

matchgroup %w[
  alumnus  alveolus bacillus  bronchus
  locus  nucleus  stimulus meniscus
].collect {|word| word[0...-2]}
PL_sb_C_us_i =

Classical “..us” -> “..i”

matchgroup %w[
  focus  radius genius
  incubus  succubus nimbus
  fungus nucleolus stylus
  torus  umbilicus  uterus
  hippopotamus
].collect {|word| word[0...-2]}
PL_sb_C_us_us =

Classical “..us” -> “..us” (assimilated 4th declension Latin nouns)

matchgroup %w[
  status apparatus prospectus sinus
  hiatus impetus plexus
]
PL_sb_U_on_a =

Unconditional “..on” -> “..a”

matchgroup %w[
  criterion  perihelion aphelion
  phenomenon prolegomenon  noumenon
  organon  asyndeton  hyperbaton
].collect {|word| word[0...-2]}
PL_sb_C_on_a =

Classical “..on” -> “..a”

matchgroup %w[
  oxymoron
].collect {|word| word[0...-2]}
PL_sb_C_o_i_a =

Classical “..o” -> “..i” (but normally -> “..os”)

%w[
  solo   soprano  basso  alto
  contralto  tempo  piano
]
PL_sb_C_o_i =
matchgroup PL_sb_C_o_i_a.collect{|word| word[0...-1]}
PL_sb_U_o_os =

Always “..o” -> “..os”

matchgroup( %w[
  albino archipelago armadillo
  commando crescendo fiasco
  ditto  dynamo embryo
  ghetto guano inferno
  jumbo  lumbago  magneto
  manifesto  medico octavo
  photo  pro    quarto  
  canto  lingo  generalissimo
  stylo  rhino
] | PL_sb_C_o_i_a )
PL_sb_U_ex_ices =

Unconditional “..[ei]x” -> “..ices”

matchgroup %w[
  codex  murex  silex
].collect {|word| word[0...-2]}
PL_sb_U_ix_ices =
matchgroup %w[
  radix  helix
].collect {|word| word[0...-2]}
PL_sb_C_ex_ices =

Classical “..[ei]x” -> “..ices”

matchgroup %w[
  vortex vertex  cortex latex
  pontifex apex    index simplex
].collect {|word| word[0...-2]}
PL_sb_C_ix_ices =
matchgroup %w[
  appendix
].collect {|word| word[0...-2]}
PL_sb_C_i =

Arabic: “..” -> “..i”

matchgroup %w[
  afrit  afreet efreet
]
PL_sb_C_im =

Hebrew: “..” -> “..im”

matchgroup %w[
  goy    seraph  cherub
]
PL_sb_U_man_mans =

Unconditional “..man” -> “..mans”

matchgroup %w[
  human
  Alabaman Bahaman Burman German
  Hiroshiman Liman Nakayaman Oklahoman
  Panaman Selman Sonaman Tacoman Yakiman
  Yokohaman Yuman
]
# Words ending in "s" that are listed as uninflected (per the constant's
# name), grouped by category. Entries are pattern fragments rather than
# plain words: a few carry a leading ".*".
PL_sb_uninflected_s = [
  # Pairs or groups subsumed to a singular...
  %w[
    breeches britches clippers gallows hijinks
    headquarters pliers scissors testes herpes
    pincers shears proceedings trousers
  ],

  # Unassimilated Latin 4th declension
  %w[cantus coitus nexus],

  # Recent imports...
  %w[contretemps corps debris .*ois],

  # Diseases
  %w[.*measles mumps],

  # Miscellaneous others...
  %w[
    diabetes jackanapes series species rabies
    chassis innings news mews
  ],
].flatten
PL_sb_uninflected_herd =

Don’t inflect in classical mode, otherwise normal inflection

matchgroup %w[
  wildebeest swine eland bison buffalo
  elk moose rhinoceros
]
PL_sb_uninflected =
matchgroup [

  # Some fish and herd animals
  ".*fish", "tuna", "salmon", "mackerel", "trout",
  "bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting", 

  ".*deer", ".*sheep", 

  # All nationals ending in -ese
  "Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
  "Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
  "Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
  "Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
  "Shavese", "Vermontese", "Wenchowese", "Yengeese",
  ".*[nrlm]ese",

  # Some words ending in ...s (often pairs taken as a whole)
  PL_sb_uninflected_s,

  # Diseases
  ".*pox",

  # Other oddities
  "graffiti", "djinn"
]
PL_sb_singular_s =

Singular words ending in …s (all inflect with …es)

matchgroup %w[
  .*ss
  acropolis aegis alias arthritis asbestos atlas
  bathos bias bronchitis bursitis caddis cannabis
  canvas chaos cosmos dais digitalis encephalitis
  epidermis ethos eyas gas glottis hepatitis
  hubris ibis lens mantis marquis metropolis
  neuritis pathos pelvis polis rhinoceros
  sassafras tonsillitis trellis .*us
]
PL_v_special_s =
matchgroup [
  PL_sb_singular_s,
  PL_sb_uninflected_s,
  PL_sb_irregular_s.keys,
  '(.*[csx])is',
  '(.*)ceps',
  '[A-Z].*s',
]
PL_sb_postfix_adj =
'(' + {

  'general' => ['(?!major|lieutenant|brigadier|adjutant)\S+'],
  'martial' => ["court"],

}.collect {|key,val|
  matchgroup( matchgroup(val) + "(?=(?:-|\\s+)#{key})" )
}.join("|") + ")(.*)"
PL_sb_military =
%r'major|lieutenant|brigadier|adjutant|quartermaster'
PL_sb_general =
%r'((?!#{PL_sb_military.source}).*?)((-|\s+)general)'
PL_prep =
matchgroup %w[
  about above across after among around at athwart before behind
  below beneath beside besides between betwixt beyond but by
  during except for from in into near of off on onto out over
  since till to under until unto upon with
]
PL_sb_prep_dual_compound =
%r'(.*?)((?:-|\s+)(?:#{PL_prep}|d[eu])(?:-|\s+))a(?:-|\s+)(.*)'
PL_sb_prep_compound =
%r'(.*?)((-|\s+)(#{PL_prep}|d[eu])((-|\s+)(.*))?)'
# Plural forms of nominative, reflexive and possessive pronouns, keyed by
# the lowercase singular form. Each row below lists singular/plural pairs
# in the order they are inserted into the hash.
PL_pron_nom_h = Hash[*[
  # nominative, plural,  reflexive, plural
  %w[i     we    myself    ourselves],
  %w[you   you   yourself  yourselves],
  %w[she   they  herself   themselves],
  %w[he    they  himself   themselves],
  %w[it    they  itself    themselves],
  %w[they  they  themself  themselves],

  # possessive, plural
  %w[mine    ours],
  %w[yours   yours],
  %w[hers    theirs],
  %w[his     theirs],
  %w[its     theirs],
  %w[theirs  theirs],
].flatten]
PL_pron_nom =
matchgroup PL_pron_nom_h.keys
# Plural forms of accusative and reflexive pronouns, keyed by the lowercase
# singular form. Accusative and reflexive entries alternate, one per line.
PL_pron_acc_h = {
  "me"       => "us",           # accusative
  "myself"   => "ourselves",    # reflexive
  "you"      => "you",
  "yourself" => "yourselves",
  "her"      => "them",
  "herself"  => "themselves",
  "him"      => "them",
  "himself"  => "themselves",
  "it"       => "them",
  "itself"   => "themselves",
  "them"     => "them",
  "themself" => "themselves",
}
PL_pron_acc =
matchgroup PL_pron_acc_h.keys
# Irregular present/past verb forms mapped to their plural form. Each row
# gives the 1st, 2nd and 3rd person singular forms followed by the plural
# they all map to; a form repeated within a row yields a single key.
PL_v_irregular_pres_h =
[
  #  1st sing.  2nd sing.  3rd sing.      plural
  [%w[am        are        is],           "are"],
  [%w[was       were       was],          "were"],
  [%w[have      have       has],          "have"],
].inject({}) {|plural_of, (singulars, plural)|
  # Give every key its own value string, as separate literals would.
  singulars.each {|form| plural_of[form] = plural.dup }
  plural_of
}
PL_v_irregular_pres =
matchgroup PL_v_irregular_pres_h.keys
# Present-tense verb forms mapped to their plural form. Each row gives the
# 1st/2nd person singular form (which is also the plural) and the 3rd
# person singular form; both become keys pointing at the former.
PL_v_ambiguous_pres_h =
[
  #  1st/2nd sing.  3rd sing.
  %w[act            acts],
  %w[blame          blames],
  %w[can            can],
  %w[must           must],
  %w[fly            flies],
  %w[copy           copies],
  %w[drink          drinks],
  %w[fight          fights],
  %w[fire           fires],
  %w[like           likes],
  %w[look           looks],
  %w[make           makes],
  %w[reach          reaches],
  %w[run            runs],
  %w[sink           sinks],
  %w[sleep          sleeps],
  %w[view           views],
].inject({}) {|plural_of, (plain, third_person)|
  plural_of[plain] = plain
  # Separate value object per key, as separate literals would give.
  plural_of[third_person] = plain.dup
  plural_of
}
PL_v_ambiguous_pres =
matchgroup PL_v_ambiguous_pres_h.keys
PL_v_irregular_non_pres =
matchgroup %w[
  did had ate made put 
  spent fought sank gave sought
  shall could ought should
]
PL_v_ambiguous_non_pres =
matchgroup %w[
  thought saw bent will might cut
]
PL_count_zero =
matchgroup %w[
  0 no zero nil
]
PL_count_one =
matchgroup %w[
  1 a an one each every this that
]
# Singular articles and demonstratives mapped to their plural counterparts.
PL_adj_special_h = {
  "a"    => "some",
  "an"   => "some",
  "this" => "these",
  "that" => "those",
}
PL_adj_special =
matchgroup PL_adj_special_h.keys
# Singular possessive adjectives mapped to their plural counterparts.
# The word list is read pairwise: singular first, then its plural.
PL_adj_poss_h = Hash[*%w[
  my     our
  your   your
  its    their
  her    their
  his    their
  their  their
]]
PL_adj_poss =
matchgroup PL_adj_poss_h.keys
Nth =

Numerical inflections

{
  0 => 'th',
  1 => 'st',
  2 => 'nd',
  3 => 'rd',
  4 => 'th',
  5 => 'th',
  6 => 'th',
  7 => 'th',
  8 => 'th',
  9 => 'th',
  11 => 'th',
  12 => 'th',
  13 => 'th',
}
Ordinals =

Ordinal word parts

{}
OrdinalSuffixes =
Ordinals.keys.join("|") + "|"
Units =

Numeral names

[''] + %w[one two three four five six seven eight nine]
# Names of the numbers ten through nineteen; index 0 is "ten".
Teens = [
  'ten',
  'eleven',
  'twelve',
  'thirteen',
  'fourteen',
  'fifteen',
  'sixteen',
  'seventeen',
  'eighteen',
  'nineteen',
]
# Names of the multiples of ten, indexed by the tens digit. Indexes 0 and 1
# are empty strings, so "twenty" sits at index 2.
Tens = [
  '', '',
  'twenty', 'thirty', 'forty', 'fifty',
  'sixty', 'seventy', 'eighty', 'ninety',
]
# Names for successive powers of a thousand, each with a leading space.
# Index 0 is a lone space, index 1 is " thousand", and the remaining
# entries are built by appending "illion" to each listed stem.
Thousands = [' ', ' thousand'].concat(
  %w[
    m b tr quadr quint sext sept oct non dec undec duodec tredec
    quattuordec quindec sexdec septemdec octodec novemdec vigint
  ].map {|stem| " #{stem}illion" }
)
NumberToWordsFunctions =

A collection of functions for transforming digits into word phrases. Indexed by the number of digits being transformed; e.g., NumberToWordsFunctions[2] is the function for transforming double-digit numbers.

[
  # Index 0 -- there is no zero-digit case; calling this always raises.
  proc {|*args| raise "No digits (#{args.inspect})"},

  # Index 1 -- single digit: the unit name from to_units, or the +zero+
  # word followed by a space when the digit is 0.
  proc {|zero,x|
    if x.nonzero?
      to_units( x )
    else
      "#{zero} "
    end
  },

  # Index 2 -- double digits: delegate to to_tens when the leading digit is
  # non-zero; otherwise emit the +zero+ word for the leading digit followed
  # by the single-digit rendering of the second (or the +zero+ word twice
  # when both digits are 0).
  proc {|zero,x,y|
    next to_tens( x, y ) if x.nonzero?
    next ([zero] * 2).join(" ") unless y.nonzero?

    "#{zero} " + NumberToWordsFunctions[1].call( zero, y )
  },

  # Index 3 -- triple digits: the single-digit rendering of the first digit
  # concatenated with the double-digit rendering of the other two.
  proc {|zero,x,y,z|
    leading  = NumberToWordsFunctions[1].call( zero, x )
    trailing = NumberToWordsFunctions[2].call( zero, y, z )
    leading + trailing
  }
]
A_abbrev =

This pattern matches strings of capitals starting with a “vowel-sound” consonant followed by another consonant, and which are not likely to be real words (oh, all right then, it’s just magic!)

%{
  (?! FJO | [HLMNS]Y.  | RY[EO] | SQU
    | ( F[LR]? | [HL] | MN? | N | RH? | S[CHKLMNPTVW]? | X(YL)?) [AEIOU])
  [FHLMNRSX][A-Z]
}
A_y_cons =

This pattern codes the beginnings of all English words beginning with a ‘y’ followed by a consonant. Any other y-consonant prefix therefore implies an abbreviation.

'y(b[lor]|cl[ea]|fere|gg|p[ios]|rou|tt)'
A_explicit_an =

Exceptions to exceptions

matchgroup( "euler", "hour(?!i)", "heir", "honest", "hono" )
NumwordDefaults =

Default configuration arguments for the #numwords function

{
  :group   => 0,
  :comma   => ', ',
  :and   => ' and ',
  :zero    => 'zero',
  :decimal => 'point',
  :asArray => false,
}
SeveralRange =

Default ranges for #quantify

2..5
NumberRange =
6..19
NumerousRange =
20..45
ManyRange =
46..99
QuantifyDefaults =

Default configuration arguments for the #quantify function

{
  :joinword  => " of ",
}
ConjunctionDefaults =

Default configuration arguments for the #conjunction (junction, what’s your) function.

{
  :separator   => ', ',
  :altsep      => '; ',
  :penultimate => true,
  :conjunctive => 'and',
  :combine   => true,
  :casefold    => true,
  :generalize    => false,
  :quantsort   => true,
}
IrregularInfinitives =

Irregular words => infinitive forms

{
  'abided'     => 'abide',
  'abode'        => 'abide',
  'am'       => 'be',
  'are'        => 'be',
  'arisen'     => 'arise',
  'arose'        => 'arise',
  'ate'        => 'eat',
  'awaked'     => 'awake',
  'awoke'        => 'awake',
  'bade'       => 'bid',
  'beaten'     => 'beat',
  'became'     => 'become',
  'been'       => 'be',
  'befallen'     => 'befall',
  'befell'     => 'befall',
  'began'        => 'begin',
  'begat'        => 'beget',
  'begot'        => 'beget',
  'begotten'     => 'beget',
  'begun'        => 'begin',
  'beheld'     => 'behold',
  'bent'       => 'bend',
  'bereaved'     => 'bereave',
  'bereft'     => 'bereave',
  'beseeched'      => 'beseech',
  'besought'     => 'beseech',
  'bespoke'      => 'bespeak',
  'bespoken'     => 'bespeak',
  'bestrewed'      => 'bestrew',
  'bestrewn'     => 'bestrew',
  'bestrid'      => 'bestride',
  'bestridden'   => 'bestride',
  'bestrode'     => 'bestride',
  'betaken'      => 'betake',
  'bethought'      => 'bethink',
  'betook'     => 'betake',
  'betted'     => 'bet',
  'bidden'     => 'bid',
  'bided'        => 'bide',
  'bit'        => 'bite',
  'bitten'     => 'bite',
  'bled'       => 'bleed',
  'blended'      => 'blend',
  'blent'        => 'blend',
  'blessed'      => 'bless',
  'blest'        => 'bless',
  'blew'       => 'blow',
  'blown'        => 'blow',
  'bode'       => 'bide',
  'bore'       => 'bear',
  'born'       => 'bear',
  'borne'        => 'bear',
  'bought'     => 'buy',
  'bound'        => 'bind',
  'bred'       => 'breed',
  'broadcasted'    => 'broadcast',
  'broke'        => 'break',
  'broken'     => 'break',
  'brought'      => 'bring',
  'browbeaten'   => 'browbeat',
  'built'        => 'build',
  'burned'     => 'burn',
  'burnt'        => 'burn',
  'came'       => 'come',
  'caught'     => 'catch',
  'chid'       => 'chide',
  'chidden'      => 'chide',
  'chided'     => 'chide',
  'chose'        => 'choose',
  'chosen'     => 'choose',
  'clad'       => 'clothe',
  'clave'        => 'cleave',
  'cleaved'      => 'cleave',
  'cleft'        => 'cleave',
  'clothed'      => 'clothe',
  'clove'        => 'cleave',
  'cloven'     => 'cleave',
  'clung'        => 'cling',
  'costed'     => 'cost',
  'could'        => 'can',
  'crept'        => 'creep',
  'crew'       => 'crow',
  'crowed'     => 'crow',
  'dealt'        => 'deal',
  'did'        => 'do',
  'done'       => 'do',
  'dove'       => 'dive',
  'drank'        => 'drink',
  'drawn'        => 'draw',
  'dreamed'      => 'dream',
  'dreamt'     => 'dream',
  'drew'       => 'draw',
  'driven'     => 'drive',
  'drove'        => 'drive',
  'drunk'        => 'drink',
  'dug'        => 'dig',
  'dwelled'      => 'dwell',
  'dwelt'        => 'dwell',
  'eaten'        => 'eat',
  'fallen'     => 'fall',
  'fed'        => 'feed',
  'fell'       => 'fall',
  'felt'       => 'feel',
  'fled'       => 'flee',
  'flew'       => 'fly',
  'flown'        => 'fly',
  'flung'        => 'fling',
  'forbad'     => 'forbid',
  'forbade'      => 'forbid',
  'forbidden'      => 'forbid',
  'forbore'      => 'forbear',
  'forborne'     => 'forbear',
  'fordid'     => 'fordo',
  'fordone'      => 'fordo',
  'forecasted'   => 'forecast',
  'foregone'     => 'forego',
  'foreknew'     => 'foreknow',
  'foreknown'      => 'foreknow',
  'foreran'      => 'forerun',
  'foresaw'      => 'foresee',
  'foreshowed'   => 'foreshow',
  'foreshown'      => 'foreshow',
  'foretold'     => 'foretell',
  'forewent'     => 'forego',
  'forgave'      => 'forgive',
  'forgiven'     => 'forgive',
  'forgot'     => 'forget',
  'forgotten'      => 'forget',
  'forsaken'     => 'forsake',
  'forseen'      => 'foresee',
  'forsook'      => 'forsake',
  'forswore'     => 'forswear',
  'forsworn'     => 'forswear',
  'fought'     => 'fight',
  'found'        => 'find',
  'froze'        => 'freeze',
  'frozen'     => 'freeze',
  'gainsaid'     => 'gainsay',
  'gave'       => 'give',
  'gilded'     => 'gild',
  'gilt'       => 'gild',
  'girded'     => 'gird',
  'girt'       => 'gird',
  'given'        => 'give',
  'gone'       => 'go',
  'got'        => 'get',
  'gotten'     => 'get',
  'graved'     => 'grave',
  'graven'     => 'grave',
  'grew'       => 'grow',
  'ground'     => 'grind',
  'grown'        => 'grow',
  'had'        => 'have',
  'hamstringed'    => 'hamstring',
  'hamstrung'      => 'hamstring',
  'hanged'     => 'hang',
  'heard'        => 'hear',
  'heaved'     => 'heave',
  'held'       => 'hold',
  'hewed'        => 'hew',
  'hewn'       => 'hew',
  'hid'        => 'hide',
  'hidden'     => 'hide',
  'hove'       => 'heave',
  'hung'       => 'hang',
  'inlaid'     => 'inlay',
  'is'       => 'be',
  'kept'       => 'keep',
  'kneeled'      => 'kneel',
  'knelt'        => 'kneel',
  'knew'       => 'know',
  'knitted'      => 'knit',
  'known'        => 'know',
  'laded'        => 'lade',
  'laden'        => 'lade',
  'laid'       => 'lay',
  'lain'       => 'lie',
  'lay'        => 'lie',
  'leaned'     => 'lean',
  'leant'        => 'lean',
  'leaped'     => 'leap',
  'leapt'        => 'leap',
  'learned'      => 'learn',
  'learnt'     => 'learn',
  'led'        => 'lead',
  'left'       => 'leave',
  'lent'       => 'lend',
  'lighted'      => 'light',
  'lit'        => 'light',
  'lost'       => 'lose',
  'made'       => 'make',
  'meant'        => 'mean',
  'melted'     => 'melt',
  'met'        => 'meet',
  'might'        => 'may',
  'misdealt'     => 'misdeal',
  'misgave'      => 'misgive',
  'misgiven'     => 'misgive',
  'mislaid'      => 'mislay',
  'misled'     => 'mislead',
  'mistaken'     => 'mistake',
  'mistook'      => 'mistake',
  'misunderstood'    => 'misunderstand',
  'molten'     => 'melt',
  'mowed'        => 'mow',
  'mown'       => 'mow',
  'outate'     => 'outeat',
  'outbade'      => 'outbid',
  'outbidden'      => 'outbid',
  'outbred'      => 'outbreed',
  'outdid'     => 'outdo',
  'outdone'      => 'outdo',
  'outeaten'     => 'outeat',
  'outfought'      => 'outfight',
  'outgone'      => 'outgo',
  'outgrew'      => 'outgrow',
  'outgrown'     => 'outgrow',
  'outlaid'      => 'outlay',
  'outran'     => 'outrun',
  'outridden'      => 'outride',
  'outrode'      => 'outride',
  'outsat'     => 'outsit',
  'outshone'     => 'outshine',
  'outshot'      => 'outshoot',
  'outsold'      => 'outsell',
  'outspent'     => 'outspend',
  'outthrew'     => 'outthrow',
  'outthrown'      => 'outthrow',
  'outwent'      => 'outgo',
  'outwore'      => 'outwear',
  'outworn'      => 'outwear',
  'overate'      => 'overeat',
  'overbade'     => 'overbid',
  'overbidden'   => 'overbid',
  'overblew'     => 'overblow',
  'overblown'      => 'overblow',
  'overbore'     => 'overbear',
  'overborn'     => 'overbear',
  'overborne'      => 'overbear',
  'overbought'   => 'overbuy',
  'overbuilt'      => 'overbuild',
  'overcame'     => 'overcome',
  'overdid'      => 'overdo',
  'overdone'     => 'overdo',
  'overdrawn'      => 'overdraw',
  'overdrew'     => 'overdraw',
  'overdriven'   => 'overdrive',
  'overdrove'      => 'overdrive',
  'overeaten'      => 'overeat',
  'overfed'      => 'overfeed',
  'overflew'     => 'overfly',
  'overflown'      => 'overfly',
  'overgrew'     => 'overgrow',
  'overgrown'      => 'overgrow',
  'overhanged'   => 'overhang',
  'overheard'      => 'overhear',
  'overhung'     => 'overhang',
  'overladed'      => 'overlade',
  'overladen'      => 'overlade',
  'overlaid'     => 'overlay',
  'overlain'     => 'overlie',
  'overlay'      => 'overlie',
  'overleaped'   => 'overleap',
  'overleapt'      => 'overleap',
  'overpaid'     => 'overpay',
  'overran'      => 'overrun',
  'overridden'   => 'override',
  'overrode'     => 'override',
  'oversaw'      => 'oversee',
  'overseen'     => 'oversee',
  'oversewed'      => 'oversew',
  'oversewn'     => 'oversew',
  'overshot'     => 'overshoot',
  'overslept'      => 'oversleep',
  'overspent'      => 'overspend',
  'overtaken'      => 'overtake',
  'overthrew'      => 'overthrow',
  'overthrown'   => 'overthrow',
  'overtook'     => 'overtake',
  'overwinded'   => 'overwind',
  'overwound'      => 'overwind',
  'overwritten'    => 'overwrite',
  'overwrote'      => 'overwrite',
  'paid'       => 'pay',
  'partaken'     => 'partake',
  'partook'      => 'partake',
  'prechose'     => 'prechoose',
  'prechosen'      => 'prechoose',
  'proved'     => 'prove',
  'proven'     => 'prove',
  'quitted'      => 'quit',
  'ran'        => 'run',
  'rang'       => 'ring',
  'reaved'     => 'reave',
  'rebuilt'      => 'rebuild',
  'reeved'     => 'reeve',
  'reft'       => 'reave',
  'relaid'     => 'relay',
  'rent'       => 'rend',
  'repaid'     => 'repay',
  'retold'     => 'retell',
  'ridded'     => 'rid',
  'ridden'     => 'ride',
  'risen'        => 'rise',
  'rived'        => 'rive',
  'riven'        => 'rive',
  'rode'       => 'ride',
  'rose'       => 'rise',
  'rove'       => 'reeve',
  'rung'       => 'ring',
  'said'       => 'say',
  'sang'       => 'sing',
  'sank'       => 'sink',
  'sat'        => 'sit',
  'saw'        => 'see',
  'sawed'        => 'saw',
  'sawn'       => 'saw',
  'seen'       => 'see',
  'sent'       => 'send',
  'sewed'        => 'sew',
  'sewn'       => 'sew',
  'shaken'     => 'shake',
  'shaved'     => 'shave',
  'shaven'     => 'shave',
  'sheared'      => 'shear',
  'shined'     => 'shine',
  'shod'       => 'shoe',
  'shoed'        => 'shoe',
  'shone'        => 'shine',
  'shook'        => 'shake',
  'shorn'        => 'shear',
  'shot'       => 'shoot',
  'showed'     => 'show',
  'shown'        => 'show',
  'shrank'     => 'shrink',
  'shredded'     => 'shred',
  'shrived'      => 'shrive',
  'shriven'      => 'shrive',
  'shrove'     => 'shrive',
  'shrunk'     => 'shrink',
  'shrunken'     => 'shrink',
  'slain'        => 'slay',
  'slept'        => 'sleep',
  'slew'       => 'slay',
  'slid'       => 'slide',
  'slidden'      => 'slide',
  'slung'        => 'sling',
  'slunk'        => 'slink',
  'smelled'      => 'smell',
  'smelt'        => 'smell',
  'smitten'      => 'smite',
  'smote'        => 'smite',
  'snuck'        => 'sneak',
  'sold'       => 'sell',
  'sought'     => 'seek',
  'sowed'        => 'sow',
  'sown'       => 'sow',
  'span'       => 'spin',
  'spat'       => 'spit',
  'sped'       => 'speed',
  'speeded'      => 'speed',
  'spelled'      => 'spell',
  'spelt'        => 'spell',
  'spent'        => 'spend',
  'spilled'      => 'spill',
  'spilt'        => 'spill',
  'spoiled'      => 'spoil',
  'spoilt'     => 'spoil',
  'spoke'        => 'speak',
  'spoken'     => 'speak',
  'sprang'     => 'spring',
  'sprung'     => 'spring',
  'spun'       => 'spin',
  'stank'        => 'stink',
  'staved'     => 'stave',
  'stole'        => 'steal',
  'stolen'     => 'steal',
  'stood'        => 'stand',
  'stove'        => 'stave',
  'strewed'      => 'strew',
  'strewn'     => 'strew',
  'stricken'     => 'strike',
  'strid'        => 'stride',
  'stridden'     => 'stride',
  'strived'      => 'strive',
  'striven'      => 'strive',
  'strode'     => 'stride',
  'strove'     => 'strive',
  'struck'     => 'strike',
  'strung'     => 'string',
  'stuck'        => 'stick',
  'stung'        => 'sting',
  'stunk'        => 'stink',
  'sung'       => 'sing',
  'sunk'       => 'sink',
  'sunken'     => 'sink',
  'swam'       => 'swim',
  'sweated'      => 'sweat',
  'swelled'      => 'swell',
  'swept'        => 'sweep',
  'swollen'      => 'swell',
  'swore'        => 'swear',
  'sworn'        => 'swear',
  'swum'       => 'swim',
  'swung'        => 'swing',
  'taken'        => 'take',
  'taught'     => 'teach',
  'thought'      => 'think',
  'threw'        => 'throw',
  'thrived'      => 'thrive',
  'thriven'      => 'thrive',
  'throve'     => 'thrive',
  'thrown'     => 'throw',
  'told'       => 'tell',
  'took'       => 'take',
  'tore'       => 'tear',
  'torn'       => 'tear',
  'trod'       => 'tread',
  'trodden'      => 'tread',
  'unbent'     => 'unbend',
  'unbound'      => 'unbind',
  'unbuilt'      => 'unbuild',
  'underbought'    => 'underbuy',
  'underfed'     => 'underfeed',
  'undergone'      => 'undergo',
  'underlaid'      => 'underlay',
  'underlain'      => 'underlie',
  'underlay'     => 'underlie',
  'underpaid'      => 'underpay',
  'underran'     => 'underrun',
  'undershot'      => 'undershoot',
  'undersold'      => 'undersell',
  'understood'   => 'understand',
  'undertaken'   => 'undertake',
  'undertook'      => 'undertake',
  'underwent'      => 'undergo',
  'underwritten'   => 'underwrite',
  'underwrote'   => 'underwrite',
  'undid'        => 'undo',
  'undone'     => 'undo',
  'undrawn'      => 'undraw',
  'undrew'     => 'undraw',
  'unfroze'      => 'unfreeze',
  'unfrozen'     => 'unfreeze',
  'ungirded'     => 'ungird',
  'ungirt'     => 'ungird',
  'unhanged'     => 'unhang',
  'unhung'     => 'unhang',
  'unknitted'      => 'unknit',
  'unladed'      => 'unlade',
  'unladen'      => 'unlade',
  'unlaid'     => 'unlay',
  'unlearned'      => 'unlearn',
  'unlearnt'     => 'unlearn',
  'unmade'     => 'unmake',
  'unreeved'     => 'unreeve',
  'unrove'     => 'unreeve',
  'unsaid'     => 'unsay',
  'unslung'      => 'unsling',
  'unspoke'      => 'unspeak',
  'unspoken'     => 'unspeak',
  'unstrung'     => 'unstring',
  'unstuck'      => 'unstick',
  'unswore'      => 'unswear',
  'unsworn'      => 'unswear',
  'untaught'     => 'unteach',
  'unthought'      => 'unthink',
  'untrod'     => 'untread',
  'untrodden'      => 'untread',
  'unwinded'     => 'unwind',
  'unwound'      => 'unwind',
  'unwove'     => 'unweave',
  'unwoven'      => 'unweave',
  'upbuilt'      => 'upbuild',
  'upheld'     => 'uphold',
  'uprisen'      => 'uprise',
  'uprose'     => 'uprise',
  'upswept'      => 'upsweep',
  'upswung'      => 'upswing',
  'waked'        => 'wake',
  'was'        => 'be',
  'waylaid'      => 'waylay',
  'wedded'     => 'wed',
  'went'       => 'go',
  'wept'       => 'weep',
  'were'       => 'be',
  'wetted'     => 'wet',
  'winded'     => 'wind',
  'wist'       => 'wit',
  'wot'        => 'wit',
  'withdrawn'      => 'withdraw',
  'withdrew'     => 'withdraw',
  'withheld'     => 'withhold',
  'withstood'      => 'withstand',
  'woke'       => 'wake',
  'woken'        => 'wake',
  'won'        => 'win',
  'wore'       => 'wear',
  'worked'     => 'work',
  'worn'       => 'wear',
  'wound'        => 'wind',
  'wove'       => 'weave',
  'woven'        => 'weave',
  'written'      => 'write',
  'wrote'        => 'write',
  'wrought'      => 'work',
  'wrung'        => 'wring',
}
InfSuffixRules =

Mapping of word suffixes to infinitive rules.

{
  # '<suffix>' => {
  #  :order => <sort order>,
  #  :rule  => <rule number>,

  # :word1 == 0 => Use 0, the index of the longest prefix
  #  within @{$prefix{$self->{'suffix'} } }, below.

  # :word1 == 1 => Use 1, the index of the 2nd longest prefix
  #  within @{$prefix{$self->{'suffix'} } }, below.

  # :word1 == -1 => Use the index of the shortest prefix
  #  within @{$prefix{$self->{'suffix'} } }, below + a letter.

  # :word1 == -2 => Use the index of the shortest prefix
  #  within @{$prefix{$self->{'suffix'} } }, below + a letter,
  #  and use the shortest prefix as well.

  # :word1 == -3 => Use the index of the shortest prefix
  #  within @{$prefix{$self->{'suffix'} } }, below + meter,
  #  and use the shortest prefix + metre as well.

  # :word1 == -4 => Use the original string.
  'hes' => {
    :order    => 1011,
    :rule   => '1',
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ses' => {
    :order    => 1021,
    :rule   => '2',
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'xes' => {
    :order    => 1031,
    :rule   => '3',
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'zes' => {
    :order    => 1041,
    :rule   => '4',
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'iless' => {
    :order    => 1051,
    :rule   => '43a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'less' => {
    :order    => 1052,
    :rule   => '43b',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'iness' => {
    :order    => 1053,
    :rule   => '44a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'ness' => {
    :order    => 1054,
    :rule   => '44b',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  "'s" => {
    :order    => 1055,
    :rule   => '7',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ies' => {
    :order    => 1056,
    :rule   => '13a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'es' => {
    :order    => 1057,
    :rule   => '13b',
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ss' => {
    :order    => 1061,
    :rule   => '6a',
    :word1    => -4,  # Original string.
    :suffix1  => '',
    :suffix2  => '',
  },
  's'   => {
    :order    => 1062,
    :rule   => '6b',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ater' => {
    :order    => 1081,
    :rule   => '8',
    :word1    => -4,  # Original string.
    :suffix1  => '',
    :suffix2  => '',
  },
  'cter' => {
    :order    => 1091,
    :rule   => '9',
    :word1    => -4,  # Original string.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ier' => {
    :order    => 1101,
    :rule   => '10',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'er' => {
    :order    => 1111,
    :rule   => '11',
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ied' => {
    :order    => 1121,
    :rule   => '12a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'ed' => {
    :order    => 1122,
    :rule   => '12b',  # There is extra code for 12b below.
    :word1    => 0, # Longest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'iest' => {
    :order    => 1141,
    :rule   => '14a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'est' => {
    :order    => 1142,
    :rule   => '14b',
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'blity' => {
    :order    => 1143,
    :rule   => '21',
    :word1    => -4,  # Original string.
    :suffix1  => '',
    :suffix2  => '',
  },
  'bility' => {
    :order    => 1144,
    :rule   => '22',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ble',
    :suffix2  => '',
  },
  'fiable' => {
    :order    => 1145,
    :rule   => '23',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'fy',
    :suffix2  => '',
  },
  'logist' => {
    :order    => 1146,
    :rule   => '24',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'logy',
    :suffix2  => '',
  },
  'ing' => {
    :order    => 1151,
    :rule   => '15', # There is extra code for 15 below.
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'ist' => {
    :order    => 1161,
    :rule   => '16',
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'ism' => {
    :order    => 1171,
    :rule   => '17',
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'ity' => {
    :order    => 1181,
    :rule   => '18',
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'ize' => {
    :order    => 1191,
    :rule   => '19',
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'cable' => {
    :order    => 1201,
    :rule   => '20a',
    :word1    => -4,  # Original string.
    :suffix1  => '',
    :suffix2  => '',
  },
  'gable' => {
    :order    => 1202,
    :rule   => '20b',
    :word1    => -4,  # Original string.
    :suffix1  => '',
    :suffix2  => '',
  },
  'able' => {
    :order    => 1203,
    :rule   => '20c',
    :word1    => -2,  # Shortest prefix + a letter, and shortest prefix.
    :suffix1  => 'e',
    :suffix2  => '',
  },
  'graphic' => {
    :order    => 1251,
    :rule   => '25',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'graphy',
    :suffix2  => '',
  },
  'istic' => {
    :order    => 1261,
    :rule   => '26',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ist',
    :suffix2  => '',
  },
  'itic' => {
    :order    => 1271,
    :rule   => '27',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ite',
    :suffix2  => '',
  },
  'like' => {
    :order    => 1281,
    :rule   => '28',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'logic' => {
    :order    => 1291,
    :rule   => '29',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'logy',
    :suffix2  => '',
  },
  'ment' => {
    :order    => 1301,
    :rule   => '30',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'mental' => {
    :order    => 1311,
    :rule   => '31',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ment',
    :suffix2  => '',
  },
  'metry' => {
    :order    => 1321,
    :rule   => '32',
    :word1    => -3,  # Shortest prefix + meter, and shortest perfix + metre.
    :suffix1  => 'meter',
    :suffix2  => 'metre',
  },
  'nce' => {
    :order    => 1331,
    :rule   => '33',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'nt',
    :suffix2  => '',
  },
  'ncy' => {
    :order    => 1341,
    :rule   => '34',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'nt',
    :suffix2  => '',
  },
  'ship' => {
    :order    => 1351,
    :rule   => '35',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ical' => {
    :order    => 1361,
    :rule   => '36',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ic',
    :suffix2  => '',
  },
  'ional' => {
    :order    => 1371,
    :rule   => '37',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ion',
    :suffix2  => '',
  },
  'bly' => {
    :order    => 1381,
    :rule   => '38',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ble',
    :suffix2  => '',
  },
  'ily' => {
    :order    => 1391,
    :rule   => '39',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'ly' => {
    :order    => 1401,
    :rule   => '40',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'iful' => {
    :order    => 1411,
    :rule   => '41a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'ful' => {
    :order    => 1412,
    :rule   => '41b',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ihood' => {
    :order    => 1421,
    :rule   => '42a',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'y',
    :suffix2  => '',
  },
  'hood' => {
    :order    => 1422,
    :rule   => '42b',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => '',
    :suffix2  => '',
  },
  'ification' => {
    :order    => 1451,
    :rule   => '45',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ify',
    :suffix2  => '',
  },
  'ization' => {
    :order    => 1461,
    :rule   => '46',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ize',
    :suffix2  => '',
  },
  'ction' => {
    :order    => 1471,
    :rule   => '47',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ct',
    :suffix2  => '',
  },
  'rtion' => {
    :order    => 1481,
    :rule   => '48',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'rt',
    :suffix2  => '',
  },
  'ation' => {
    :order    => 1491,
    :rule   => '49',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ate',
    :suffix2  => '',
  },
  'ator' => {
    :order    => 1501,
    :rule   => '50',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ate',
    :suffix2  => '',
  },
  'ctor' => {
    :order    => 1511,
    :rule   => '51',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ct',
    :suffix2  => '',
  },
  'ive' => {
    :order    => 1521,
    :rule   => '52',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'ion',
    :suffix2  => '',
  },
  'onian' => {
    :order    => 1530,
    :rule   => '54',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'on',
    :suffix2  => '',
  },
  'an' => {
    :order    => 1531,
    :rule   => '53',
    :word1    => -1,  # Shortest prefix.
    :suffix1  => 'a',
    :suffix2  => '',
  },
}
# The suffixes that key InfSuffixRules, arranged by ascending :order value.
# #infinitive intersects this list with the suffixes of a word and takes the
# first hit, so this is the precedence in which the rules are tried.
InfSuffixRuleOrder =
  InfSuffixRules.keys.sort_by do |suffix|
    InfSuffixRules[ suffix ][:order]
  end

Class Method Summary collapse

Class Method Details

.a(phrase, count = nil) ⇒ Object Also known as: an, A, AN

Return the given phrase with the appropriate indefinite article (“a” or “an”) prepended.



1152
1153
1154
1155
1156
1157
1158
1159
# File 'lib/linguistics/en.rb', line 1152

# Return +phrase+ with an indefinite article prepended to its first
# non-whitespace content; leading and trailing whitespace are kept where
# they were. The article itself is chosen by #indef_article, which also
# receives +count+.
#
# A phrase whose string form has no content to inflect (empty, or nil) is
# returned untouched.
def a( phrase, count=nil )
  pieces = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a
  leading, word, trailing = pieces[1], pieces[2], pieces[3]

  if word.nil? || word.empty?
    return phrase
  end

  leading + indef_article( word, count ) + trailing
end

.conjunction(obj, args = {}) ⇒ Object

Return the specified obj (which must support the #collect method) as a conjunction. Each item is converted to a String if it is not already (using #to_s) unless a block is given, in which case it is called once for each object in the array, and the stringified return value from the block is used instead. Returning nil causes that particular element to be omitted from the resulting conjunction. The following options can be used to control the makeup of the returned conjunction String:

:separator

Specify one or more characters to separate items in the resulting list. Defaults to ', '.

:altsep

An alternate separator to use if any of the resulting conjunction’s clauses contain the :separator character/s. Defaults to '; '.

:penultimate

Flag that indicates whether or not to join the last clause onto the rest of the conjunction using a penultimate :separator. E.g.,

%w{duck cow dog}.en.conjunction
# => "a duck, a cow, and a dog"
%w{duck cow dog}.en.conjunction( :penultimate => false )
# => "a duck, a cow and a dog"

Default to true.

:conjunctive

Sets the word used as the conjunctive (separating word) of the resulting string. Default to 'and'.

:combine

If set to true (the default), items which are identical (after surrounding spaces are stripped) will be combined in the resulting conjunction. E.g.,

%w{goose cow goose dog}.en.conjunction
# => "two geese, a cow, and a dog"
%w{goose cow goose dog}.en.conjunction( :combine => false )
# => "a goose, a cow, a goose, and a dog"
:casefold

If set to true (the default), then items are compared case-insensitively when combining them. This has no effect if :combine is false.

:generalize

If set to true, then quantities of combined items are turned into general descriptions instead of exact amounts.

ary = %w{goose pig dog horse goose reindeer goose dog horse}
ary.en.conjunction
# => "three geese, two dogs, two horses, a pig, and a reindeer"
ary.en.conjunction( :generalize => true )
# => "several geese, several dogs, several horses, a pig, and a reindeer"

See the #quantify method for specifics on how quantities are generalized. Generalization defaults to false, and has no effect if :combine is false.

:quantsort

If set to true (the default), items which are combined in the resulting conjunction will be listed in order of amount, with greater quantities sorted first. If :quantsort is false, combined items will appear where the first instance of them occurred in the list. This sort is also the fallback for identical quantities (i.e., items of the same quantity will be listed in the order they appeared in the source list).



1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
# File 'lib/linguistics/en.rb', line 1462

# Join the items of +obj+ into an English conjunction such as
# "a duck, a cow, and a dog".
#
# obj::  Any object that responds to #collect. Each item is converted with
#        #to_s, or, when a block is given, replaced by the stringified
#        return value of the block (a nil return drops the item).
# args:: Options merged over ConjunctionDefaults: :separator, :altsep,
#        :penultimate, :conjunctive, :combine, :casefold, :generalize and
#        :quantsort.
#
# Returns the finished String. When fewer than two phrases remain after the
# conversion step, the result of #a on the first phrase is returned instead.
def conjunction( obj, args={} )
  config = ConjunctionDefaults.dup.update( args )

  # Transform items in the obj to phrases. Block results are stringified
  # after the nils are dropped so that non-String results (Symbols,
  # numbers...) can be stripped and case-folded below.
  phrases =
    if block_given?
      obj.collect {|item| yield(item) }.compact.collect {|phrase| phrase.to_s }
    else
      obj.collect {|item| item.to_s }
    end

  # No need for a conjunction if there's only one thing
  return a(phrases[0]) if phrases.length < 2

  # Set up a Proc to derive a collector key from a phrase depending on the
  # configuration
  keyfunc =
    if config[:casefold]
      proc {|key| key.downcase.strip}
    else
      proc {|key| key.strip}
    end

  # Count the phrases that munge to the same key. When combining, only the
  # first phrase of each key is kept; a new list is built rather than
  # deleting from the one being iterated.
  collector = {}
  if config[:combine]
    firsts = []
    phrases.each do |phrase|
      key = keyfunc[ phrase ]
      if collector.key?( key )
        collector[ key ] += 1
      else
        collector[ key ] = 1
        firsts << phrase
      end
    end
    phrases = firsts
  else
    # If we're not combining, just make everything have a count of 1.
    phrases.uniq.each {|key| collector[ keyfunc[key] ] = 1}
  end

  # If sort-by-quantity is turned on, sort the phrases first by how many
  # there are (most-first), and then by the order they were specified in.
  if config[:quantsort] && config[:combine]
    origorder = {}
    phrases.each_with_index {|phrase,i| origorder[ keyfunc[phrase] ] ||= i }
    phrases.sort! {|x,y|
      (collector[ keyfunc[y] ] <=> collector[ keyfunc[x] ]).nonzero? ||
      (origorder[ keyfunc[x] ] <=> origorder[ keyfunc[y] ])
    }
  end

  # Set up a filtering function that adds either an indefinite article, an
  # indefinite quantifier, or a definite quantifier to each phrase
  # depending on the configuration and the count of phrases in the
  # collector.
  filter =
    if config[:generalize]
      proc {|phrase, count| quantify(phrase, count) }
    else
      proc {|phrase, count|
        if count > 1
          "%s %s" % [
            # :TODO: Make this threshold settable
            count < 10 ? count.en.numwords : count.to_s,
            plural(phrase, count)
          ]
        else
          a( phrase )
        end
      }
    end

  # Now use the configured filter to turn each phrase into its final form.
  phrases.collect! {|phrase| filter[phrase, collector[ keyfunc[phrase] ]] }

  # Prepend the conjunctive to the last element unless it's empty or
  # there's only one element. A new String is built because the filtered
  # phrase may still be an object owned by the caller.
  conjunctive = config[:conjunctive]
  if phrases.length > 1 && !conjunctive.strip.empty?
    phrases[-1] = "#{conjunctive} #{phrases[-1]}"
  end

  # The separator depends on how many clauses there were BEFORE the last
  # two are optionally merged; otherwise a three-item list without a
  # penultimate separator would be joined with plain spaces.
  phrase_count = phrases.length

  # Catenate the last two elements if there's no penultimate separator
  # (only possible when there are at least two of them).
  if !config[:penultimate] && phrase_count > 1
    final = phrases.pop
    phrases[-1] = "#{phrases[-1]} #{final}"
  end

  # Use the alternate separator if any clause already contains the normal
  # one. This is a literal substring test, so separators containing Regexp
  # metacharacters behave as plain text.
  sep =
    if phrase_count <= 2
      ' '
    elsif phrases.any? {|phrase| phrase.include?( config[:separator] ) }
      config[:altsep]
    else
      config[:separator]
    end

  return phrases.join( sep )
end

.debugMsg(*msgs) ⇒ Object

Debugging output



645
646
647
# File 'lib/linguistics/en.rb', line 645

# Emit the given messages on $stderr as a single space-joined line, but only
# while Ruby's global $DEBUG flag is set. Does nothing otherwise.
def debugMsg( *msgs ) # :nodoc:
  return unless $DEBUG
  $stderr.puts( msgs.join(" ") )
end

.def_synset_function(meth) ⇒ Object

Make a function that calls the method meth on the synset of an input word.



127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/linguistics/en/wordnet.rb', line 127

# Define a singleton method named +meth+ on the receiver. The generated
# method takes (word, pos, sense), with +sense+ defaulting to 1, fetches the
# matching synset through #synset and forwards +meth+ to it. It returns nil
# when no synset is found and raises ArgumentError when called without a
# word.
def def_synset_function( meth )
  metaclass = class << self; self; end

  metaclass.instance_eval do
    define_method( meth ) do |*args|
      word, pos, sense = *args
      unless word
        raise ArgumentError, "wrong number of arguments (0 for 1)"
      end

      syn = synset( word.to_s, pos, sense || 1 )
      if syn.nil?
        nil
      else
        syn.send( meth )
      end
    end
  end
end

.hasLinkParser?Boolean

Returns true if LinkParser was loaded okay

Returns:

  • (Boolean)


97
# File 'lib/linguistics/en/linkparser.rb', line 97

# Report the @hasLinkParser flag, i.e. whether the LinkParser library was
# loaded okay (the flag itself is set outside this method).
def hasLinkParser?
  @hasLinkParser
end

.hasWordnet?Boolean

Returns true if WordNet was loaded okay

Returns:

  • (Boolean)


107
# File 'lib/linguistics/en/wordnet.rb', line 107

# Report the @hasWordnet flag, i.e. whether the WordNet library was loaded
# okay (the flag itself is set outside this method).
def hasWordnet?
  @hasWordnet
end

.indef_article(word, count) ⇒ Object

Returns the given word with a prepended indefinite article, unless count is non-nil and not singular.



922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
# File 'lib/linguistics/en.rb', line 922

# Return +word+ prefixed with "a" or "an".
#
# If +count+ (or, when it is nil, Linguistics::num) is set and does not match
# PL_count_one, no article is chosen and "<count> <word>" is returned
# instead.
#
# The article is picked by the first matching rule below; the order of the
# rules is significant.
def indef_article( word, count )
  count ||= Linguistics::num
  if count && /^(#{PL_count_one})$/i !~ count.to_s
    return "#{count} #{word}"
  end

  # (User-defined variants are not supported here: the ud_match lookup
  # against A_a_user_defined is disabled in the original source.)

  article =
    case word

    # Special cases that always take "an"
    when /^(#{A_explicit_an})/i then 'an'

    # Abbreviations, and single letters followed by '.' or '-'
    when /^(#{A_abbrev})/x then 'an'
    when /^[aefhilmnorsx][.-]/i then 'an'
    when /^[a-z][.-]/i then 'a'

    # Consonants
    when /^[^aeiouy]/i then 'a'

    # Vowel spellings that are pronounced with a leading consonant sound
    when /^e[uw]/i then 'a'
    when /^onc?e\b/i then 'a'
    when /^uni([^nmd]|mo)/i then 'a'
    when /^u[bcfhjkqrst][aeiou]/i then 'a'

    # Remaining vowels
    when /^[aeiou]/i then 'an'

    # y... (before certain consonants implies (unnaturalized) "i.." sound)
    when /^(#{A_y_cons})/i then 'an'

    # Otherwise, guess "a"
    else 'a'
    end

  "#{article} #{word}"
end

.infinitive(word) ⇒ Object

Return the infinitive form of the given word



1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
# File 'lib/linguistics/en/infinitive.rb', line 1050

# Derive candidate infinitive (base) forms for +word+.
#
# Words listed in IrregularInfinitives are looked up directly (rule
# 'irregular'). Otherwise the first suffix in InfSuffixRuleOrder that is
# also a proper suffix of the word selects an entry of InfSuffixRules, whose
# :word1 code says how the two candidates are built:
#
#    0 :: the word minus its last letter, and minus its last two letters
#   -1 :: the word with the suffix removed plus :suffix1; no second candidate
#   -2 :: suffix removed plus :suffix1, and suffix removed
#   -3 :: suffix removed plus :suffix1, and suffix removed plus :suffix2
#   -4 :: the word unchanged; no second candidate
#
# Returns an Infinitive built from (word1, word2, suffix, rule). When no rule
# applies, word1, word2 and rule are empty strings and suffix is nil.
# Raises IndexError if a rule carries an unknown :word1 code.
def infinitive( word )
  word = word.to_s
  word1 = word2 = suffix = rule = newword = ''

  if IrregularInfinitives.key?( word )
    word1 = IrregularInfinitives[ word ]
    rule  = 'irregular'
  else
    # Build up prefixes[suffix] as an array of prefixes, from longest to
    # shortest.
    prefix, suffix = nil
    prefixes = Hash::new {|hsh,key| hsh[key] = []}

    # Build the hash of prefixes for the word: for every split point the
    # key is the tail of the word, and the values run from the word minus
    # its last letter down to the word with the whole tail removed.
    1.upto( word.length ) {|i|
      prefix = word[0, i]
      suffix = word[i..-1]

      (suffix.length - 1).downto( 0 ) {|j|
        newword = prefix + suffix[0, j]
        prefixes[ suffix ].push( newword )
      }
    }

    $stderr.puts "prefixes: %p" % prefixes if $DEBUG

    # Now check for rules covering the prefixes for this word, picking
    # the first one if one was found. Note that this assignment leaves
    # suffix nil when no rule matches.
    if (( suffix = ((InfSuffixRuleOrder & prefixes.keys).first) ))
      rule = InfSuffixRules[ suffix ][:rule]
      shortestPrefix = InfSuffixRules[ suffix ][:word1]
      $stderr.puts "Using rule %p (%p) for suffix %p" % 
        [ rule, shortestPrefix, suffix ] if $DEBUG

      case shortestPrefix
      when 0
        # The two longest prefixes
        word1 = prefixes[ suffix ][ 0 ]
        word2 = prefixes[ suffix ][ 1 ]
        $stderr.puts "For sp = 0: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      when -1
        # Shortest prefix plus the replacement suffix
        word1 = prefixes[ suffix ].last +
          InfSuffixRules[ suffix ][:suffix1]
        word2 = ''
        $stderr.puts "For sp = -1: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      when -2
        # Shortest prefix plus the replacement suffix, and the bare
        # shortest prefix
        word1 = prefixes[ suffix ].last +
          InfSuffixRules[ suffix ][:suffix1]
        word2 = prefixes[ suffix ].last
        $stderr.puts "For sp = -2: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      when -3
        # Shortest prefix plus each of the two replacement suffixes
        word1 = prefixes[ suffix ].last +
          InfSuffixRules[ suffix ][:suffix1]
        word2 = prefixes[ suffix ].last +
          InfSuffixRules[ suffix ][:suffix2]
        $stderr.puts "For sp = -3: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      when -4
        # The original string, untouched
        word1 = word
        word2 = ''
        $stderr.puts "For sp = -4: word1: %p, word2: %p" %
          [ word1, word2 ] if $DEBUG

      else
        raise IndexError,
          "Couldn't find rule for shortest prefix %p" %
          shortestPrefix
      end

      # Rules 12b and 15: Strip off 'ed' or 'ing'.
      if rule == '12b' or rule == '15'
        # Do we have a monosyllable of this form:
        # o 0+ Consonants
        # o 1+ Vowel
        # o 2 Non-wx
        # Eg: tipped => tipp?
        # Then return tip and tipp.
        # Eg: swimming => swimm?
        # Then return swim and swimm.

        if /^([^aeiou]*[aeiou]+)([^wx])\2$/ =~ word2
          word1 = $1 + $2
          word2 = $1 + $2 + $2
        end
      end
    end
  end

  return Infinitive::new( word1, word2, suffix, rule )
end

.languageObject

Return the name of the language this module is for.



1085
1086
1087
# File 'lib/linguistics/en.rb', line 1085

# Name of the natural language this module implements.
def language
  return 'English'
end

.linkParse(sent) ⇒ Object Also known as: sentence

Return a LinkParser::Sentence, with or without a sentence in it.



136
137
138
# File 'lib/linguistics/en/linkparser.rb', line 136

# Parse +sent+ (converted with #to_s) using the shared parser returned by
# Linguistics::EN::linkParser, and return whatever its #parse returns.
def linkParse( sent )
  parser = Linguistics::EN::linkParser
  text = sent.to_s
  parser.parse( text )
end

.linkParserObject

The instance of LinkParser used for all Linguistics LinkParser functions.



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/linguistics/en/linkparser.rb', line 105

# Return the LinkParser instance shared by the LinkParser functions,
# creating and caching it in @lpParser on first use.
#
# Before the parser is built, LinkParser::Word is extended with Linguistics
# and given an :en delegator proxy. The dictionary options use the
# 'ElizaData' data directory and the 'tiny.dict' dictionary.
#
# Raises NotImplementedError (carrying the stored error's message) when
# @lpError is set.
def linkParser
  if @lpError
    reason = @lpError.message
    raise NotImplementedError,
      "LinkParser functions are not loaded: %s" % reason
  end

  # Reuse the parser once it has been built
  unless @lpParser.nil?
    return @lpParser
  end

  LinkParser::Word::extend( Linguistics )
  Linguistics::installDelegatorProxy( LinkParser::Word, :en )

  parseOpts = Hash.new
  dictOpts = Hash.new('')
  dictOpts['datadir'] = 'ElizaData'
  dictOpts['dict'] = 'tiny.dict'

  @lpParser = LinkParser.new( dictOpts, parseOpts )
end

.lpErrorObject

If #hasLinkParser? returns false, this can be called to fetch the exception which was raised when trying to load LinkParser.



101
# File 'lib/linguistics/en/linkparser.rb', line 101

# The exception stored in @lpError (nil if none was recorded); per the
# module documentation this is the error raised while loading LinkParser.
def lpError
  @lpError
end

.matchgroup(*parts) ⇒ Object

Wrap one or more parts in a non-capturing alternation group, returned as a Regexp source String



121
122
123
124
# File 'lib/linguistics/en.rb', line 121

# Build the source String of a non-capturing alternation group from +parts+
# (nested Arrays are flattened), e.g. matchgroup("a", %w[b c]) gives
# "(?:a|b|c)". The parts are inserted as-is, without escaping.
def self::matchgroup( *parts )
  alternatives = parts.flatten
  "(?:" + alternatives.join("|") + ")"
end

.no(phrase, count = nil) ⇒ Object Also known as: NO

Translate zero-quantified phrase to “no phrase.plural”.



1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
# File 'lib/linguistics/en.rb', line 1166

# Quantify +phrase+ with +count+, spelling a zero count as "no".
#
# +count+ defaults to Linguistics::num, or 0 if that is unset. When it
# matches PL_count_zero the result is "no <plural phrase>"; otherwise it is
# "<count> <phrase inflected for count>". Whitespace around the phrase is
# preserved.
#
# A phrase with nothing to inflect (empty or nil) is returned unchanged, as
# #a and the #plural family do, instead of failing on nil concatenation.
def no( phrase, count=nil )
  md = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  pre, word, post = md.to_a[1,3]
  return phrase if word.nil? or word.empty?

  count ||= Linguistics::num || 0

  # The pattern is grouped so the anchors apply to the whole alternation,
  # as in #normalizeCount.
  if /^(?:#{PL_count_zero})$/ =~ count.to_s
    return "#{pre}no " + plural( word, 0 ) + post
  else
    return "#{pre}#{count} " + plural( word, count ) + post
  end
end

.normalizeCount(count, default = 2) ⇒ Object

Normalize a count to either 1 or 2 (singular or plural)



651
652
653
654
655
656
657
658
659
660
# File 'lib/linguistics/en.rb', line 651

# Reduce +count+ to a grammatical number: 1 for singular, otherwise
# +default+ (2, i.e. plural, unless overridden).
#
# A nil count yields +default+. A count matching PL_count_one
# (case-insensitively) is singular, and so is one matching PL_count_zero
# while Linguistics::classical? is on.
def normalizeCount( count, default=2 )
  return default if count.nil? # Default to plural

  text = count.to_s
  return 1 if /^(#{PL_count_one})$/i =~ text
  return 1 if Linguistics::classical? && /^(#{PL_count_zero})$/ =~ text

  default
end

.number_to_words(num, config) ⇒ Object

Return the specified number num as an array of number phrases.



1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
# File 'lib/linguistics/en.rb', line 1027

# Return the specified number +num+ as an Array of number phrases.
#
# num::    The digits to convert (anything whose #to_s is a digit string).
# config:: Option Hash; this method reads :zero, :group and :and.
#
# A value whose #to_i is zero yields [config[:zero]]. With a non-zero
# :group the digits are cut into runs of that many digits and each run is
# worded by the matching entry of NumberToWordsFunctions. With :group 0 the
# digits are consumed from the right in threes (#to_hundreds), then a
# remaining pair (#to_tens) or single digit (#to_units).
#
# The caller's +num+ is never modified.
def number_to_words( num, config )
  return [config[:zero]] if num.to_i.zero?
  chunks = []

  # Break into word-groups if groups is set
  if config[:group].nonzero?

    # Build a Regexp with <config[:group]> number of digits. Any past
    # the first are optional.
    re = Regexp::new( "(\\d)" + ("(\\d)?" * (config[:group] - 1)) )

    # Scan the string, and call the word-chunk function that deals with
    # chunks of the found number of digits.
    num.to_s.scan( re ) {|digits|
      debugMsg "   digits = #{digits.inspect}"
      numerals = digits.flatten.compact.collect {|i| i.to_i}
      # Unmatched optional groups are nil, so the count of digits found
      # is the count of non-nil captures (Array#nitems no longer exists
      # in Ruby 1.9 and later).
      fn = NumberToWordsFunctions[ numerals.length ]
      debugMsg "   numerals = #{numerals.inspect}"
      chunks.push fn.call( config[:zero], *numerals ).strip
    }
  else
    # Work on a copy with leading zeroes removed; String#sub (unlike
    # sub!) leaves the caller's string alone.
    phrase = num.to_s.sub( /\A\s*0+/, '' )
    mill = 0

    # Match backward from the end of the digits in the string, turning
    # chunks of three, of two, and of one into words. mill counts the
    # groups of three consumed so far.
    mill += 1 while
      phrase.sub!( /(\d)(\d)(\d)(?=\D*\Z)/ ) {
        words = to_hundreds( $1.to_i, $2.to_i, $3.to_i, mill,
                   config[:and] )
        chunks.unshift words.strip.squeeze(' ') unless words.nil?
        ''
      }

    phrase.sub!( /(\d)(\d)(?=\D*\Z)/ ) {
      chunks.unshift to_tens( $1.to_i, $2.to_i, mill ).strip.squeeze(' ')
      ''
    }
    phrase.sub!( /(\d)(?=\D*\Z)/ ) {
      chunks.unshift to_units( $1.to_i, mill ).strip.squeeze(' ')
      ''
    }
  end

  return chunks
end

.numwords(number, hashargs = {}) ⇒ Object

Return the specified number as english words. One or more configuration values may be passed to control the returned String:

:group

Controls how many numbers at a time are grouped together. Valid values are 0 (normal grouping), 1 (single-digit grouping, e.g., “one, two, three, four”), 2 (double-digit grouping, e.g., “twelve, thirty-four”), or 3 (triple-digit grouping, e.g., “one twenty-three, four”).

:comma

Set the character/s used to separate word groups. Defaults to “, ”.

:and

Set the word and/or characters used where ‘ and ’ (the default) is normally used. Setting :and to ‘ ’, for example, will cause 2556 to be returned as “two-thousand, five hundred fifty-six” instead of “two-thousand, five hundred and fifty-six”.

:zero

Set the word used to represent the numeral 0 in the result. ‘zero’ is the default.

:decimal

Set the translation of any decimal points in the number; the default is ‘point’.

:asArray

If set to a true value, the number will be returned as an array of word groups instead of a String.



1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
# File 'lib/linguistics/en.rb', line 1224

# Return +number+ as English words.
#
# number::   An Integer, Float or String; a leading '+' or '-' and a
#            trailing ordinal suffix ("st", "nd", "rd", "th") are
#            recognised.
# hashargs:: Options merged over NumwordDefaults: :group, :comma, :and,
#            :zero, :decimal and :asArray.
#
# Returns a String, or an Array of word groups when :asArray is true.
# Raises RuntimeError if :group is not between 0 and 3.
# The caller's +number+ is never modified.
def numwords( number, hashargs={} )
  # Work on a private copy: the sub!/gsub! calls below would otherwise
  # modify a String passed in by the caller.
  num = number.to_s.dup
  config = NumwordDefaults.dup.update( hashargs )
  raise "Bad chunking option: #{config[:group]}" unless
    config[:group].between?( 0, 3 )

  # Array of number parts: first is everything to the left of the first
  # decimal, followed by any groups of decimal-delimited numbers after that
  parts = []

  # Wordify any sign prefix
  sign = (/\A\s*\+/ =~ num) ? 'plus' : (/\A\s*\-/ =~ num) ? 'minus' : ''

  # Strip any ordinal suffixes
  ord = true if num.sub!( /(st|nd|rd|th)\Z/, '' )

  # Split the number into chunks delimited by '.'
  chunks = if !config[:decimal].empty? then
         if config[:group].nonzero?
           num.split(/\./)
         else
           num.split(/\./, 2)
         end
       else
         [ num ]
       end

  # Wordify each chunk, pushing arrays into the parts array
  chunks.each_with_index {|chunk,section|
    chunk.gsub!( /\D+/, '' )

    # If there's nothing in the whole-number part, just push an empty
    # array for it. (An empty later chunk is passed on and is worded as
    # zero by #number_to_words.)
    if chunk.empty? && section.zero?
      parts.push []
      next
    end

    # Post-decimal digits of a non-grouped number are read out one digit
    # at a time; everything else uses the configured grouping.
    if config[:group].zero? && section.nonzero?
      parts.push number_to_words( chunk, config.dup.update(:group => 1) )
    else
      parts.push number_to_words( chunk, config )
    end
  }

  # An empty input splits into no chunks at all; keep parts[0] an Array so
  # the code below doesn't trip over nil.
  parts.push( [] ) if parts.empty?

  debugMsg "Parts => #{parts.inspect}"

  # Turn the last word of the whole-number part back into an ordinal if
  # the original number came in that way.
  if ord && !parts[0].empty?
    parts[0][-1] = ordinal( parts[0].last )
  end

  # If the caller's expecting an Array return, just flatten and return the
  # parts array.
  if config[:asArray]
    parts[0].unshift( sign ) unless sign.empty?
    return parts.flatten
  end

  # Catenate each sub-parts array into a whole number part and one or more
  # post-decimal parts. If grouping is turned on, all sub-parts get joined
  # with commas, otherwise just the whole-number part is.
  if config[:group].zero?
    whole = parts[0]

    # Array#nitems is gone as of Ruby 1.9; count the non-nil entries.
    if whole.compact.length > 1

      # Join all but the last part together with commas
      wholenum = whole[0...-1].join( config[:comma] )

      # If the last part is just a single word, append it to the
      # wholenum part with the configured :and. This is to get things
      # like 'three thousand and three' instead of 'three thousand,
      # three'.
      if /^\s*(\S+)\s*$/ =~ whole.last
        wholenum += (config[:and] || ' and ') + whole.last
      else
        wholenum += config[:comma] + whole.last
      end
    else
      wholenum = whole[0]
    end
    decimals = parts[1..-1].collect {|part| part.join(" ")}

    debugMsg "Wholenum: #{wholenum.inspect}; decimals: #{decimals.inspect}"

    # Join with the configured decimal; if it's empty, just join with
    # spaces.
    joiner = config[:decimal].empty? ? " " : " #{config[:decimal]} "
    words = ([ wholenum ] + decimals).join( joiner ).strip

    # Keep the sign a separate word ("minus five", not "minusfive").
    return sign.empty? ? words : "#{sign} #{words}".strip
  else
    # NOTE(review): the sign is not part of grouped output; that matches
    # the previous behaviour -- confirm it is intended.
    return parts.compact.
      separate( config[:decimal] ).
      delete_if {|el| el.empty?}.
      join( config[:comma] ).
      strip
  end
end

.ordinal(number) ⇒ Object Also known as: ORD

Transform the given number into an ordinal word. The number object can be either an Integer or a String.



1336
1337
1338
1339
1340
1341
1342
1343
1344
# File 'lib/linguistics/en.rb', line 1336

# Transform +number+ into an ordinal.
#
# An Integer gets the matching suffix from Nth appended to its digits
# ("1st", "22nd", "113th"). Anything else is converted with #to_s and has
# its ending, as matched by OrdinalSuffixes, replaced with the
# corresponding Ordinals entry.
def ordinal( number )
  case number
  when Integer
    # Look the suffix up by magnitude: Ruby's % never returns a negative
    # result for a positive divisor (-1 % 10 == 9), so negative numbers
    # would otherwise pick the wrong suffix ("-1th").
    magnitude = number.abs
    return number.to_s + (Nth[ magnitude % 100 ] || Nth[ magnitude % 10 ])

  else
    return number.to_s.sub( /(#{OrdinalSuffixes})\Z/ ) { Ordinals[$1] }
  end
end

.plural(phrase, count = nil) ⇒ Object Also known as: PL

Return the plural of the given phrase if count indicates it should be plural.



1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
# File 'lib/linguistics/en.rb', line 1092

# Return +phrase+ inflected for +count+, keeping its surrounding whitespace.
#
# The word is tried as a special adjective, then as a special verb, and
# finally as a noun; the first inflector to return a value wins, and the
# result is passed through #postprocess together with the original word.
# A phrase with nothing to inflect (empty or nil) is returned unchanged.
def plural( phrase, count=nil )
  lead, word, trail = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a[1,3]
  return phrase if word.nil? || word.empty?

  inflected = pluralize_special_adjective( word, count )
  inflected ||= pluralize_special_verb( word, count )
  inflected ||= pluralize_noun( word, count )

  lead + postprocess( word, inflected ) + trail
end

.plural_adjective(phrase, count = nil) ⇒ Object Also known as: plural_adj, PL_ADJ

Return the plural of the given adjectival phrase if count indicates it should be plural.



1137
1138
1139
1140
1141
1142
1143
1144
1145
# File 'lib/linguistics/en.rb', line 1137

# Return the adjectival +phrase+ inflected for +count+, keeping its
# surrounding whitespace.
#
# Only #pluralize_special_adjective is consulted; when it has no answer the
# word itself is used. Either way the result goes through #postprocess.
# A phrase with nothing to inflect (empty or nil) is returned unchanged.
def plural_adjective( phrase, count=nil )
  lead, word, trail = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a[1,3]
  if word.nil? || word.empty?
    return phrase
  end

  inflected = pluralize_special_adjective( word, count ) || word
  lead + postprocess( word, inflected ) + trail
end

.plural_noun(phrase, count = nil) ⇒ Object Also known as: PL_N

Return the plural of the given noun phrase if count indicates it should be plural.



1109
1110
1111
1112
1113
1114
1115
1116
# File 'lib/linguistics/en.rb', line 1109

# Return the noun +phrase+ inflected for +count+, keeping its surrounding
# whitespace. The word is inflected by #pluralize_noun and the result is
# passed through #postprocess together with the original word.
# A phrase with nothing to inflect (empty or nil) is returned unchanged.
def plural_noun( phrase, count=nil )
  parts = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s ).to_a
  lead, word, trail = parts[1], parts[2], parts[3]
  return phrase if word.nil? || word.empty?

  inflected = pluralize_noun( word, count )
  lead + postprocess( word, inflected ) + trail
end

.plural_verb(phrase, count = nil) ⇒ Object Also known as: PL_V

Return the plural of the given verb phrase if count indicates it should be plural.



1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
# File 'lib/linguistics/en.rb', line 1122

# Return the plural of the given verb +phrase+ if +count+ indicates it should
# be plural. Special (irregular) verb handling is tried first, then the
# general verb rules. Surrounding whitespace is preserved.
def plural_verb( phrase, count=nil )
  match = /\A(\s*)(.+?)(\s*)\Z/.match( phrase.to_s )
  return phrase unless match

  leading, word, trailing = match.captures
  inflected = pluralize_special_verb( word, count ) ||
    pluralize_general_verb( word, count )

  leading + postprocess( word, inflected ) + trailing
end

.pluralize_general_verb(word, count) ⇒ Object

Pluralize regular verbs



860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
# File 'lib/linguistics/en.rb', line 860

# Pluralize regular verbs.
#
# +count+ falls back to Linguistics::num when nil and is then run through
# normalizeCount. If the result reads as a singular count (PL_count_one),
# +word+ is returned unchanged.
def pluralize_general_verb( word, count )
  count = normalizeCount( count || Linguistics::num )
  return word if count.to_s =~ /^(#{PL_count_one})$/i

  case word
  when /^(#{PL_v_ambiguous_pres})((\s.*)?)$/i
    # Ambiguous present tense (simple and compound): swap the verb via the
    # lookup table and keep whatever followed it.
    verb, rest = $1, $2
    PL_v_ambiguous_pres_h[ verb.downcase ] + rest
  else
    # Ambiguous preterite/perfect tenses (PL_v_ambiguous_non_pres) and
    # everything else (1st or 2nd person) are uninflected.
    word
  end
end

.pluralize_noun(word, count = nil) ⇒ Object

Pluralize nouns



687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
# File 'lib/linguistics/en.rb', line 687

# Pluralize nouns.
#
# Returns the plural form of +word+, or +word+ itself when the normalized
# +count+ equals 1. A nil +count+ falls back to Linguistics::num before
# being passed through normalizeCount.
#
# The rules below are tried strictly in order and the first one that matches
# wins, so their sequence matters. Results taken from the irregular lookup
# tables may still contain "modern|classical" alternatives separated by a
# pipe; this method does not choose between them.
def pluralize_noun( word, count=nil )
  value = nil
  count ||= Linguistics::num
  count = normalizeCount( count )

  # Singular count: nothing to inflect.
  # NOTE(review): the sibling pluralize_* methods test count.to_s against
  # PL_count_one instead of comparing with 1 -- confirm normalizeCount makes
  # the two checks equivalent.
  return word if count == 1

  # Handle user-defined nouns (currently disabled)
  #if value = ud_match( word, PL_sb_user_defined )
  #  return value
  #end

  # Handle empty word and uninflected plurals; the "herd" group is only
  # left uninflected in classical mode.
  case word
  when ''
    return word
  when /^(#{PL_sb_uninflected})$/i
    return word
  else
    if Linguistics::classical? &&
       /^(#{PL_sb_uninflected_herd})$/i =~ word
      return word
    end
  end

  # Handle compounds ("Governor General", "mother-in-law", "aide-de-camp", etc.)
  # The leading noun is always pluralized with an explicit count of 2.
  case word
  when /^(?:#{PL_sb_postfix_adj})$/i
    value = $2
    return pluralize_noun( $1, 2 ) + value

  # The trailing noun is pluralized without a count, so it falls back to
  # Linguistics::num.
  when /^(?:#{PL_sb_prep_dual_compound})$/i
    value = [ $2, $3 ] 
    return pluralize_noun( $1, 2 ) + value[0] + pluralize_noun( value[1] )

  when /^(?:#{PL_sb_prep_compound})$/i
    value = $2 
    return pluralize_noun( $1, 2 ) + value

  # Handle pronouns (optionally preceded by a preposition)
  when /^((?:#{PL_prep})\s+)(#{PL_pron_acc})$/i
    return $1 + PL_pron_acc_h[ $2.downcase ]

  when /^(#{PL_pron_nom})$/i
    return PL_pron_nom_h[ word.downcase ]

  when /^(#{PL_pron_acc})$/i
    return PL_pron_acc_h[ $1.downcase ]

  # Handle isolated irregular plurals; anything before the irregular word
  # is kept as a prefix.
  when /(.*)\b(#{PL_sb_irregular})$/i
    return $1 + PL_sb_irregular_h[ $2.downcase ]

  # Words ending in "man" that nevertheless take a plain "s"
  when /(#{PL_sb_U_man_mans})$/i
    return "#{$1}s"

  # Handle families of irregular plurals
  when /(.*)man$/i ;         return "#{$1}men"
  when /(.*[ml])ouse$/i ;        return "#{$1}ice"
  when /(.*)goose$/i ;       return "#{$1}geese"
  when /(.*)tooth$/i ;       return "#{$1}teeth"
  when /(.*)foot$/i ;          return "#{$1}feet"

  # Handle unassimilated imports (applied regardless of classical mode)
  when /(.*)ceps$/i ;          return word
  when /(.*)zoon$/i ;          return "#{$1}zoa"
  when /(.*[csx])is$/i ;       return "#{$1}es"
  when /(#{PL_sb_U_ex_ices})ex$/i; return "#{$1}ices"
  when /(#{PL_sb_U_ix_ices})ix$/i; return "#{$1}ices"
  when /(#{PL_sb_U_um_a})um$/i ;   return "#{$1}a"
  when /(#{PL_sb_U_us_i})us$/i ;   return "#{$1}i"
  when /(#{PL_sb_U_on_a})on$/i ;   return "#{$1}a"
  when /(#{PL_sb_U_a_ae})$/i ;   return "#{$1}e"
  end

  # Handle incompletely assimilated imports (classical mode only)
  if Linguistics::classical?
    case word
    when /(.*)trix$/i ;       return "#{$1}trices"
    when /(.*)eau$/i ;        return "#{$1}eaux"
    when /(.*)ieu$/i ;        return "#{$1}ieux"
    when /(.{2,}[yia])nx$/i ;   return "#{$1}nges"
    when /(#{PL_sb_C_en_ina})en$/i; return "#{$1}ina"
    when /(#{PL_sb_C_ex_ices})ex$/i;  return "#{$1}ices"
    when /(#{PL_sb_C_ix_ices})ix$/i;  return "#{$1}ices"
    when /(#{PL_sb_C_um_a})um$/i ;  return "#{$1}a"
    when /(#{PL_sb_C_us_i})us$/i ;  return "#{$1}i"
    when /(#{PL_sb_C_us_us})$/i ; return "#{$1}"
    when /(#{PL_sb_C_a_ae})$/i ;  return "#{$1}e"
    when /(#{PL_sb_C_a_ata})a$/i ;  return "#{$1}ata"
    when /(#{PL_sb_C_o_i})o$/i ;  return "#{$1}i"
    when /(#{PL_sb_C_on_a})on$/i ;  return "#{$1}a"
    when /#{PL_sb_C_im}$/i ;    return "#{word}im"
    when /#{PL_sb_C_i}$/i ;     return "#{word}i"
    end
  end


  # Handle singular nouns ending in ...s or other sibilants
  case word
  when /^(#{PL_sb_singular_s})$/i; return "#{$1}es"
  # Case-sensitive: a capitalized word ending in "s" gets "es"
  when /^([A-Z].*s)$/;       return "#{$1}es"
  when /(.*)([cs]h|[zx])$/i ;      return "#{$1}#{$2}es"
  # when /(.*)(us)$/i ;        return "#{$1}#{$2}es"

  # Handle ...f -> ...ves
  when /(.*[eao])lf$/i ;       return "#{$1}lves"; 
  when /(.*[^d])eaf$/i ;       return "#{$1}eaves"
  when /(.*[nlw])ife$/i ;        return "#{$1}ives"
  when /(.*)arf$/i ;         return "#{$1}arves"

  # Handle ...y (vowel + y and capitalized words just take "s")
  when /(.*[aeiou])y$/i ;        return "#{$1}ys"
  when /([A-Z].*y)$/ ;       return "#{$1}s"
  when /(.*)y$/i ;         return "#{$1}ies"

  # Handle ...o
  when /#{PL_sb_U_o_os}$/i ;     return "#{word}s"
  when /[aeiou]o$/i ;          return "#{word}s"
  when /o$/i ;           return "#{word}es"

  # Otherwise just add ...s
  else
    return "#{word}s"
  end
end

.pluralize_special_adjective(word, count) ⇒ Object

Handle special adjectives



884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
# File 'lib/linguistics/en.rb', line 884

# Handle special adjectives.
#
# +count+ falls back to Linguistics::num when nil and is then run through
# normalizeCount. For a singular count (PL_count_one) +word+ is returned
# unchanged. Otherwise returns the plural form for known special adjectives
# and possessives, or nil when +word+ is not recognised.
def pluralize_special_adjective( word, count )
  count = normalizeCount( count || Linguistics::num )
  return word if count.to_s =~ /^(#{PL_count_one})$/i

  # Handle user-defined adjectives (currently disabled)
  #if value = ud_match( word, PL_adj_user_defined )
  #  return value
  #end

  case word
  when /^(#{PL_adj_special})$/i
    # Known special cases come straight from the lookup table
    PL_adj_special_h[ $1.downcase ]
  when /^(#{PL_adj_poss})$/i
    # Possessive adjectives
    PL_adj_poss_h[ $1.downcase ]
  when /^(.*)'s?$/
    # Possessive nouns: pluralize the owner, then re-attach the apostrophe
    # (a bare apostrophe if the plural already ends in "s")
    owner = plural_noun( $1 )
    /s$/ =~ owner ? "#{owner}'" : "#{owner}'s"
  end
end

.pluralize_special_verb(word, count) ⇒ Object

Pluralize special verbs



817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
# File 'lib/linguistics/en.rb', line 817

# Pluralize special verbs.
#
# +count+ falls back to Linguistics::num when nil and is then run through
# normalizeCount. Returns nil for a singular count (PL_count_one) and for
# any word this method does not handle, so that callers can fall through
# to other rules. The patterns are tried in order; the first match wins.
def pluralize_special_verb( word, count )
  count = normalizeCount( count || Linguistics::num )
  return nil if count.to_s =~ /^(#{PL_count_one})$/i

  # Handle user-defined verbs (currently disabled)
  #if value = ud_match( word, PL_v_user_defined )
  #  return value
  #end

  case word
  when /^(#{PL_v_irregular_pres})((\s.*)?)$/i
    # Irregular present tense (simple and compound)
    PL_v_irregular_pres_h[ Regexp.last_match(1).downcase ] + Regexp.last_match(2)
  when /^(#{PL_v_irregular_non_pres})((\s.*)?)$/i
    # Irregular future, preterite and perfect tenses are left alone
    word
  when /^(#{PL_v_special_s})$/, /\s/
    # Special cases, and anything containing whitespace, are not handled here
    nil
  when /^(.*)([cs]h|[x]|zz|ss)es$/i
    # Standard 3rd person: chop the ...(e)s off single words
    Regexp.last_match(1) + Regexp.last_match(2)
  when /^(..+)ies$/i
    "#{Regexp.last_match(1)}y"
  when /^(.+)oes$/i
    "#{Regexp.last_match(1)}o"
  when /^(.*[^s])s$/i
    Regexp.last_match(1)
  end
end

.postprocess(original, inflected) ⇒ Object

Do normal/classical switching and match capitalization in inflected by examining the original input.



665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
# File 'lib/linguistics/en.rb', line 665

# Do normal/classical switching and match capitalization in +inflected+ by
# examining the +original+ input.
#
# +inflected+ may hold two alternatives separated by a pipe
# ("modern|classical"); the classical one is chosen when
# Linguistics::classical? is true, the modern one otherwise.
#
# Capitalization rules, based on +original+:
# * exactly "I"             -> result returned as-is
# * all upper-case letters  -> result upcased
# * leading capital letter  -> first character of the result upcased
# * anything else           -> result returned as-is
#
# Always returns a new String. +inflected+ is never modified in place,
# because callers pass in values taken directly from shared lookup tables
# (and possibly frozen strings); mutating those would corrupt every later
# lookup of the same entry.
def postprocess( original, inflected )
  # String#sub (unlike #sub!) returns a copy even when nothing matches.
  result = inflected.sub( /([^|]+)\|(.+)/ ) {
    Linguistics::classical? ? $2 : $1
  }

  case original
  when "I"
    return result
  when /^[A-Z]+$/
    return result.upcase
  when /^[A-Z]/
    # Can't use #capitalize, as it will downcase the rest of the string,
    # too.
    result[0,1] = result[0,1].upcase
    return result
  else
    return result
  end
end

.present_participle(word) ⇒ Object Also known as: part_pres, PART_PRES

Return the present participle (the "-ing" form) of the given word.



1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
# File 'lib/linguistics/en.rb', line 1181

# Return the present participle ("...ing" form) of +word+.
#
# The word is first converted to its plural verb form via #plural_verb with
# a count of 2, then the first applicable spelling rule below is applied
# before "ing" is appended. Only one rule fires per call.
def present_participle( word )
  # Work on a private copy: #plural_verb can hand back its argument
  # unchanged, and the in-place substitutions below must neither modify the
  # caller's string nor fail on a frozen one.
  stem = plural_verb( word.to_s, 2 ).dup

  stem.sub!( /ie$/, 'y' ) ||                 # ...ie  -> ...y
    stem.sub!( /ue$/, 'u' ) ||               # ...ue  -> ...u
    stem.sub!( /([auy])e$/, '\\1' ) ||       # ...ae/...ye -> drop the e
    stem.sub!( /i$/, '' ) ||                 # drop a trailing i
    stem.sub!( /([^e])e$/, '\\1' ) ||        # drop a silent e (but not ...ee)
    /er$/.match( stem ) ||                   # ...er: never double the r
    stem.sub!( /([^aeiou][aeiouy]([bdgmnprst]))$/, '\\1\\2' ) # double final consonant

  return "#{stem}ing"
end

.quantify(phrase, number = 0, args = {}) ⇒ Object

:joinword

Sets the word (and any surrounding spaces) used as the word separating the quantity from the noun in the resulting string. Defaults to ' of '.



1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
# File 'lib/linguistics/en.rb', line 1356

# Return a phrase describing roughly how many of +phrase+ there are, based
# on +number+ (converted with #to_i).
#
# 0 and 1 are delegated to #no and #a. Counts inside SeveralRange,
# NumberRange, NumerousRange and ManyRange are prefixed with "several",
# "a number of", "numerous" and "many" respectively. Anything else is
# described by order of magnitude, e.g. "hundreds of thousands of ...".
#
# +args+ is merged over a copy of QuantifyDefaults; its :joinword entry is
# the separator used between the parts of the order-of-magnitude form.
def quantify( phrase, number=0, args={} )
  num = number.to_i
  config = QuantifyDefaults.dup.update( args )

  case num
  when 0             then return no( phrase )
  when 1             then return a( phrase )
  when SeveralRange  then return "several " + plural( phrase, num )
  when NumberRange   then return "a number of " + plural( phrase, num )
  when NumerousRange then return "numerous " + plural( phrase, num )
  when ManyRange     then return "many " + plural( phrase, num )
  end

  # Anything bigger than the ManyRange gets described like
  # "hundreds of thousands of..." or "millions of..."
  # depending, of course, on how many there are.
  magnitude = Math::log10( num ).to_i
  thousands, subthousands = magnitude.divmod( 3 )

  # "hundreds"/"tens" within the current group of three digits, else nothing
  stword = { 2 => "hundreds", 1 => "tens" }[ subthousands ]

  # "thousands", "millions", ... or nothing below one thousand
  thword = plural( to_thousands(thousands).strip )
  thword = nil if thword.empty?

  parts = [ stword, thword, plural(phrase, number) ]
  parts.compact.join( config[:joinword] )
end

.synset(word, pos = nil, sense = 1) ⇒ Object

Look up the synset associated with the given word or collocation in the WordNet lexicon and return a WordNet::Synset object.



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/linguistics/en/wordnet.rb', line 154

# Look up the synset associated with the given +word+ or collocation in the
# WordNet lexicon and return the result of the lexicon's lookupSynsets call.
#
# +pos+ is the part of speech to search. When it is nil, :noun, :verb,
# :adjective, :adverb and :other are tried in that order and the first
# truthy result is returned. For convenience an Integer given as the second
# argument is treated as +sense+, with no part of speech specified.
#
# Returns nil if no lookup produced a result.
def synset( word, pos=nil, sense=1 )
  lex = Linguistics::EN::wnLexicon

  # Integer rather than Fixnum: the Fixnum constant no longer exists in
  # current Ruby versions.
  if pos.is_a?( Integer )
    sense = pos
    pos = nil
  end
  postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
  syn = nil

  postries.each {|try_pos|
    syn = lex.lookupSynsets( word.to_s, try_pos, sense )
    break if syn
  }

  return syn
end

.synsets(word, pos = nil) ⇒ Object

Look up all the synsets associated with the given word or collocation in the WordNet lexicon and return an Array of WordNet::Synset objects. If pos is nil, return synsets for all parts of speech.



174
175
176
177
178
179
180
181
182
183
184
# File 'lib/linguistics/en/wordnet.rb', line 174

# Look up all the synsets associated with the given +word+ or collocation in
# the WordNet lexicon and return them as a flat Array with nils removed. If
# +pos+ is nil, :noun, :verb, :adjective, :adverb and :other are all
# searched; otherwise only the given part of speech is.
def synsets( word, pos=nil )
  lex = Linguistics::EN::wnLexicon
  candidates = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]

  found = candidates.map {|part| lex.lookupSynsets( word.to_s, part ) }
  found.flatten.compact
end

.to_hundreds(hundreds, tens = 0, units = 0, thousands = 0, joinword = " and ") ⇒ Object

Transform the specified number of hundreds-, tens-, and units-place numerals into a word phrase. If the number of thousands (thousands) is greater than 0, it will be used to determine where the decimal point is in relation to the hundreds-place number.



996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
# File 'lib/linguistics/en.rb', line 996

# Transform the specified number of hundreds-, tens-, and units-place
# numerals into a word phrase, followed by the word for the given number of
# +thousands+ places. +joinword+ separates the hundreds from the rest; an
# empty +joinword+ is replaced by a single space. Returns nil when all three
# numerals are zero.
def to_hundreds( hundreds, tens=0, units=0, thousands=0, joinword=" and " )
  joinword = ' ' if joinword.empty?

  if hundreds.nonzero?
    phrase = to_units( hundreds ) + " hundred"
    phrase += joinword if tens.nonzero? || units.nonzero?
    phrase + to_tens( tens, units ) + to_thousands( thousands )
  elsif tens.nonzero? || units.nonzero?
    to_tens( tens, units ) + to_thousands( thousands )
  end
end

.to_tens(tens, units, thousands = 0) ⇒ Object

Transform the specified number of tens- and units-place numerals into a word-phrase at the given number of thousands places.



982
983
984
985
986
987
988
989
# File 'lib/linguistics/en.rb', line 982

# Transform the specified number of tens- and units-place numerals into a
# word-phrase at the given number of +thousands+ places. A tens digit of 1
# is looked up in Teens; otherwise the Tens word and the units word are
# combined, hyphenated when both digits are non-zero.
def to_tens( tens, units, thousands=0 )
  return Teens[ units ] + to_thousands( thousands ) if tens == 1

  phrase = Tens[ tens ]
  phrase += '-' if tens.nonzero? && units.nonzero?
  phrase + to_units( units, thousands )
end

.to_thousands(thousands = 0) ⇒ Object

Transform the specified number into one or more words like ‘thousand’, ‘million’, etc. Uses the thousands (American) system.



1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
# File 'lib/linguistics/en.rb', line 1012

# Transform the specified number into one or more words like 'thousand',
# 'million', etc. Uses the thousands (American) system.
#
# The first word is picked from Thousands by +thousands+ modulo
# (Thousands.length - 1); one copy of the last Thousands entry is appended
# for each further full cycle. The words are joined with single spaces.
def to_thousands( thousands=0 )
  cycle = Thousands.length - 1

  words = (0..thousands).step( cycle ).map {|offset|
    offset.zero? ? Thousands[ thousands % cycle ] : Thousands.last
  }

  words.join(" ")
end

.to_units(units, thousands = 0) ⇒ Object

Transform the specified number of units-place numerals into a word-phrase at the given number of thousands places.



975
976
977
# File 'lib/linguistics/en.rb', line 975

# Transform the specified number of units-place numerals into a word-phrase
# at the given number of +thousands+ places: the Units word for +units+
# followed by the result of #to_thousands.
def to_units( units, thousands=0 )
  unit_word = Units[ units ]
  unit_word + to_thousands( thousands )
end

.wnError ⇒ Object

If #haveWordnet? returns false, this can be called to fetch the exception which was raised when WordNet was loaded.



111
# File 'lib/linguistics/en/wordnet.rb', line 111

# The exception recorded in @wnError, or nil if none has been stored. If
# #haveWordnet? returns false, this can be called to fetch the exception
# which was raised when WordNet was loaded.
def wnError
  @wnError
end

.wnLexicon ⇒ Object

The instance of the WordNet::Lexicon used for all Linguistics WordNet functions.



115
116
117
118
119
120
121
122
123
# File 'lib/linguistics/en/wordnet.rb', line 115

# The instance of the WordNet::Lexicon used for all Linguistics WordNet
# functions. It is created on first use and cached in @wnLexicon.
#
# Raises NotImplementedError, quoting the stored error's message, if an
# error has been recorded in @wnError.
def wnLexicon
  if (failure = @wnError)
    reason = "WordNet functions are not loaded: %s" % failure.message
    raise NotImplementedError, reason
  end

  @wnLexicon = WordNet::Lexicon::new unless @wnLexicon
  @wnLexicon
end