Class: Lemmatizer::Lemmatizer

Inherits:
Object
  • Object
  • Lemmatizer::Lemmatizer
Defined in:
lib/lemmatizer/lemmatizer.rb

Constant Summary

# Directory one level above the directory that contains this source file.
DATA_DIR =
File.expand_path('../..', __FILE__)
# Default dictionary files, keyed by part of speech. Each value is a
# two-element array of paths under DATA_DIR/dict; #initialize hands the two
# paths, in this order, to load_wordnet_files.
# NOTE(review): judging by the file names, the first path is the word index
# and the second the exception list -- confirm against load_wordnet_files.
WN_FILES =
{
  noun: ["#{DATA_DIR}/dict/index.noun", "#{DATA_DIR}/dict/noun.exc"],
  verb: ["#{DATA_DIR}/dict/index.verb", "#{DATA_DIR}/dict/verb.exc"],
  adj:  ["#{DATA_DIR}/dict/index.adj",  "#{DATA_DIR}/dict/adj.exc"],
  adv:  ["#{DATA_DIR}/dict/index.adv",  "#{DATA_DIR}/dict/adv.exc"]
}
# Per-part-of-speech tables of two-element string pairs. The keys of this
# hash are also what #initialize uses to set up its per-part-of-speech
# lookup tables.
# NOTE(review): each pair looks like [word ending, replacement]; the code that
# applies them is not visible here, so confirm the meaning and whether the
# order of the pairs matters before reordering.
MORPHOLOGICAL_SUBSTITUTIONS =
{
  noun: [
    ['s', ''], ['ses', 's'], ['ves', 'f'],
    ['xes', 'x'], ['zes', 'z'], ['ches', 'ch'],
    ['shes', 'sh'], ['men', 'man'], ['ies', 'y']
  ],
  verb: [
    ['s', ''], ['ies', 'y'],
    ['es', 'e'], ['es', ''],
    ['ed', 'e'], ['ed', ''],
    ['ing', 'e'], ['ing', '']
  ],
  adj: [
    ['er', ''], ['est', ''],
    ['er', 'e'], ['est', 'e']
  ],
  adv: []
}

Instance Method Summary

Constructor Details

#initialize(files = WN_FILES) ⇒ Lemmatizer

Returns a new instance of Lemmatizer.



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/lemmatizer/lemmatizer.rb', line 58

# Builds a lemmatizer with one empty word list and one empty exception table
# per key of MORPHOLOGICAL_SUBSTITUTIONS, then loads the given files.
#
# @param files [Hash, nil] maps a part of speech to an object whose elements
#   [0] and [1] are passed to load_wordnet_files; pass nil or false to skip
#   loading entirely. Defaults to WN_FILES.
def initialize(files = WN_FILES)
  @wordlists  = {}
  @exceptions = {}

  MORPHOLOGICAL_SUBSTITUTIONS.each_key do |part_of_speech|
    @wordlists[part_of_speech]  = {}
    @exceptions[part_of_speech] = {}
  end

  # Nothing to load when the caller explicitly opted out.
  return unless files

  files.each_pair do |part_of_speech, paths|
    load_wordnet_files(part_of_speech, paths[0], paths[1])
  end
end

Instance Method Details

#lemma(form, pos = nil) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/lemmatizer/lemmatizer.rb', line 74

# Looks up the lemma of +form+.
#
# With a part of speech, returns the first value yielded by
# each_lemma(form, pos), or +form+ itself when nothing is yielded.
# Without one, tries :verb, :noun, :adj and :adv in that order and returns
# the first result that differs from +form+; if none differs, +form+ is
# returned unchanged.
#
# @param form [Object] the word form to look up
# @param pos [Symbol, nil] part of speech, or nil to try all of them
# @return [Object] the lemma found, or +form+
def lemma(form, pos = nil)
  if pos
    # The return inside the block exits #lemma with the first candidate.
    each_lemma(form, pos) { |candidate| return candidate }
    return form
  end

  [:verb, :noun, :adj, :adv].each do |part_of_speech|
    candidate = lemma(form, part_of_speech)
    next if candidate == form

    return candidate
  end

  form
end