Module: IndonesianStemmer

Extended by:
MorphologicalUtility
Defined in:
lib/indonesian_stemmer.rb,
lib/indonesian_stemmer/version.rb,
lib/indonesian_stemmer/irregular_words.rb,
lib/indonesian_stemmer/stemmer_utility.rb,
lib/indonesian_stemmer/morphological_utility.rb

Defined Under Namespace

Modules: IrregularWords, MorphologicalUtility, StemmerUtility

Constant Summary collapse

# Version string of this module.
VERSION =
"0.2.0"
# The five vowel letters.
VOWEL_CHARACTERS =
%w( a e i o u )
# Particle strings; presumably the endings handled by remove_particle -- confirm.
PARTICLE_CHARACTERS =
%w( kah lah pun )
# Possessive pronoun strings; presumably the endings handled by
# remove_possessive_pronoun -- confirm.
POSSESSIVE_PRONOUN_CHARACTERS =
%w( ku mu nya )
# First-order prefixes. Longer variants are listed before their shorter
# forms (e.g. "meng" before "men" before "me").
FIRST_ORDER_PREFIX_CHARACTERS =
%w( meng meny men mem me
peng peny pen pem di ter ke )
# The first-order prefixes above minus "me", "di", "ter" and "ke".
SPECIAL_FIRST_ORDER_PREFIX_CHARACTERS =
%w( meng peng meny peny men pen
mem pem )
# Second-order prefixes.
SECOND_ORDER_PREFIX_CHARACTERS =
%w( ber be per pe )
# The second-order prefixes above minus "be".
NON_SPECIAL_SECOND_ORDER_PREFIX_CHARACTERS =
%w( ber per pe )
# Whole words (not prefixes) that get special second-order prefix handling;
# the exact treatment lives outside this listing.
SPECIAL_SECOND_ORDER_PREFIX_WORDS =
%w( belajar pelajar belunjur )
# Suffix strings.
SUFFIX_CHARACTERS =
%w( kan an i )
# Subset of the special first-order prefixes; presumably the ones whose removal
# requires substituting a letter at the start of the stem -- confirm.
WITH_VOWEL_SUBSTITUTION_PREFIX_CHARACTERS =
%w( meny peny men pen )
# The REMOVED_* values below are distinct powers of two, so any combination
# of them fits in one integer bitmask. Presumably they are OR-ed into @flags
# (which `stem` resets to 0) to record which prefix was stripped -- confirm.
REMOVED_KE =
1
REMOVED_PENG =
2
REMOVED_DI =
4
REMOVED_MENG =
8
REMOVED_TER =
16
REMOVED_BER =
32
REMOVED_PE =
64

Class Attribute Summary collapse

Class Method Summary collapse

Methods included from MorphologicalUtility

included

Methods included from StemmerUtility

included

Class Attribute Details

.number_of_syllables ⇒ Object

Returns the value of attribute number_of_syllables.



9
10
11
# File 'lib/indonesian_stemmer.rb', line 9

# Reader for the @number_of_syllables attribute.
#
# `stem` assigns this from `total_syllables(word)` each time it processes a
# single word; the value is nil if it has never been assigned.
def number_of_syllables
  @number_of_syllables
end

Class Method Details

.stem(word, derivational_stemming = true) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/indonesian_stemmer.rb', line 11

# Stems +word+, or every whitespace-separated word inside it.
#
# For a single word the method resets @flags to 0, stores the result of
# `total_syllables(word)` in @number_of_syllables, then runs the particle and
# possessive-pronoun removal steps (each only while
# `still_has_many_syllables?` is true) and, optionally, derivational stemming.
# The helpers' return values are ignored and the +word+ object itself is
# returned, so the helpers are presumably expected to modify +word+ in
# place -- confirm against their definitions.
#
# @param word [String] one word, or several words separated by whitespace
# @param derivational_stemming [Boolean] when truthy, `stem_derivational` is
#   also applied; this choice is honoured for every word of a multi-word input
# @return [String, Array<String>] the processed word, or an Array with one
#   result per word when +word+ contains any whitespace
def stem(word, derivational_stemming = true)
  @flags = 0

  if word =~ /\s/
    # Forward the caller's derivational_stemming choice; without it the
    # recursive call would silently fall back to the default (true).
    word.split(' ').map { |w| stem(w, derivational_stemming) }
  else
    @number_of_syllables = total_syllables word

    remove_particle(word) if still_has_many_syllables?
    remove_possessive_pronoun(word) if still_has_many_syllables?

    stem_derivational(word) if derivational_stemming

    word
  end
end