Class: Sastrawi::Stemmer::Stemmer

Inherits:
Object
  • Object
show all
Defined in:
lib/sastrawi/stemmer/stemmer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dictionary) ⇒ Stemmer

Returns a new instance of Stemmer.



16
17
18
19
# File 'lib/sastrawi/stemmer/stemmer.rb', line 16

# Builds a stemmer around the given dictionary and creates a fresh
# VisitorProvider for it.
#
# @param dictionary [#contains?] word list consulted during stemming
#   (stem_plural_word calls +contains?+ on it; it is also handed to each
#   stemming Context)
def initialize(dictionary)
  @dictionary = dictionary
  # One provider instance is created here and reused for every Context built
  # by stem_singular_word.
  @visitor_provider = Sastrawi::Stemmer::Context::Visitor::VisitorProvider.new
end

Instance Attribute Details

#dictionary ⇒ Object (readonly)

Returns the value of attribute dictionary.



14
15
16
# File 'lib/sastrawi/stemmer/stemmer.rb', line 14

# Read-only accessor for the dictionary passed to the constructor.
#
# @return [Object] the value stored in @dictionary
def dictionary
  @dictionary
end

#visitor_provider ⇒ Object (readonly)

Returns the value of attribute visitor_provider.



14
15
16
# File 'lib/sastrawi/stemmer/stemmer.rb', line 14

# Read-only accessor for the visitor provider created in the constructor.
#
# @return [Object] the value stored in @visitor_provider
def visitor_provider
  @visitor_provider
end

Instance Method Details

#plural?(word) ⇒ Boolean

Returns:

  • (Boolean)


48
49
50
51
52
53
54
# File 'lib/sastrawi/stemmer/stemmer.rb', line 48

# Tells whether a word is hyphenated in a way that marks it as a plural
# (reduplicated) form.
#
# A trailing "-ku", "-mu", "-nya", "-lah", "-kah", "-tah" or "-pun" is not
# counted as a plural marker on its own: when the word ends in one of those,
# only the part before that last hyphen is inspected for another hyphen.
#
# @param word [String] a single word
# @return [Boolean] true when the inspected part contains a hyphen
def plural?(word)
  particle_match = /^(.*)-(ku|mu|nya|lah|kah|tah|pun)$/.match(word)

  # Look at the stem in front of the particle when there is one, otherwise at
  # the whole word.
  inspected = particle_match ? particle_match[1] : word
  inspected.include?('-')
end

#stem(text) ⇒ Object

Stem a string to its base form



24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/sastrawi/stemmer/stemmer.rb', line 24

# Stem every word of a text to its base form.
#
# The text is first passed through TextNormalizer.normalize_text, then split
# on whitespace; each word is stemmed with stem_word and the results are
# joined back with single spaces.
#
# @param text [String] the text to stem
# @return [String] the stemmed words separated by single spaces
def stem(text)
  Sastrawi::Stemmer::Filter::TextNormalizer
    .normalize_text(text)
    .split(' ')
    .map { |word| stem_word(word) }
    .join(' ')
end

#stem_plural_word(word) ⇒ Object

Stem a plural word to its base form. See Asian, J. (2007), “Effective Techniques for Indonesian Text Retrieval”, pp. 76–77.



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/sastrawi/stemmer/stemmer.rb', line 61

# Stem a plural (hyphenated) word to its base form.
#
# The word is split at its last hyphen into a head and a tail. Both halves
# are stemmed with stem_singular_word; if they reduce to the same root that
# root is returned, otherwise the word is returned unchanged.
#
# @param word [String] a hyphenated word
# @return [String] the shared root of both halves, or +word+ itself when the
#   halves do not share a root or the word has no hyphen
def stem_plural_word(word)
  halves = /^(.*)-(.*)$/.match(word)
  return word unless halves

  head = halves[1]
  tail = halves[2]

  # When the last segment is one of the listed particles and the head is
  # itself hyphenated, re-split on the head and keep the particle attached to
  # the second half (a-b-nya => "a" and "b-nya").
  if %w[ku mu nya lah kah tah pun].include?(tail)
    inner = /^(.*)-(.*)$/.match(head)

    if inner
      tail = "#{inner[2]}-#{tail}"
      head = inner[1]
    end
  end

  head_root = stem_singular_word(head)
  tail_root = stem_singular_word(tail)

  # If the tail is not a dictionary word and stemming left it untouched,
  # retry with a "me" prefix prepended.
  retry_with_prefix = !@dictionary.contains?(tail) && tail_root == tail
  tail_root = stem_singular_word("me#{tail}") if retry_with_prefix

  head_root == tail_root ? head_root : word
end

#stem_singular_word(word) ⇒ Object

Stem a singular word to its base form



93
94
95
96
97
98
# File 'lib/sastrawi/stemmer/stemmer.rb', line 93

# Stem a singular word to its base form.
#
# Builds a stemming Context from the word, the dictionary and the visitor
# provider, runs it, and returns what the context reports as its result.
#
# @param word [String] the word to stem
# @return [Object] the value of Context#result after execution
def stem_singular_word(word)
  Sastrawi::Stemmer::Context::Context
    .new(word, @dictionary, @visitor_provider)
    .tap(&:execute)
    .result
end

#stem_word(word) ⇒ Object

Stem a word to its base form



40
41
42
43
44
45
46
# File 'lib/sastrawi/stemmer/stemmer.rb', line 40

# Stem a word to its base form, choosing the plural or singular stemming
# path based on plural?.
#
# @param word [String] a single word
# @return [Object] whatever stem_plural_word or stem_singular_word returns
def stem_word(word)
  return stem_plural_word(word) if plural?(word)

  stem_singular_word(word)
end