Class: Sastrawi::Stemmer::Stemmer
- Inherits:
-
Object
- Object
- Sastrawi::Stemmer::Stemmer
- Defined in:
- lib/sastrawi/stemmer/stemmer.rb
Instance Attribute Summary collapse
-
#dictionary ⇒ Object
readonly
Returns the value of attribute dictionary.
-
#visitor_provider ⇒ Object
readonly
Returns the value of attribute visitor_provider.
Instance Method Summary collapse
-
#initialize(dictionary) ⇒ Stemmer
constructor
A new instance of Stemmer.
- #plural?(word) ⇒ Boolean
-
#stem(text) ⇒ Object
Stem a string to its base form.
-
#stem_plural_word(word) ⇒ Object
Stem a plural word to its base form (see Asian, J., 2007).
-
#stem_singular_word(word) ⇒ Object
Stem a singular word to its base form.
-
#stem_word(word) ⇒ Object
Stem a word to its base form.
Constructor Details
#initialize(dictionary) ⇒ Stemmer
Returns a new instance of Stemmer.
16 17 18 19 |
# File 'lib/sastrawi/stemmer/stemmer.rb', line 16
#
# Builds a stemmer around the given dictionary and creates the visitor
# provider that is later passed to each stemming context.
#
# @param dictionary [Object] stored as-is in @dictionary; stem_plural_word
#   calls `contains?` on it
def initialize(dictionary)
  @visitor_provider = Sastrawi::Stemmer::Context::Visitor::VisitorProvider.new
  @dictionary = dictionary
end
Instance Attribute Details
#dictionary ⇒ Object (readonly)
Returns the value of attribute dictionary.
14 15 16 |
# File 'lib/sastrawi/stemmer/stemmer.rb', line 14
#
# Reader for the dictionary handed to the constructor.
#
# @return [Object] the current value of @dictionary
def dictionary
  @dictionary
end
#visitor_provider ⇒ Object (readonly)
Returns the value of attribute visitor_provider.
14 15 16 |
# File 'lib/sastrawi/stemmer/stemmer.rb', line 14
#
# Reader for the visitor provider created in the constructor.
#
# @return [Object] the current value of @visitor_provider
def visitor_provider
  @visitor_provider
end
Instance Method Details
#plural?(word) ⇒ Boolean
48 49 50 51 52 53 54 |
# File 'lib/sastrawi/stemmer/stemmer.rb', line 48
#
# Tells whether a word should be routed to plural stemming, i.e. whether it
# contains a hyphen other than the one separating a trailing
# ku/mu/nya/lah/kah/tah/pun suffix.
#
# @param word [String] a single word
# @return [Boolean] true when the relevant part of the word contains "-"
def plural?(word)
  suffixed = /^(.*)-(ku|mu|nya|lah|kah|tah|pun)$/.match(word)

  # When a recognised suffix is attached with a hyphen, only a hyphen in the
  # part before it counts: "buku-buku-nya" is plural, "buku-nya" is not.
  candidate = suffixed ? suffixed[1] : word
  candidate.include?('-')
end
#stem(text) ⇒ Object
Stem a string to its base form
24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/sastrawi/stemmer/stemmer.rb', line 24
#
# Stem a string to its base form.
#
# The text is passed through TextNormalizer.normalize_text, split on
# whitespace, each word is stemmed with #stem_word, and the results are
# joined back together with single spaces.
#
# @param text [String] the text to stem
# @return [String] the stemmed words separated by single spaces
def stem(text)
  normalized_text = Sastrawi::Stemmer::Filter::TextNormalizer.normalize_text(text)

  normalized_text
    .split(' ')
    .map { |word| stem_word(word) }
    .join(' ')
end
#stem_plural_word(word) ⇒ Object
Stem a plural word to its base form. See Asian, J. (2007), “Effective Techniques for Indonesian Text Retrieval”, pp. 76–77.
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# File 'lib/sastrawi/stemmer/stemmer.rb', line 61
#
# Stem a hyphenated (plural) word to its base form.
#
# The word is split at its last hyphen into two parts, each part is stemmed
# with #stem_singular_word, and the shared root is returned when both parts
# reduce to the same root. Otherwise the word is returned unchanged.
#
# @param word [String] the word to stem
# @return [String] the common root of both parts, or the original word
def stem_plural_word(word)
  split = /^(.*)-(.*)$/.match(word)
  return word if split.nil?

  head = split[1]
  tail = split[2]

  # When the last segment is only a suffix, re-split the remainder so the
  # suffix stays attached to the second part: "a-b-nya" => "a" and "b-nya".
  inner = /^(.*)-(.*)$/.match(head)
  if inner && %w[ku mu nya lah kah tah pun].include?(tail)
    head = inner[1]
    tail = "#{inner[2]}-#{tail}"
  end

  head_root = stem_singular_word(head)
  tail_root = stem_singular_word(tail)

  # The second part is neither a dictionary word nor reducible as it stands:
  # retry with a "me" prefix prepended.
  if !@dictionary.contains?(tail) && tail_root == tail
    tail_root = stem_singular_word("me#{tail}")
  end

  head_root == tail_root ? head_root : word
end
#stem_singular_word(word) ⇒ Object
Stem a singular word to its base form
93 94 95 96 97 98 |
# File 'lib/sastrawi/stemmer/stemmer.rb', line 93
#
# Stem a singular word to its base form.
#
# Builds a stemming context for the word with this stemmer's dictionary and
# visitor provider, executes it, and returns the context's result.
#
# @param word [String] the word to stem
# @return [Object] whatever the executed context reports as its result
def stem_singular_word(word)
  Sastrawi::Stemmer::Context::Context
    .new(word, @dictionary, @visitor_provider)
    .tap(&:execute)
    .result
end
#stem_word(word) ⇒ Object
Stem a word to its base form
40 41 42 43 44 45 46 |
# File 'lib/sastrawi/stemmer/stemmer.rb', line 40
#
# Stem a word to its base form, dispatching on #plural?.
#
# @param word [String] the word to stem
# @return [Object] the result of #stem_plural_word for plural words,
#   otherwise the result of #stem_singular_word
def stem_word(word)
  return stem_plural_word(word) if plural?(word)

  stem_singular_word(word)
end