Module: TextComb
- Defined in:
- lib/text_comb.rb,
lib/text_comb/string.rb,
lib/text_comb/version.rb,
lib/text_comb/iterator.rb,
lib/text_comb/string_extensions.rb
Defined Under Namespace
Modules: StringExtensions Classes: Iterator, String
Constant Summary collapse
- VERSION =
"0.0.2"
Class Method Summary collapse
-
.guess_language(string) ⇒ Object
Example: TextComb.guess_language "How are you?"
-
.ngrams(string, n, options = {}) ⇒ Object
Examples:
TextComb.ngrams(string, 3)
TextComb.ngrams(string, 3, :locale => java.util.Locale.default)
TextComb.ngrams(string, 3, :stop_words => :guess)
TextComb.ngrams(string, 3, :stop_words => :English)
TextComb.ngrams(string, 3, TextComb.guess_language(string))
- .sentences(string) ⇒ Object
-
.stop_words(stopwords_symbol) ⇒ Object
Examples:
TextComb.stop_words :English
TextComb.stop_words :French
-
.string(s) ⇒ Object
Convenience shortcut for TextComb::String.new(s).
- .words(string) ⇒ Object
Class Method Details
.guess_language(string) ⇒ Object
Example: TextComb.guess_language "How are you?"
43 44 45 |
# File 'lib/text_comb.rb', line 43 def self.guess_language(string) stop.StopWords.guess(string) end |
.ngrams(string, n, options = {}) ⇒ Object
Examples:
TextComb.ngrams(string, 3)
TextComb.ngrams(string, 3, :locale => java.util.Locale.default)
TextComb.ngrams(string, 3, :stop_words => :guess)
TextComb.ngrams(string, 3, :stop_words => :English)
TextComb.ngrams(string, 3, TextComb.guess_language(string))
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/text_comb.rb', line 22 def self.ngrams(string, n, options={}) locale = options[:locale] || java.util.Locale.default stop_words_val = case options[:stop_words] when :guess guess_language(string) when Symbol stop_words(options[:stop_words]) when stop.StopWords options[:stop_words] when nil nil else raise "Can't recognize the stop_words: #{options[:stop_words]}" end enumerate(cue.NGramIterator.new(n, string, locale, stop_words_val)) end |
.sentences(string) ⇒ Object
13 14 15 |
# File 'lib/text_comb.rb', line 13 def self.sentences(string) enumerate(cue.SentenceIterator.new(string)) end |
.stop_words(stopwords_symbol) ⇒ Object
Examples:
TextComb.stop_words :English
TextComb.stop_words :French
49 50 51 |
# File 'lib/text_comb.rb', line 49 def self.stop_words(stopwords_symbol) stop.StopWords.const_get(stopwords_symbol) end |
.string(s) ⇒ Object
Convenience shortcut for TextComb::String.new(s).
55 56 57 |
# File 'lib/text_comb.rb', line 55 def self.string(s) TextComb::String.new(s) end |
.words(string) ⇒ Object
9 10 11 |
# File 'lib/text_comb.rb', line 9 def self.words(string) enumerate(cue.WordIterator.new(string)) end |