Module: TextComb

Defined in:
lib/text_comb.rb,
lib/text_comb/string.rb,
lib/text_comb/version.rb,
lib/text_comb/iterator.rb,
lib/text_comb/string_extensions.rb

Defined Under Namespace

Modules: StringExtensions Classes: Iterator, String

Constant Summary collapse

VERSION =
"0.0.2"

Class Method Summary collapse

Class Method Details

.guess_language(string) ⇒ Object

TextComb.guess_language "How are you?"



43
44
45
# File 'lib/text_comb.rb', line 43

# Guesses the language of +string+ by delegating to stop.StopWords.guess.
#
# Returns whatever stop.StopWords.guess returns; ngrams forwards that value
# to the n-gram iterator as its stop-word argument when called with
# :stop_words => :guess.
def self.guess_language(string)
  stop_word_sets = stop.StopWords
  stop_word_sets.guess(string)
end

.ngrams(string, n, options = {}) ⇒ Object

TextComb.ngrams(string, 3)
TextComb.ngrams(string, 3, :locale => java.util.Locale.default)
TextComb.ngrams(string, 3, :stop_words => :guess)
TextComb.ngrams(string, 3, :stop_words => :English)
TextComb.ngrams(string, 3, :stop_words => TextComb.guess_language(string))



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/text_comb.rb', line 22

# Produces the n-grams of +string+ by handing a cue.NGramIterator to +enumerate+.
#
# Recognized options (both optional):
#   :locale     - locale passed to the iterator; falls back to
#                 java.util.Locale.default when absent or falsy.
#   :stop_words - :guess (resolved via guess_language(string)), any other
#                 Symbol (resolved via stop_words), a stop.StopWords value
#                 (used as given), or nil/absent (nil is passed through).
#
# Raises a RuntimeError when :stop_words is anything else.
def self.ngrams(string, n, options={})
  locale = options[:locale] || java.util.Locale.default
  requested = options[:stop_words]

  # Branch order matters: :guess must be tested before the generic Symbol case.
  filter =
    case requested
    when :guess then guess_language(string)
    when Symbol then stop_words(requested)
    when stop.StopWords, nil then requested
    else raise "Can't recognize the stop_words: #{requested}"
    end

  enumerate(cue.NGramIterator.new(n, string, locale, filter))
end

.sentences(string) ⇒ Object



13
14
15
# File 'lib/text_comb.rb', line 13

# Iterates over the sentences of +string+.
#
# Builds a cue.SentenceIterator for the string and returns the result of
# passing it to +enumerate+ (a helper defined outside this listing).
def self.sentences(string)
  sentence_iterator = cue.SentenceIterator.new(string)
  enumerate(sentence_iterator)
end

.stop_words(stopwords_symbol) ⇒ Object

TextComb.stop_words :English TextComb.stop_words :French



49
50
51
# File 'lib/text_comb.rb', line 49

# Looks up a stop-word set by name, e.g. :English or :French.
#
# The symbol is resolved as a constant on stop.StopWords via const_get, so
# an unknown name surfaces whatever error const_get raises.
def self.stop_words(stopwords_symbol)
  stop_word_sets = stop.StopWords
  stop_word_sets.const_get(stopwords_symbol)
end

.string(s) ⇒ Object

Convenience constructor: wraps the given string in a new TextComb::String.



55
56
57
# File 'lib/text_comb.rb', line 55

# Convenience constructor: returns a new TextComb::String built from +s+.
def self.string(s)
  wrapper_class = TextComb::String
  wrapper_class.new(s)
end

.words(string) ⇒ Object



9
10
11
# File 'lib/text_comb.rb', line 9

# Iterates over the words of +string+.
#
# Builds a cue.WordIterator for the string and returns the result of
# passing it to +enumerate+ (a helper defined outside this listing).
def self.words(string)
  word_iterator = cue.WordIterator.new(string)
  enumerate(word_iterator)
end