Class: Tantiny::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/tantiny/tokenizer.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.default ⇒ Object



5
6
7
# File 'lib/tantiny/tokenizer.rb', line 5

# Builds a tokenizer of the +:simple+ kind by delegating to +.new+.
#
# @return [Object] whatever +new(:simple)+ returns
def self.default
  new(:simple)
end

.new(kind, **options) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/tantiny/tokenizer.rb', line 9

# Builds a tokenizer of the requested kind by dispatching to the matching
# +__new_*_tokenizer+ constructor (defined outside this listing).
#
# @param kind [Symbol] one of +:simple+, +:stemmer+ or +:ngram+
# @param options [Hash] kind-specific settings
# @option options [#to_s] :language (:en) stemmer language; only read for
#   +:stemmer+, and a +nil+/+false+ value falls back to +:en+
# @option options [Object] :min forwarded as-is for +:ngram+ (+nil+ when absent)
# @option options [Object] :max forwarded as-is for +:ngram+ (+nil+ when absent)
# @option options [Boolean] :prefix_only (false) forwarded for +:ngram+
# @return [Object] the value returned by the selected constructor
# @raise [UnknownTokenizer] when +kind+ is none of the supported symbols
def self.new(kind, **options)
  case kind
  when :simple then __new_simple_tokenizer
  when :stemmer then __new_stemmer_tokenizer((options[:language] || :en).to_s)
  when :ngram
    min, max = options.values_at(:min, :max)
    __new_ngram_tokenizer(min, max, options.fetch(:prefix_only, false))
  else
    raise UnknownTokenizer.new(kind)
  end
end

Instance Method Details

#terms(string) ⇒ Object



24
25
26
# File 'lib/tantiny/tokenizer.rb', line 24

# Passes +string+ to +__extract_terms+ and returns its result unchanged.
#
# NOTE(review): +__extract_terms+ is defined outside this listing; presumably
# it returns the terms this tokenizer produces for the input — confirm there.
#
# @param string [Object] value forwarded to +__extract_terms+ (presumably a String)
# @return [Object] whatever +__extract_terms+ returns
def terms(string)
  __extract_terms(string)
end