Module: FtsLite::Tokenizer

Defined in:
lib/fts_lite/tokenizer.rb

Defined Under Namespace

Classes: Bigram, Simple, Trigram, Wakachi, WakachiBigram

Constant Summary collapse

SIMPLE_DELIMITER =
/[\s\.,\?!;\(\)。、.,?!「」『』()]+/

Class Method Summary collapse

Class Method Details

.create(name) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/fts_lite/tokenizer.rb', line 9

# Builds a new tokenizer instance for the given tokenizer name.
#
# @param name [Symbol, String] tokenizer identifier; it is converted with
#   +to_sym+ before matching. Recognized values are +:simple+, +:bigram+,
#   +:trigram+, +:wakachi+ and +:wakachi_bigram+.
# @return [Object] a freshly constructed instance of the matching tokenizer
#   class (Simple, Bigram, Trigram, Wakachi or WakachiBigram).
# @raise [ArgumentError] if +name+ cannot be converted to a symbol or does
#   not correspond to a known tokenizer.
def self.create(name)
  # Reject nil and other non-symbolizable values up front so the caller gets
  # an ArgumentError describing the bad input instead of a NoMethodError.
  unless name.respond_to?(:to_sym)
    raise ArgumentError, "tokenizer name must be a Symbol or String, got #{name.inspect}"
  end

  case name.to_sym
  when :simple
    Simple.new
  when :bigram
    Bigram.new
  when :trigram
    Trigram.new
  when :wakachi
    Wakachi.new
  when :wakachi_bigram
    WakachiBigram.new
  else
    # Include the offending value so misconfiguration is easy to diagnose.
    raise ArgumentError, "unknown tokenizer: #{name.inspect}"
  end
end

.normalize(text) ⇒ Object



25
26
27
# File 'lib/fts_lite/tokenizer.rb', line 25

# Returns a normalized, lower-cased copy of +text+.
#
# The text is first passed through NKF with the flags '-wZX' and the result
# is then lower-cased.
# NOTE(review): per the nkf documentation, -w selects UTF-8 output, -Z folds
# JIS X 0208 alphanumerics to ASCII and -X converts half-width kana to
# full-width — confirm against the installed nkf version.
#
# @param text [String] the text to normalize.
# @return [String] the converted, lower-cased text.
def self.normalize(text)
  converted = NKF.nkf('-wZX', text)
  converted.downcase
end