Class: FtsLite::Tokenizer::WakachiBigram

Inherits:
Object
  • Object
show all
Defined in:
lib/fts_lite/tokenizer.rb

Instance Method Summary collapse

Instance Method Details

#split(text) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
# File 'lib/fts_lite/tokenizer.rb', line 72

# Tokenizes +text+ into single characters and overlapping character pairs.
#
# The text is run through Tokenizer.normalize and then segmented by
# BimyouSegmenter with the :white_space and :symbol options set to false
# (presumably so whitespace and symbol segments are not returned -- confirm
# against the BimyouSegmenter documentation).
#
# A segment of exactly one character is emitted unchanged. Any other segment
# is replaced by every two-character slice over it, in order, so a segment
# "abc" contributes "ab" and "bc".
#
# @param text the text to tokenize; handed to Tokenizer.normalize as-is
# @return [Array] flat list of tokens, in segment order
def split(text)
  segments = BimyouSegmenter.segment(Tokenizer.normalize(text),
                                     white_space: false,
                                     symbol: false)

  segments.flat_map do |segment|
    next segment if segment.size == 1

    # One two-character slice per start offset; the exclusive range is empty
    # for a zero-length segment, which therefore contributes nothing.
    (0...segment.size - 1).map { |start| segment[start, 2] }
  end
end

#vector(text) ⇒ Object



69
70
71
# File 'lib/fts_lite/tokenizer.rb', line 69

# Builds one space-delimited string from the tokens of +text+.
#
# @param text the text to tokenize; passed unchanged to #split
# @return [String] the tokens returned by #split, joined with a single space
def vector(text)
  tokens = split(text)
  tokens.join(" ")
end