Class: FtsLite::Tokenizer::Bigram

Inherits:
Object
  • Object
show all
Defined in:
lib/fts_lite/tokenizer.rb

Instance Method Summary collapse

Instance Method Details

#split(text) ⇒ Object



40
41
42
43
44
45
# File 'lib/fts_lite/tokenizer.rb', line 40

# Breaks +text+ into overlapping two-character tokens (bigrams).
#
# The input is first passed through Tokenizer.normalize and then cut into
# words on SIMPLE_DELIMITER. Every word contributes each window of two
# adjacent characters, in left-to-right order; a word shorter than two
# characters contributes nothing.
#
# @param text [String] the text to tokenize (must be accepted by
#   Tokenizer.normalize)
# @return [Array<String>] a flat list of bigrams across all words
def split(text)
  words = Tokenizer.normalize(text).split(SIMPLE_DELIMITER)
  words.flat_map do |word|
    # The last valid window starts at size - 2; for shorter words the
    # range is empty and no tokens are produced.
    last_start = word.size - 2
    (0..last_start).map { |offset| word[offset, 2] }
  end
end

#vector(text) ⇒ Object



37
38
39
# File 'lib/fts_lite/tokenizer.rb', line 37

# Renders the tokens of +text+ as one space-separated string.
#
# @param text [String] the text to tokenize via #split
# @return [String] the tokens returned by #split joined with a single space;
#   empty when #split yields no tokens
def vector(text)
  tokens = split(text)
  tokens.join(" ")
end