Class: FtsLite::Tokenizer::Bigram

Inherits:
Object
  • Object
show all
Defined in:
lib/fts_lite/tokenizer.rb

Instance Method Summary collapse

Instance Method Details

#query(text, options = {}) ⇒ Object



43
44
45
46
47
48
49
50
51
52
# File 'lib/fts_lite/tokenizer.rb', line 43

# Builds a query string from +text+ out of overlapping two-character slices.
#
# The text is passed through Tokenizer.normalize, cut into segments on
# QUERY_DELIMITER, and each segment is cut into words on SIMPLE_DELIMITER.
# Every word contributes its consecutive character pairs; the pairs of one
# segment are chained with the NEAR0 separator (NEAR2 when +:fuzzy+ is
# truthy) and the segments are joined with single spaces.
#
# Words shorter than two characters have no pair and contribute nothing.
# Segments left without any pair are dropped, so the result never carries a
# leading/trailing separator or stray whitespace.
#
# @param text the text to build the query from; handed to Tokenizer.normalize
# @param options [Hash] +:fuzzy+ selects NEAR2 instead of NEAR0 when truthy
#   (treated as false when the key is absent)
# @return [String] the query string; empty when no pair can be formed
def query(text, options = {})
  near = options.fetch(:fuzzy, false) ? NEAR2 : NEAR0
  segments = Tokenizer.normalize(text).split(QUERY_DELIMITER).map do |segment|
    # Flatten per segment before joining: joining a nested array would emit a
    # separator for every word that produced no pair at all.
    pairs = segment.split(SIMPLE_DELIMITER).flat_map do |word|
      word.each_char.each_cons(2).map(&:join)
    end
    pairs.join(near)
  end
  segments.reject(&:empty?).join(" ")
end

#split(text) ⇒ Object



56
57
58
59
60
61
# File 'lib/fts_lite/tokenizer.rb', line 56

# Breaks +text+ into overlapping two-character tokens.
#
# The text goes through Tokenizer.normalize, is cut into words on
# SIMPLE_DELIMITER, and every word is replaced by its consecutive character
# pairs. Words with fewer than two characters produce no token.
#
# @param text the text to tokenize; handed to Tokenizer.normalize
# @return [Array<String>] every character pair, in order of appearance
def split(text)
  words = Tokenizer.normalize(text).split(SIMPLE_DELIMITER)
  words.flat_map do |word|
    word.each_char.each_cons(2).map(&:join)
  end
end

#vector(text) ⇒ Object



53
54
55
# File 'lib/fts_lite/tokenizer.rb', line 53

# Produces the space-separated token string for +text+.
#
# @param text the text to tokenize; passed unchanged to #split
# @return [String] the tokens returned by #split, joined with single spaces
def vector(text)
  tokens = split(text)
  tokens.join(" ")
end