Class: FtsLite::Tokenizer::WakachiBigram
- Inherits: Object
- Object
- FtsLite::Tokenizer::WakachiBigram
- Defined in:
- lib/fts_lite/tokenizer.rb
Instance Method Summary
Instance Method Details
#query(text, options = {}) ⇒ Object
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
# File 'lib/fts_lite/tokenizer.rb', line 105
#
# Builds a query string from +text+.
#
# The text is passed through Tokenizer.normalize and split on
# QUERY_DELIMITER. Each resulting segment is handed to
# BimyouSegmenter.segment; every word of two or more characters is expanded
# into its overlapping two-character substrings. The pieces inside one
# segment are joined with a proximity separator, and the segments are then
# joined with a single space.
#
# @param text [String] the raw query text
# @param options [Hash] optional settings
# @option options [Boolean] :fuzzy (false) when truthy, NEAR2 is used as the
#   separator instead of NEAR0 (both constants are defined outside this
#   listing; presumably NEAR2 is the looser match -- confirm against their
#   definitions)
# @return [String] the assembled query string
def query(text, options = {})
  near = options.fetch(:fuzzy, false) ? NEAR2 : NEAR0
  segments = Tokenizer.normalize(text).split(QUERY_DELIMITER)

  segments.map { |segment|
    words = BimyouSegmenter.segment(segment, :white_space => false, :symbol => false)
    words.map { |word|
      if word.size == 1
        # A single character cannot form a two-character pair; keep it as is.
        word
      else
        # Overlapping two-character windows: "abc" -> "ab", "bc".
        0.upto(word.size - 2).map { |i| word[i, 2] }.join(near)
      end
    }.join(near)
  }.join(" ")
end
#split(text) ⇒ Object
124 125 126 127 128 129 130 131 132 133 134 |
# File 'lib/fts_lite/tokenizer.rb', line 124
#
# Splits +text+ into a flat list of tokens.
#
# The text is passed through Tokenizer.normalize and then
# BimyouSegmenter.segment. A one-character word is kept unchanged; any other
# word is replaced by its overlapping two-character substrings.
#
# @param text [String] the text to tokenize
# @return [Array<String>] the tokens, in order of appearance
def split(text)
  words = BimyouSegmenter.segment(Tokenizer.normalize(text), :white_space => false, :symbol => false)

  # flat_map splices each word's list of pairs directly into the result.
  words.flat_map do |word|
    if word.size == 1
      word
    else
      # Overlapping two-character windows: "abc" -> "ab", "bc".
      0.upto(word.size - 2).map { |i| word[i, 2] }
    end
  end
end
#vector(text) ⇒ Object
121 122 123 |
# File 'lib/fts_lite/tokenizer.rb', line 121
#
# Returns the tokens produced by #split for +text+ as a single string,
# with the tokens separated by one space each.
#
# @param text [String] the text to tokenize
# @return [String] the space-separated tokens
def vector(text)
  tokens = split(text)
  tokens.join(" ")
end