Class: FtsLite::Tokenizer::WakachiBigram

Inherits:
Object
  • Object
show all
Defined in:
lib/fts_lite/tokenizer.rb

Instance Method Summary collapse

Instance Method Details

#query(text, options = {}) ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/fts_lite/tokenizer.rb', line 105

# Builds the query string for +text+.
#
# The text is run through Tokenizer.normalize and cut on QUERY_DELIMITER.
# Each piece is segmented by BimyouSegmenter (with the :white_space and
# :symbol options turned off). One-character words are used as they are;
# longer words are replaced by their overlapping two-character slices.
# Every term within a piece is joined with NEAR0 (or NEAR2 when fuzzy), and
# the pieces themselves are joined with a single space.
#
# @param text [String] raw query text
# @param options [Hash] recognizes +:fuzzy+; when truthy NEAR2 is used as the
#   joiner instead of NEAR0 (defaults to false when the key is absent)
# @return [String] the assembled query string
def query(text, options = {})
  joiner = options.fetch(:fuzzy, false) ? NEAR2 : NEAR0
  segments = Tokenizer.normalize(text).split(QUERY_DELIMITER)

  clauses = segments.map do |segment|
    words = BimyouSegmenter.segment(segment, :white_space => false, :symbol => false)
    terms = words.map do |word|
      # A lone character has no pair to form, so it is kept verbatim.
      next word if word.size == 1

      (0..word.size - 2).map { |pos| word[pos, 2] }.join(joiner)
    end
    terms.join(joiner)
  end

  clauses.join(" ")
end

#split(text) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
# File 'lib/fts_lite/tokenizer.rb', line 124

# Tokenizes +text+ into a flat list of terms.
#
# The text is normalized with Tokenizer.normalize and segmented by
# BimyouSegmenter (with the :white_space and :symbol options turned off).
# One-character words are kept as they are; longer words are replaced by
# their overlapping two-character slices.
#
# @param text [String] text to tokenize
# @return [Array<String>] the terms, in order of appearance
def split(text)
  normalized = Tokenizer.normalize(text)
  words = BimyouSegmenter.segment(normalized, :white_space => false, :symbol => false)

  words.flat_map do |word|
    # A lone character has no pair to form, so it is emitted verbatim.
    next word if word.size == 1

    (0..word.size - 2).map { |pos| word[pos, 2] }
  end
end

#vector(text) ⇒ Object



121
122
123
# File 'lib/fts_lite/tokenizer.rb', line 121

# Renders +text+ as one string of its terms separated by single spaces.
#
# @param text [String] text to tokenize
# @return [String] the terms produced by #split, joined with " "
def vector(text)
  terms = split(text)
  terms.join(" ")
end