Class: Redmine::Search::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/redmine/search.rb

Instance Method Summary collapse

Constructor Details

#initialize(question) ⇒ Tokenizer

Returns a new instance of Tokenizer.



130
131
132
# File 'lib/redmine/search.rb', line 130

# Stores the search question for later tokenization.
#
# The argument is converted with +to_s+, so a nil question is kept as
# an empty string.
#
# @param question [#to_s] the raw search input
def initialize(question)
  @question = question.to_s
end

Instance Method Details

#tokens ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/redmine/search.rb', line 134

# Splits the stored question into the terms to search for.
#
# A double-quoted phrase is kept together as a single term,
# e.g. hello "bye bye" => ["hello", "bye bye"]
#
# @return [Array<String>] at most 5 distinct terms, in order of first appearance
def tokens
  # A term is either a double-quoted phrase or a run of characters that
  # are not Unicode space separators.
  raw_terms = @question.scan(/"[^"]+"|[^\p{Zs}]+/)
  # Drop the enclosing quotes together with any spaces just inside them,
  # e.g. "\" foo bar \"" => "foo bar"
  terms = raw_terms.map { |term| term.gsub(/\A"\p{Zs}*|\p{Zs}*"\Z/, '') }
  # Terms shorter than 2 characters are discarded unless they contain a
  # Han character (Chinese HANZI/Japanese KANJI).
  searchable = terms.uniq.reject { |term| term.length <= 1 && term !~ /\p{Han}/ }
  # Never search for more than 5 terms.
  searchable.first(5)
end