Class: Qiita::Elasticsearch::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/qiita/elasticsearch/tokenizer.rb

Constant Summary collapse

# Empty list of filterable field names, frozen so the shared constant cannot
# be mutated in place by a caller.
# NOTE(review): presumably the fallback used when no filterable_fields are
# passed to the constructor; the code that reads it is not visible here.
DEFAULT_FILTERABLE_FIELDS = [].freeze
# Extended-mode (/x) pattern describing a single query token.
#
# A token is, in order:
#   * an optional leading "-"                       (capture: minus)
#   * an optional "name:" prefix made of \w chars   (capture: field_name)
#   * either a double-quoted term, matched lazily up to the first closing
#     quote that is not preceded by a backslash     (capture: quoted_term)
#     or a run of non-whitespace characters         (capture: term)
# The whole token is also captured                  (capture: token_string).
#
# The named groups appear in the order token_string, minus, field_name,
# quoted_term, term; #tokenize destructures the String#scan results in
# exactly that order, so the groups must not be reordered.
TOKEN_PATTERN =
/
  (?<token_string>
    (?<minus>-)?
    (?:(?<field_name>\w+):)?
    (?:
      (?:"(?<quoted_term>.*?)(?<!\\)")
      |
      (?<term>\S+)
    )
  )
/x

Instance Method Summary collapse

Constructor Details

#initialize(filterable_fields: nil) ⇒ Tokenizer



21
22
23
# File 'lib/qiita/elasticsearch/tokenizer.rb', line 21

# Builds a tokenizer, remembering the given filterable fields.
#
# @param filterable_fields [Object, nil] stored unchanged in
#   @filterable_fields; defaults to nil.
#   NOTE(review): presumably a collection of field-name strings that
#   responds to include?, consulted by #tokenize through a
#   `filterable_fields` reader that is not visible here. Confirm.
def initialize(filterable_fields: nil)
  @filterable_fields = filterable_fields
end

Instance Method Details

#tokenize(query_string) ⇒ Array<Qiita::Elasticsearch::Token>



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/qiita/elasticsearch/tokenizer.rb', line 27

# Splits a query string into tokens using TOKEN_PATTERN.
#
# Each match yields one Token. When a token carries a "name:" prefix whose
# name is not included in filterable_fields, the prefix is folded back into
# the term (as "name:term") and the token is emitted without a field name.
#
# @param query_string [String] raw query text to scan
# @return [Array<Qiita::Elasticsearch::Token>] one token per pattern match,
#   in the order the matches occur
def tokenize(query_string)
  matches = query_string.scan(TOKEN_PATTERN)

  matches.map do |whole, negation, field, quoted_body, bare_body|
    # Only one of the two term captures is set for any given match.
    body = bare_body || quoted_body

    # Unknown field prefixes are treated as plain text, not as filters.
    if field && !filterable_fields.include?(field)
      body = "#{field}:#{body}"
      field = nil
    end

    Token.new(
      field_name: field,
      minus: negation,
      quoted: !quoted_body.nil?,
      term: body,
      token_string: whole,
    )
  end
end