Class: Qiita::Elasticsearch::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/qiita/elasticsearch/tokenizer.rb

Constant Summary collapse

# Empty default field lists, one per `*_fields:` keyword accepted by
# #initialize. Presumably each is the fallback used when the matching
# constructor argument is nil -- the accessors are not visible here; confirm.
#
# Frozen so that an accidental in-place mutation (e.g. `<<`) raises instead of
# silently changing the shared default for every Tokenizer instance.
DEFAULT_DATE_FIELDS =
[].freeze
DEFAULT_DOWNCASED_FIELDS =
[].freeze
DEFAULT_FILTERABLE_FIELDS =
[].freeze
DEFAULT_HIERARCHAL_FIELDS =
[].freeze
DEFAULT_INT_FIELDS =
[].freeze
# Pattern matching a single token of a raw query string. Written in extended
# mode (/x), so whitespace inside the literal is not part of the pattern.
#
# Named groups, in the order String#scan yields them to #tokenize:
#   token_string - the whole matched token
#   minus        - optional leading "-"
#   field_name   - optional run of word characters followed by ":" (the ":"
#                  itself is not captured)
#   quoted_term  - text between double quotes, matched lazily up to the first
#                  closing quote that is not preceded by a backslash
#   term         - a run of non-whitespace characters, used when the token is
#                  not a double-quoted string
#
# Exactly one of quoted_term / term is set for any given match.
TOKEN_PATTERN =
/
  (?<token_string>
    (?<minus>-)?
    (?:(?<field_name>\w+):)?
    (?:
      (?:"(?<quoted_term>.*?)(?<!\\)")
      |
      (?<term>\S+)
    )
  )
/x

Instance Method Summary collapse

Constructor Details

#initialize(date_fields: nil, downcased_fields: nil, filterable_fields: nil, hierarchal_fields: nil, int_fields: nil, matchable_fields: nil, time_zone: nil) ⇒ Tokenizer

Returns a new instance of Tokenizer.

Parameters:

  • date_fields (Array<String>, nil) (defaults to: nil)
  • downcased_fields (Array<String>, nil) (defaults to: nil)
  • filterable_fields (Array<String>, nil) (defaults to: nil)
  • hierarchal_fields (Array<String>, nil) (defaults to: nil)
  • int_fields (Array<String>, nil) (defaults to: nil)
  • matchable_fields (Array<String>, nil) (defaults to: nil)
  • time_zone (String, nil) (defaults to: nil)


35
36
37
38
39
40
41
42
43
# File 'lib/qiita/elasticsearch/tokenizer.rb', line 35

# Stores the given configuration on the instance without validating or
# copying it. Every option is optional and defaults to nil.
#
# @param date_fields [Array<String>, nil]
# @param downcased_fields [Array<String>, nil]
# @param filterable_fields [Array<String>, nil]
# @param hierarchal_fields [Array<String>, nil]
# @param int_fields [Array<String>, nil]
# @param matchable_fields [Array<String>, nil]
# @param time_zone [String, nil]
def initialize(
  date_fields: nil,
  downcased_fields: nil,
  filterable_fields: nil,
  hierarchal_fields: nil,
  int_fields: nil,
  matchable_fields: nil,
  time_zone: nil
)
  # Field-name lists.
  @date_fields       = date_fields
  @downcased_fields  = downcased_fields
  @filterable_fields = filterable_fields
  @hierarchal_fields = hierarchal_fields
  @int_fields        = int_fields
  @matchable_fields  = matchable_fields

  # Handed to date tokens by #tokenize.
  @time_zone = time_zone
end

Instance Method Details

#tokenize(query_string) ⇒ Array<Qiita::Elasticsearch::Token>

Parameters:

  • query_string (String)

    Raw query string

Returns:

  • (Array<Qiita::Elasticsearch::Token>)

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/qiita/elasticsearch/tokenizer.rb', line 47

# Splits a raw query string into token objects, one per TOKEN_PATTERN match.
#
# A `field:` prefix whose field is not listed in `filterable_fields` is not
# treated as a field filter: it is folded back into the term as literal text
# ("field:term") and the token is built without a field name.
#
# @param query_string [String] Raw query string
# @return [Array<Qiita::Elasticsearch::Token>]
def tokenize(query_string)
  query_string.scan(TOKEN_PATTERN).map do |raw, negation, field, quoted_body, bare_body|
    # Exactly one of the two term captures is set for a given match.
    body = bare_body || quoted_body

    # Unknown field prefix: keep it as part of the searched text.
    if field && !filterable_fields.include?(field)
      body = "#{field}:#{body}"
      field = nil
    end

    token_class(field).new(
      downcased: downcased_fields.include?(field),
      field_name: field,
      minus: negation,
      quoted: !quoted_body.nil?,
      term: body,
      token_string: raw,
    ).tap do |token|
      # Extra configuration only applies to the token kinds that accept it.
      token.matchable_fields = @matchable_fields if token.is_a?(MatchableToken)
      token.time_zone = @time_zone if token.is_a?(DateToken)
    end
  end
end