Class: Qiita::Elasticsearch::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/qiita/elasticsearch/tokenizer.rb

Constant Summary collapse

# Fallback for the date_fields option when the caller passes nil.
DEFAULT_DATE_FIELDS =
[]
# Fallback for the downcased_fields option when the caller passes nil.
DEFAULT_DOWNCASED_FIELDS =
[]
# Fallback for the filterable_fields option when the caller passes nil.
DEFAULT_FILTERABLE_FIELDS =
[]
# Fallback for the hierarchal_fields option when the caller passes nil.
DEFAULT_HIERARCHAL_FIELDS =
[]
# Fallback for the int_fields option when the caller passes nil.
DEFAULT_INT_FIELDS =
[]
# Fallback for the default_fields option when the caller passes nil.
DEFAULT_DEFAULT_FIELDS =
[]
# Field names that #initialize always unions into the date field list,
# whatever the caller supplies.
EXTRA_DATE_FIELDS =
%w(created updated)
# Field names that #initialize always unions into the filterable field list,
# whatever the caller supplies.
EXTRA_FILTERABLE_FIELDS =
%w(created is sort updated)
# Matches one token of a query string. Written in extended (/x) mode, so the
# whitespace and line breaks inside the literal are not part of the pattern.
#
# Named captures, in the order String#scan yields them:
#   token_string - the whole matched token
#   minus        - a leading "-", when present
#   field_name   - the word characters before a ":" prefix, when present
#   quoted_term  - the text between double quotes; the closing quote is the
#                  first one not immediately preceded by a backslash
#   term         - a run of non-whitespace characters, used when the quoted
#                  alternative does not match
TOKEN_PATTERN =
/
  (?<token_string>
    (?<minus>-)?
    (?:(?<field_name>\w+):)?
    (?:
      (?:"(?<quoted_term>.*?)(?<!\\)")
      |
      (?<term>\S+)
    )
  )
/x

Instance Method Summary collapse

Constructor Details

#initialize(all_fields: nil, date_fields: nil, downcased_fields: nil, filterable_fields: nil, hierarchal_fields: nil, int_fields: nil, default_fields: nil, time_zone: nil) ⇒ Tokenizer

Returns a new instance of Tokenizer.

Parameters:

  • all_fields (Array<String>, nil) (defaults to: nil)
  • date_fields (Array<String>, nil) (defaults to: nil)
  • downcased_fields (Array<String>, nil) (defaults to: nil)
  • filterable_fields (Array<String>, nil) (defaults to: nil)
  • hierarchal_fields (Array<String>, nil) (defaults to: nil)
  • int_fields (Array<String>, nil) (defaults to: nil)
  • default_fields (Array<String>, nil) (defaults to: nil)
  • time_zone (String, nil) (defaults to: nil)


39
40
41
42
43
44
45
46
47
48
# File 'lib/qiita/elasticsearch/tokenizer.rb', line 39

# Builds a tokenizer from the field lists that control how tokens are classified.
#
# Each list option falls back to its DEFAULT_* constant when nil. The date and
# filterable lists are additionally unioned with EXTRA_DATE_FIELDS and
# EXTRA_FILTERABLE_FIELDS, so those names are always included.
#
# @param all_fields [Array<String>, nil] handed to aggregate_all_fields to build the known-field list
# @param date_fields [Array<String>, nil]
# @param downcased_fields [Array<String>, nil]
# @param filterable_fields [Array<String>, nil]
# @param hierarchal_fields [Array<String>, nil]
# @param int_fields [Array<String>, nil]
# @param default_fields [Array<String>, nil]
# @param time_zone [String, nil] stored as given, with no default applied
def initialize(
  all_fields: nil,
  date_fields: nil,
  downcased_fields: nil,
  filterable_fields: nil,
  hierarchal_fields: nil,
  int_fields: nil,
  default_fields: nil,
  time_zone: nil
)
  # Lists that are used as given, or replaced by their default when nil.
  @downcased_fields = downcased_fields || DEFAULT_DOWNCASED_FIELDS
  @hierarchal_fields = hierarchal_fields || DEFAULT_HIERARCHAL_FIELDS
  @int_fields = int_fields || DEFAULT_INT_FIELDS
  @default_fields = default_fields || DEFAULT_DEFAULT_FIELDS

  # Lists that always gain the built-in extra field names.
  date_fields ||= DEFAULT_DATE_FIELDS
  filterable_fields ||= DEFAULT_FILTERABLE_FIELDS
  @date_fields = date_fields | EXTRA_DATE_FIELDS
  @filterable_fields = filterable_fields | EXTRA_FILTERABLE_FIELDS

  # Kept after the individual lists are assigned, matching the original order.
  @all_fields = aggregate_all_fields(all_fields)
  @time_zone = time_zone
end

Instance Method Details

#tokenize(query_string) ⇒ Array<Qiita::Elasticsearch::Token>

Parameters:

  • query_string (String)

    Raw query string

Returns:

  • (Array<Qiita::Elasticsearch::Token>)

52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/qiita/elasticsearch/tokenizer.rb', line 52

# Splits a raw query string into token objects, one per TOKEN_PATTERN match.
#
# A "name:" prefix whose name is not in @all_fields is not treated as a field:
# it is folded back into the term text and the token is built without a field.
#
# @param query_string [String] Raw query string
# @return [Array<Qiita::Elasticsearch::Token>]
def tokenize(query_string)
  query_string.scan(TOKEN_PATTERN).map do |raw, dash, field, quoted, bare|
    # Exactly one of the two term captures is set for any given match.
    text = bare || quoted

    # Unknown prefix: restore "prefix:term" as plain text and drop the field.
    if field && !@all_fields.include?(field)
      text = "#{field}:#{text}"
      field = nil
    end

    built = token_class(field).new(
      downcased: @downcased_fields.include?(field),
      field_name: field,
      negative: !dash.nil?,
      quoted: !quoted.nil?,
      filter: @filterable_fields.include?(field),
      term: text,
      token_string: raw,
    )

    built.tap do |token|
      token.default_fields = @default_fields if token.is_a?(MatchableToken)
      token.time_zone = @time_zone if token.is_a?(DateToken)
    end
  end
end