Class: Qiita::Elasticsearch::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/qiita/elasticsearch/tokenizer.rb

Constant Summary collapse

# Fallback used by #initialize when no date_fields argument is given.
DEFAULT_DATE_FIELDS =
[]
# Fallback used by #initialize when no downcased_fields argument is given.
DEFAULT_DOWNCASED_FIELDS =
[]
# Fallback used by #initialize when no filterable_fields argument is given.
DEFAULT_FILTERABLE_FIELDS =
[]
# Fallback used by #initialize when no hierarchal_fields argument is given.
DEFAULT_HIERARCHAL_FIELDS =
[]
# Fallback used by #initialize when no int_fields argument is given.
DEFAULT_INT_FIELDS =
[]
# Fallback used by #initialize when no default_fields argument is given.
DEFAULT_DEFAULT_FIELDS =
[]
# Field names that #initialize always unions into the date fields,
# whatever the caller passed.
EXTRA_DATE_FIELDS =
%w(created updated)
# Field names that #initialize always unions into the filterable fields,
# whatever the caller passed.
EXTRA_FILTERABLE_FIELDS =
%w(created is sort updated)
# Fallback used by #initialize when no matchable_options argument is given.
DEFAULT_MATCHABLE_OPTIONS =
{}
# Pattern that #tokenize scans the query string with. Named groups, in
# capture order:
#   token_string - the whole matched token
#   minus        - an optional leading "-"
#   field_name   - an optional run of word characters followed by ":"
#   quoted_term  - text between double quotes; the closing quote must not be
#                  preceded by a backslash
#   term         - otherwise, a run of non-whitespace characters
# Exactly one of quoted_term / term is set for each match.
TOKEN_PATTERN =
/
  (?<token_string>
    (?<minus>-)?
    (?:(?<field_name>\w+):)?
    (?:
      (?:"(?<quoted_term>.*?)(?<!\\)")
      |
      (?<term>\S+)
    )
  )
/x

Instance Method Summary collapse

Constructor Details

#initialize(all_fields: nil, date_fields: nil, downcased_fields: nil, filterable_fields: nil, hierarchal_fields: nil, int_fields: nil, default_fields: nil, time_zone: nil, matchable_options: nil) ⇒ Tokenizer



42
43
44
45
46
47
48
49
50
51
52
# File 'lib/qiita/elasticsearch/tokenizer.rb', line 42

# Stores the field configuration used when tokenizing query strings.
#
# Every option falls back to its DEFAULT_* constant when it is nil (or
# false). The date and filterable field lists are additionally unioned with
# EXTRA_DATE_FIELDS and EXTRA_FILTERABLE_FIELDS respectively.
#
# @param all_fields [Array, nil] handed to aggregate_all_fields; the result is
#   the set of field names #tokenize recognises as a "field:" prefix
# @param date_fields [Array, nil] date field names
# @param downcased_fields [Array, nil] fields whose tokens are flagged downcased
# @param filterable_fields [Array, nil] fields whose tokens are flagged as filters
# @param hierarchal_fields [Array, nil] hierarchal field names
# @param int_fields [Array, nil] integer field names
# @param default_fields [Array, nil] assigned to every MatchableToken
# @param time_zone [Object, nil] stored as given; assigned to every DateToken
# @param matchable_options [Hash, nil] assigned to every MatchableToken
def initialize(
  all_fields: nil,
  date_fields: nil,
  downcased_fields: nil,
  filterable_fields: nil,
  hierarchal_fields: nil,
  int_fields: nil,
  default_fields: nil,
  time_zone: nil,
  matchable_options: nil
)
  base_date_fields = date_fields || DEFAULT_DATE_FIELDS
  @date_fields = base_date_fields | EXTRA_DATE_FIELDS

  @downcased_fields = downcased_fields || DEFAULT_DOWNCASED_FIELDS

  base_filterable_fields = filterable_fields || DEFAULT_FILTERABLE_FIELDS
  @filterable_fields = base_filterable_fields | EXTRA_FILTERABLE_FIELDS

  @hierarchal_fields = hierarchal_fields || DEFAULT_HIERARCHAL_FIELDS
  @int_fields = int_fields || DEFAULT_INT_FIELDS
  @default_fields = default_fields || DEFAULT_DEFAULT_FIELDS
  @matchable_options = matchable_options || DEFAULT_MATCHABLE_OPTIONS

  # Runs only after the field lists above are assigned and before the time
  # zone is stored; keep this ordering.
  @all_fields = aggregate_all_fields(all_fields)
  @time_zone = time_zone
end

Instance Method Details

#tokenize(query_string) ⇒ Array<Qiita::Elasticsearch::Token>



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/qiita/elasticsearch/tokenizer.rb', line 56

# Splits a query string into token objects, one per TOKEN_PATTERN match.
#
# A "name:" prefix is treated as a field only when the name is listed in
# @all_fields; otherwise the prefix is folded back into the term and the
# token is built without a field.
#
# @param query_string [String] raw search query
# @return [Array<Qiita::Elasticsearch::Token>] tokens in order of appearance
def tokenize(query_string)
  matches = query_string.scan(TOKEN_PATTERN)

  matches.map do |raw, dash, field, in_quotes, bare|
    text = bare || in_quotes

    # Unknown field name: keep "name:term" as literal text.
    unless field.nil? || @all_fields.include?(field)
      text = "#{field}:#{text}"
      field = nil
    end

    klass = token_class(field)
    token = klass.new(
      downcased: @downcased_fields.include?(field),
      field_name: field,
      negative: !dash.nil?,
      quoted: !in_quotes.nil?,
      filter: @filterable_fields.include?(field),
      term: text,
      token_string: raw,
    )

    if token.is_a?(MatchableToken)
      token.options = @matchable_options
      token.default_fields = @default_fields
    end
    token.time_zone = @time_zone if token.is_a?(DateToken)

    token
  end
end