Class: Metacrunch::Elasticsearch::Indexer

Inherits:
Processor
  • Object
show all
Includes:
ClientFactory, OptionsHelpers
Defined in:
lib/metacrunch/elasticsearch/indexer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from OptionsHelpers

#extract_options!, #normalize_options!

Methods included from ClientFactory

#client_factory

Constructor Details

#initialize(options = {}) ⇒ Indexer

Returns a new instance of Indexer.

Raises:

  • (ArgumentError)


18
19
20
21
22
# File 'lib/metacrunch/elasticsearch/indexer.rb', line 18

# Builds an indexer from an options hash.
#
# The hash passed in is kept (not copied) as @client_args and its keys are
# symbolized in place via deep_symbolize_keys!, so the caller's hash is
# modified as well.
#
# NOTE(review): extract_options! comes from OptionsHelpers and is not visible
# here; presumably it moves the listed keys out of @client_args into instance
# variables of the same name (the @index check below relies on that) — confirm.
#
# @param options [Hash] indexer settings (:bulk_size, :callbacks,
#   :id_accessor, :index, :logger, :type, :_client_options_); whatever is
#   left over stays in @client_args
# @raise [ArgumentError] if no (non-blank) index name was supplied
def initialize(options = {})
  @client_args = options
  @client_args.deep_symbolize_keys!

  extract_options!(
    @client_args,
    :_client_options_, :bulk_size, :callbacks, :id_accessor, :index, :logger, :type
  )

  # An index name is the only mandatory setting.
  return unless @index.blank?

  raise ArgumentError, "You have to supply an index name!"
end

Instance Attribute Details

#bulk_size ⇒ Object

Returns the value of attribute bulk_size.



11
12
13
# File 'lib/metacrunch/elasticsearch/indexer.rb', line 11

# Reader for @bulk_size.
#
# #call uses this value as the number of items sent per bulk request and
# falls back to the total number of items when it is nil.
# NOTE(review): presumably populated from the :bulk_size option by
# extract_options! in #initialize — confirm against OptionsHelpers.
def bulk_size
  @bulk_size
end

#callbacks ⇒ Object

Returns the value of attribute callbacks.



12
13
14
# File 'lib/metacrunch/elasticsearch/indexer.rb', line 12

# Reader for @callbacks.
#
# #call looks up the :after_indexed entry of this object and, when present,
# invokes it with (item, item_response) for indexed items; nil is treated
# like an empty hash there.
# NOTE(review): presumably populated from the :callbacks option by
# extract_options! in #initialize — confirm against OptionsHelpers.
def callbacks
  @callbacks
end

#id_accessor ⇒ Object

Returns the value of attribute id_accessor.



13
14
15
# File 'lib/metacrunch/elasticsearch/indexer.rb', line 13

# Reader for @id_accessor.
#
# Not referenced by any other code visible in this listing.
# NOTE(review): presumably populated from the :id_accessor option and used
# when building bulk items to determine a document id — confirm.
def id_accessor
  @id_accessor
end

#index ⇒ Object

Returns the value of attribute index.



14
15
16
# File 'lib/metacrunch/elasticsearch/indexer.rb', line 14

# Reader for @index.
#
# #initialize raises ArgumentError ("You have to supply an index name!")
# when this value is blank, so a constructed indexer always has one.
def index
  @index
end

#logger ⇒ Object

Returns the value of attribute logger.



15
16
17
# File 'lib/metacrunch/elasticsearch/indexer.rb', line 15

# Reader for @logger.
#
# #call only falls back to this logger when the pipeline passed to it does
# not provide one; when neither exists nothing is logged.
def logger
  @logger
end

#type ⇒ Object

Returns the value of attribute type.



16
17
18
# File 'lib/metacrunch/elasticsearch/indexer.rb', line 16

# Reader for @type.
#
# Not referenced by any other code visible in this listing.
# NOTE(review): presumably the Elasticsearch document type taken from the
# :type option and used when building bulk items — confirm.
def type
  @type
end

Instance Method Details

#call(items = [], pipeline = nil) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/metacrunch/elasticsearch/indexer.rb', line 24

# Sends +items+ to Elasticsearch through the bulk API.
#
# Items are processed in slices of @bulk_size (or all at once when no bulk
# size is set). Each slice is turned into one bulk request body by
# concatenating the result of bulk_item_factory for every item. If a bulk
# request raises, the body is split into smaller bodies and the whole slice
# is sent again (including bodies that already succeeded in the failed
# attempt). Once no body can be split any further the error is re-raised
# instead of retrying forever.
#
# After a slice has been indexed, the :after_indexed callback (if any) is
# invoked once per item with the item and the matching entry of the bulk
# responses' "items" arrays, in request order.
# NOTE(review): the pairing assumes one response entry per item, i.e. that
# bulk_item_factory emits exactly one action/source pair per item — confirm.
#
# @param items [Array] the objects to index
# @param pipeline [#logger, nil] optional pipeline whose logger takes
#   precedence over the indexer's own logger
# @return [Object, nil] nil when there is nothing to do; otherwise the
#   result of Array#each_slice (not meaningful to callers)
# @raise [StandardError] the error raised by the client when a bulk request
#   still fails with the smallest possible body
def call(items = [], pipeline = nil)
  logger = pipeline.try(:logger) || @logger

  slice_size = @bulk_size || items.length
  return unless slice_size > 0

  client = client_factory
  after_indexed_callback = (@callbacks || {})[:after_indexed]

  items.each_slice(slice_size) do |item_slice|
    # bodies is an array to allow slicing in case the HTTP content length is exceeded
    bodies = [item_slice.inject([]) { |memo, item| memo.concat(bulk_item_factory(item)) }]

    bulk_responses =
      begin
        bodies.map { |body| client.bulk body: body }
      rescue
        # A body holding at most one action/source pair cannot shrink any
        # further; retrying would send the identical request forever.
        raise if bodies.all? { |body| body.length <= 2 }

        logger.info "Bulk index failed. Decreasing bulk size temporary and trying again." if logger

        bodies = bodies.flat_map do |body|
          # Since we have to work with the bulk request body instead of the original items,
          # the body's length has to be a multiple of 2 in any case. .fdiv(2).fdiv(2).ceil * 2
          # ensures this. Example: 3698.fdiv(2).fdiv(2).ceil * 2 == 1850
          body.each_slice(body.length.fdiv(2).fdiv(2).ceil * 2).to_a
        end

        retry
      end

    bulk_responses.each do |bulk_response|
      log_items_indexed(logger, bulk_response["items"].length, client) if logger
    end

    next unless after_indexed_callback

    # When the body had to be split there are several responses for this
    # slice; join their items in request order so every item is paired with
    # its own response rather than restarting at the first item per response.
    response_items = bulk_responses.flat_map { |bulk_response| bulk_response["items"] }

    item_slice.zip(response_items).each do |item, item_response|
      after_indexed_callback.call(item, item_response)
    end
  end
end