Method: Fluent::Plugin::Buffer#write_step_by_step
- Defined in:
- lib/fluent/plugin/buffer.rb
#write_step_by_step(metadata, data, format, splits_count, &block) ⇒ Object
1. Split the event stream into many (10 -> 100 -> 1000 -> …) slices.
2. Append the slices to the staged chunk as far as possible.
3. Create an unstaged chunk and append the remaining slices to it; repeat until all slices are written.
|
# File 'lib/fluent/plugin/buffer.rb', line 729
#
# Writes +data+ into chunks slice by slice: the staged chunk for +metadata+
# is filled first, then freshly generated (unstaged) chunks take the rest.
# When a multi-record slice does not fit, everything written so far is rolled
# back and the whole method is retried, either with ten times more slices or
# after enqueueing the already-full staged chunk.
#
# NOTE(review): the identifiers containing "metadata" were missing from the
# listing this was restored from. The parameter name comes from the method
# signature; the local name +modified_metadata+ and the argument passed to
# +enqueue_chunk+ are reconstructions -- confirm against the upstream file.
#
# @param metadata key used for +@stage+ and passed to +generate_chunk+;
#   must respond to +dup_next+ (called when an additional chunk is needed)
# @param data collection responding to +size+ and +slice(origin, length)+.
#   Must not be empty: an empty +data+ clamps +splits_count+ to 0 and the
#   modulo below raises ZeroDivisionError.
# @param format [#call, nil] when given, +format.call(split)+ is written with
#   +chunk.concat+; otherwise the raw split is written with +chunk.append+
# @param splits_count [Integer] initial number of slices (clamped to
#   +data.size+, multiplied by 10 before some retries)
# @yield [chunk, adding_bytesize, errors] once per modified chunk, after all
#   slices have been written
# @raise re-raises anything other than ShouldRetry raised while writing
def write_step_by_step(metadata, data, format, splits_count, &block)
  splits = []
  if splits_count > data.size
    splits_count = data.size
  end
  slice_size = if data.size % splits_count == 0
                 data.size / splits_count
               else
                 data.size / (splits_count - 1)
               end
  slice_origin = 0
  while slice_origin < data.size
    splits << data.slice(slice_origin, slice_size)
    slice_origin += slice_size
  end

  # This method will append events into the staged chunk at first.
  # Then, will generate chunks not staged (not queued) to append rest data.
  staged_chunk_used = false
  modified_chunks = []
  modified_metadata = metadata
  get_next_chunk = lambda do
    if staged_chunk_used
      # Staging new chunk here is bad idea:
      # Recovering whole state including newly staged chunks is much harder than current implementation.
      modified_metadata = modified_metadata.dup_next
      generate_chunk(modified_metadata)
    else
      synchronize { @stage[modified_metadata] ||= generate_chunk(modified_metadata).staged! }
    end
  end

  writing_splits_index = 0
  enqueue_chunk_before_retry = false

  while writing_splits_index < splits.size
    chunk = get_next_chunk.call
    errors = []
    modified_chunks << {chunk: chunk, adding_bytesize: 0, errors: errors}
    chunk.synchronize do
      raise ShouldRetry unless chunk.writable?
      staged_chunk_used = true if chunk.staged?

      original_bytesize = committed_bytesize = chunk.bytesize
      begin
        while writing_splits_index < splits.size
          split = splits[writing_splits_index]
          formatted_split = format ? format.call(split) : nil

          if split.size == 1 # Check BufferChunkOverflowError
            determined_bytesize = nil
            if @compress != :text
              # The size is not determined up front unless @compress is :text.
              determined_bytesize = nil
            elsif formatted_split
              determined_bytesize = formatted_split.bytesize
            elsif split.first.respond_to?(:bytesize)
              determined_bytesize = split.first.bytesize
            end

            if determined_bytesize && determined_bytesize > @chunk_limit_size
              # It is a obvious case that BufferChunkOverflowError should be raised here.
              # But if it raises here, already processed 'split' or
              # the proceeding 'split' will be lost completely.
              # So it is a last resort to delay raising such a exception
              errors << "a #{determined_bytesize} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})"
              writing_splits_index += 1
              next
            end

            if determined_bytesize.nil? || chunk.bytesize + determined_bytesize > @chunk_limit_size
              # The split will (might) cause size over so keep already processed
              # 'split' content here (allow performance regression a bit).
              chunk.commit
              committed_bytesize = chunk.bytesize
            end
          end

          if format
            chunk.concat(formatted_split, split.size)
          else
            chunk.append(split, compress: @compress)
          end
          adding_bytes = chunk.bytesize - committed_bytesize

          if chunk_size_over?(chunk) # split size is larger than difference between size_full? and size_over?
            chunk.rollback
            committed_bytesize = chunk.bytesize

            if split.size == 1 # Check BufferChunkOverflowError again
              if adding_bytes > @chunk_limit_size
                errors << "concatenated/appended a #{adding_bytes} bytes record (nth: #{writing_splits_index}) is larger than buffer chunk limit size (#{@chunk_limit_size})"
                writing_splits_index += 1
                next
              else
                # As already processed content is kept after rollback, then unstaged chunk should be queued.
                # After that, re-process current split again.
                # New chunk should be allocated, to do it, modify @stage and so on.
                synchronize { @stage.delete(modified_metadata) }
                staged_chunk_used = false
                chunk.unstaged!
                break
              end
            end

            if chunk_size_full?(chunk) || split.size == 1
              enqueue_chunk_before_retry = true
            else
              # Retry with finer slices so that each one is more likely to fit.
              splits_count *= 10
            end

            raise ShouldRetry
          end

          writing_splits_index += 1

          if chunk_size_full?(chunk)
            break
          end
        end
      rescue
        chunk.purge if chunk.unstaged? # unstaged chunk will leak unless purge it
        raise
      end

      modified_chunks.last[:adding_bytesize] = chunk.bytesize - original_bytesize
    end
  end
  modified_chunks.each do |modified|
    block.call(modified[:chunk], modified[:adding_bytesize], modified[:errors])
  end
rescue ShouldRetry
  # Undo every uncommitted write of this attempt before starting over;
  # failures during this cleanup are deliberately ignored (best effort).
  modified_chunks.each do |modified|
    chunk = modified[:chunk]
    chunk.rollback rescue nil
    if chunk.unstaged?
      chunk.purge rescue nil
    end
  end
  enqueue_chunk(metadata) if enqueue_chunk_before_retry
  retry
end