Class: Fairy::PGroupBy::CommandMergeSortBuffer

Inherits:
OnMemoryBuffer show all
Defined in:
lib/fairy/node/p-group-by.rb

Direct Known Subclasses

DirectKBMergeSortBuffer, MergeSortBuffer

Instance Attribute Summary

Attributes inherited from OnMemoryBuffer

#log_id

Instance Method Summary collapse

Constructor Details

#initialize(njob, policy) ⇒ CommandMergeSortBuffer

Returns a new instance of CommandMergeSortBuffer.



309
310
311
312
313
314
315
316
317
318
# File 'lib/fairy/node/p-group-by.rb', line 309

# Sets up the buffer in its initial, in-memory-only state.
#
# njob   - passed through unchanged to the superclass constructor.
# policy - passed through to the superclass; its :threshold entry, when
#          truthy, is used instead of CONF.GROUP_BY_CMSB_THRESHOLD.
def initialize(njob, policy)
  super

  # nil means nothing has been spilled yet; #each checks this.
  @buffers = nil
  # Number of values pushed since the last spill (see #push).
  @key_values_size = 0
  @threshold = policy[:threshold] || CONF.GROUP_BY_CMSB_THRESHOLD
end

Instance Method Details

#each(&block) ⇒ Object



380
381
382
383
384
385
386
# File 'lib/fairy/node/p-group-by.rb', line 380

# Iterates over the buffered groups.
#
# While @buffers is unset the superclass implementation is used; once it is
# set, iteration is delegated to #each_2ndmemory.
def each(&block)
  return super unless @buffers

  each_2ndmemory(&block)
end

#each_2ndmemory(&block) ⇒ Object



388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
# File 'lib/fairy/node/p-group-by.rb', line 388

# Yields one KeyValueStream per key by merging all spill files.
#
# Any key/values still held in @key_values are first written out through
# store_2ndmemory. The files in @buffers are then merged with the external
# `sort -m` command, each line is decoded back into a key and its values,
# and consecutive lines carrying an equal key are folded into one stream.
# Every stream receives push_eos before it is yielded.
#
# Yields a KeyValueStream for each run of equal keys in the merged output.
def each_2ndmemory(&block)
  store_2ndmemory(@key_values) unless @key_values.empty?

  paths = @buffers.collect { |b| b.path }
  Log::debug(self, paths.join(" "))

  # With no file operands `sort` would read from stdin; there is nothing to
  # merge in that case, so stop here.
  return if paths.empty?

  # The command is passed as an argv array so that paths are never parsed by
  # a shell. LC_ALL=C makes `sort` compare bytes, which is the order
  # store_2ndmemory used (Ruby String comparison) when writing each file.
  IO::popen([{"LC_ALL" => "C"}, "sort", "-m", "-k1,1", *paths]) do |io|
    key = nil
    values = nil
    io.each do |line|
      mk, mv = line.split(/\s+/)
      # Undo the encoding applied by store_2ndmemory: ":" stands for "\n"
      # inside the base64 text.
      k = Marshal.load(mk.tr(":", "\n").unpack("m").first)
      v = Marshal.load(mv.tr(":", "\n").unpack("m").first)
      # `values &&` keeps a nil key on the very first line from being
      # mistaken for "same key as before" while no stream exists yet.
      if values && key == k
        values.concat v
      else
        if values
          values.push_eos
          yield values
        end
        key = k
        values = KeyValueStream.new(key, self)
        values.concat v
      end
    end
    # Flush the final group.
    if values
      values.push_eos
      yield values
    end
  end
end

#init_2ndmemory ⇒ Object



320
321
322
323
324
325
326
327
# File 'lib/fairy/node/p-group-by.rb', line 320

# Prepares the state needed for spilling to temporary files.
#
# Loads the FastTempfile library on first use, picks the spill directory
# (@policy[:buffer_dir], falling back to CONF.TMP_DIR) and starts an empty
# list of spill buffers.
def init_2ndmemory
  # Deliberately required here rather than at file load time, so the
  # library is only loaded once a spill actually happens.
  require "fairy/share/fast-tempfile"

  @buffer_dir = @policy[:buffer_dir] || CONF.TMP_DIR
  @buffers = []
end

#open_buffer(&block) ⇒ Object



329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
# File 'lib/fairy/node/p-group-by.rb', line 329

# Opens a new spill tempfile and records it in @buffers.
#
# Calls init_2ndmemory first when @buffers has not been set up yet.
#
# With a block: yields the tempfile's underlying io, closes the tempfile when
# the block finishes (also when it raises), and returns the block's value.
# Without a block: returns the FastTempfile itself without closing it.
def open_buffer(&block)
  init_2ndmemory unless @buffers

  tempfile = FastTempfile.open("mod-group-by-buffer-#{@njob.no}-", @buffer_dir)
  @buffers.push tempfile
  return tempfile unless block_given?

  begin
    # Workaround for Ruby bug #2390: hand out the raw io instead of the
    # tempfile object itself (the original intent was `yield buffer`).
    yield tempfile.io
  ensure
    tempfile.close
  end
end

#push(value) ⇒ Object



348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
# File 'lib/fairy/node/p-group-by.rb', line 348

# Adds a value via the superclass, then spills the in-memory table to a
# tempfile once more than @threshold values have been pushed since the last
# spill.
#
# value - passed unchanged to the superclass implementation of push.
def push(value)
  super

  # The counter is updated under the same lock that guards its reset and the
  # swap of @key_values, so concurrent pushes cannot lose an increment.
  @key_values_mutex.synchronize do
    @key_values_size += 1
    if @key_values_size > @threshold
      spilled = @key_values
      @key_values_size = 0
      @key_values = {}
      # Still inside the lock, as before: the spill completes before any
      # other thread can observe the fresh table.
      store_2ndmemory(spilled)
    end
  end
end

#store_2ndmemory(key_values) ⇒ Object



365
366
367
368
369
370
371
372
373
374
375
376
377
378
# File 'lib/fairy/node/p-group-by.rb', line 365

# Writes one sorted spill file for the given key => values table.
#
# Each key and each values object is Marshal-dumped and base64-packed, with
# the newlines produced by the packing replaced by ":" so that a record fits
# on a single line. Records are ordered by the encoded key and written as
# "<key>\t<values>" lines to a buffer obtained from open_buffer.
#
# key_values - enumerable of [key, values] pairs (e.g. a Hash).
def store_2ndmemory(key_values)
  Log::info(self, "start store")

  encode = lambda { |obj| [Marshal.dump(obj)].pack("m").tr("\n", ":") }
  rows = key_values.map { |key, values| [encode.call(key), encode.call(values)] }
  rows = rows.sort_by { |encoded_key, _| encoded_key }

  open_buffer do |io|
    rows.each do |encoded_key, encoded_values|
      io.puts "#{encoded_key}\t#{encoded_values}"
    end
  end

  Log::info(self, "end store")
end