Class: Remi::Transform::Partitioner

Inherits:
Remi::Transform show all
Defined in:
lib/remi/transform.rb

Overview

Public: Used to partition elements into groups (buckets).

buckets - A hash where the keys are groups and the values are weights or percentages.

initial_population - A hashable object holding a count of the current number of elements in each bucket (tracked afterwards as current_population).

Example:

# The current population has 2 records in the A bucket and 3 in B
current_pop = Daru::Vector.new([2,3], index: ['A', 'B'])

# We want to generate 7 new records that will evenly populate the A, B, and C buckets, given the current populations.
part = Remi::Transform::Partitioner.new(buckets: { 'A' => 1, 'B' => 1,'C' => 1 }, initial_population: current_pop)

1.upto(7).map { |iter| part.call } # => e.g. ["C", "C", "A", "C", "C", "B", "A"] (assignment is randomized)

Instance Attribute Summary collapse

Attributes inherited from Remi::Transform

#multi_arg, #source_metadata, #target_metadata

Instance Method Summary collapse

Methods inherited from Remi::Transform

#call, #to_proc

Constructor Details

#initialize(buckets:, initial_population: {}, **kargs, &block) ⇒ Partitioner



655
656
657
658
659
# File 'lib/remi/transform.rb', line 655

# Builds a partitioner for the given buckets.
#
# buckets            - Hash whose keys are groups and whose values are weights
#                      or percentages.
# initial_population - Per-bucket counts of elements already assigned
#                      (default: {}).
# kargs, block       - Not used directly here; picked up by the bare `super`.
def initialize(buckets:, initial_population: {}, **kargs, &block)
  # Bare `super` forwards every argument (and the block) to the parent
  # initializer unchanged.
  super
  @buckets = buckets
  # sanitize_initial_population is defined outside this excerpt; presumably it
  # normalizes the counts against the bucket keys — confirm before relying on it.
  @current_population = sanitize_initial_population(buckets, initial_population)
end

Instance Attribute Details

#buckets ⇒ Object (readonly)

Returns the value of attribute buckets.



661
662
663
# File 'lib/remi/transform.rb', line 661

# Returns the bucket definition that was passed to the constructor
# (group => weight or percentage).
def buckets
  @buckets
end

#current_population ⇒ Object (readonly)

Returns the value of attribute current_population.



662
663
664
# File 'lib/remi/transform.rb', line 662

# Returns the per-bucket element counts. This is the live object that
# get_next_value increments, not a copy.
def current_population
  @current_population
end

Instance Method Details

#get_next_value ⇒ Object



676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
# File 'lib/remi/transform.rb', line 676

# Picks the bucket that receives the next element and records the assignment.
#
# Each bucket draws one random sort key and the bucket with the largest key
# wins:
#   * A bucket below its weighted share of the current total gets a key in
#     [0, 1); the larger its shortfall, the closer to 1 the key tends to be.
#   * A bucket at or above its share gets a key <= -1, so it can only win when
#     no bucket is below its share; heavier weights tend to land closer to -1.
#
# Side effects: increments the winner's count in @current_population and the
# memoized @size.
#
# Returns the key of the chosen bucket.
def get_next_value
  winning_pair = @buckets.max_by do |name, share|
    shortfall = share / total_weight * size - @current_population[name]

    # Exactly one rand draw per bucket, in bucket iteration order.
    shortfall > 0 ? rand**(1.0 / shortfall) : -(rand**(-1.0 / share))
  end
  winner = winning_pair.first

  @current_population[winner] += 1
  @size += 1

  winner
end

#size ⇒ Object



668
669
670
# File 'lib/remi/transform.rb', line 668

# Total number of elements across all buckets.
#
# Computed from @current_population on first use and cached in @size
# afterwards; later changes to the population are not re-summed here.
def size
  return @size if @size

  @size = @current_population.inject(0) { |running, (_group, count)| running + count }
end

#total_weight ⇒ Object



672
673
674
# File 'lib/remi/transform.rb', line 672

# Sum of all bucket weights, with each weight promoted to Float.
#
# Computed from @buckets on first use and cached in @total_weight afterwards.
def total_weight
  return @total_weight if @total_weight

  @total_weight = @buckets.inject(0) { |running, (_bucket, weight)| running + 1.0 * weight }
end

#transform(*values) ⇒ Object



664
665
666
# File 'lib/remi/transform.rb', line 664

# Returns the bucket assigned to the next element.
#
# values - Accepted but ignored; the result depends only on the partitioner's
#          internal state.
def transform(*values)
  get_next_value
end