Class: Remi::Transform::Partitioner

Inherits:
Remi::Transform show all
Defined in:
lib/remi/transform.rb

Overview

Public: Used to partition elements into groups (buckets).

buckets - A hash where the keys are groups and the values are weights or percentages.

initial_population - A hashable object holding a count of the current number of elements in each bucket.

Example:

# The current population has 2 records in the A bucket and 3 in B
current_pop = Daru::Vector.new([2,3], index: ['A', 'B'])

# We want to generate 7 new records that will evenly populate the A, B, and C buckets, given the current populations.
part = Remi::Transform::Partitioner.new(buckets: { 'A' => 1, 'B' => 1,'C' => 1 }, initial_population: current_pop)

1.upto(7).map { |iter| part.call } # => ["C", "C", "A", "C", "C", "B", "A"]

Instance Attribute Summary collapse

Attributes inherited from Remi::Transform

#multi_args, #source_metadata, #target_metadata

Instance Method Summary collapse

Methods inherited from Remi::Transform

#call, #to_proc

Constructor Details

#initialize(buckets:, initial_population: {}, **kargs, &block) ⇒ Partitioner

Returns a new instance of Partitioner.



710
711
712
713
714
# File 'lib/remi/transform.rb', line 710

# Public: Build a partitioner from a set of weighted buckets.
#
# buckets            - Stored as-is in @buckets. Per the class overview this
#                      is a hash whose keys are groups and whose values are
#                      weights or percentages.
# initial_population - Starting per-bucket counts (default: {}). It is not
#                      stored directly; it is passed, together with buckets,
#                      through sanitize_initial_population and the result is
#                      kept in @current_population.
#                      NOTE(review): sanitize_initial_population is defined
#                      elsewhere; presumably it fills in missing buckets with
#                      a zero count - confirm.
# kargs, block       - Not used here. The bare `super` below forwards every
#                      argument this method received (including the block) to
#                      the parent initializer.
def initialize(buckets:, initial_population: {}, **kargs, &block)
  super
  @buckets = buckets
  @current_population = sanitize_initial_population(buckets, initial_population)
end

Instance Attribute Details

#buckets ⇒ Object (readonly)

Returns the value of attribute buckets.



716
717
718
# File 'lib/remi/transform.rb', line 716

# Public: Reader for the bucket definitions handed to the constructor.
#
# Returns the object stored in @buckets (per the class overview, a hash of
# group => weight).
def buckets
  @buckets
end

#current_population ⇒ Object (readonly)

Returns the value of attribute current_population.



717
718
719
# File 'lib/remi/transform.rb', line 717

# Public: Reader for the running per-bucket element counts.
#
# The value is seeded in the constructor and the entry for the chosen bucket
# is incremented by get_next_value on every assignment.
#
# Returns the object stored in @current_population.
def current_population
  @current_population
end

Instance Method Details

#get_next_value ⇒ Object



731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
# File 'lib/remi/transform.rb', line 731

# Public: Choose the bucket that receives the next element and record the
# assignment.
#
# Every bucket draws exactly one random score and the highest score wins:
#
# * A bucket holding fewer elements than its weighted share of the current
#   total (shortfall > 0) scores rand**(1 / shortfall), a value in [0, 1).
#   The larger the shortfall, the closer the score tends to be to 1.
# * A bucket at or above its share scores -(rand**(-1 / weight)). For a
#   positive weight this is never greater than -1, so such a bucket can only
#   win when no bucket is under-filled; heavier buckets score closer to -1.
#
# Side effects: increments the winner's entry in @current_population and the
# memoised @size.
#
# Returns the key of the chosen bucket.
def get_next_value
  winner = @buckets.max_by do |group, weight|
    # Share of the current total this bucket should hold, given its weight.
    target_count = weight / total_weight * size
    shortfall = target_count - @current_population[group]

    if shortfall > 0
      rand**(1.0 / shortfall)
    else
      -(rand**(-1.0 / weight))
    end
  end

  chosen = winner.first
  @current_population[chosen] += 1
  @size += 1
  chosen
end

#size ⇒ Object



723
724
725
# File 'lib/remi/transform.rb', line 723

# Public: Total number of elements currently counted across all buckets.
#
# The sum of the per-bucket counts in @current_population is computed on the
# first call and cached in @size; later calls return the cached value
# (get_next_value keeps it current by incrementing @size directly).
#
# Returns the total count.
def size
  return @size if @size

  running_total = 0
  @current_population.each { |_group, count| running_total += count }
  @size = running_total
end

#total_weight ⇒ Object



727
728
729
# File 'lib/remi/transform.rb', line 727

# Public: Sum of the weights of all buckets.
#
# Each weight is multiplied by 1.0 before being added, so the result is a
# Float whenever there is at least one bucket (with no buckets the integer
# seed 0 is returned). The value is computed once and cached in
# @total_weight.
#
# Returns the combined weight.
def total_weight
  return @total_weight if @total_weight

  combined = 0
  @buckets.each { |_bucket, weight| combined += 1.0 * weight }
  @total_weight = combined
end

#transform ⇒ Object



719
720
721
# File 'lib/remi/transform.rb', line 719

# Public: Produce the next bucket assignment by delegating to
# get_next_value.
# NOTE(review): presumably this is the hook invoked by the inherited
# Remi::Transform#call - confirm against the parent class.
#
# Returns whatever get_next_value returns (the chosen bucket key).
def transform
  get_next_value
end