Method: Array#histogram

Defined in:
lib/openc3/core_ext/array.rb

#histogram(num_buckets = nil, numeric = false) ⇒ Array<Array(first_value, last_value, num_values)>

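Returns an Array of buckets. Each bucket is an array containing the first value in the bucket, the last value in the bucket, and the total number of values that fall in the bucket. In numeric mode the first and last entries are the lower and upper bounds of the bucket's range rather than values actually present in the array.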

Parameters:

  • num_buckets (Integer) (defaults to: nil)

    The number of buckets (groups of values) to use when histogramming. nil means one bucket per unique value.

  • numeric (Boolean) (defaults to: false)

    Whether the array data is numeric

  • block (Proc)

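    If a block is given it is used as the comparator when sorting the buckets: it is called with two [value, count] pairs and must return a negative number, zero, or a positive number, like <=>. This might be necessary if your data is not numeric and you want to override the way your objects compare.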

Returns:

  • (Array<Array(first_value, last_value, num_values)>)

    Array of buckets. Each bucket is an array containing the first value in the bucket, the last value in the bucket, and the total number of values that fall in the bucket. In numeric mode the first and last entries are the lower and upper bounds of the bucket's range.



293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
# File 'lib/openc3/core_ext/array.rb', line 293

# Groups the values of this array into histogram buckets.
#
# Three modes are supported:
# * num_buckets nil: one bucket per distinct value.
# * num_buckets given, numeric false: the sorted distinct values are split
#   into buckets holding (as near as possible) the same number of distinct
#   values. Fewer than num_buckets buckets are returned when there are not
#   enough distinct values to fill them.
# * num_buckets given, numeric true: the span between the smallest and largest
#   value is split into num_buckets equally sized ranges. For Integer data the
#   bucket size is rounded up, so the last range may extend past the largest
#   value. Numeric mode expects the buckets to sort in ascending order.
#
# @param num_buckets [Integer, nil] Number of buckets, or nil for one bucket
#   per distinct value
# @param numeric [Boolean] Whether the array data is numeric
# @yield [a, b] Optional comparator used to sort the [value, count] pairs
# @return [Array<Array(first_value, last_value, num_values)>] In numeric mode
#   first_value and last_value are the bounds of the bucket's range
# @raise [RuntimeError] if num_buckets is given but is not positive
def histogram(num_buckets = nil, numeric = false, &)
  # Count the occurrence of each value
  counts = {}
  each do |value|
    counts[value] ||= 0
    counts[value] += 1
  end

  # Sort [value, count] pairs, using the caller's block as comparator if given.
  # A lambda is used because the integer path below must re-sort after padding.
  sort_counts = lambda do
    block_given? ? counts.sort { |x, y| yield(x, y) } : counts.sort
  end
  sorted_buckets = sort_counts.call

  # Without a bucket count every distinct value is its own bucket
  return sorted_buckets.map { |key, count| [key, key, count] } unless num_buckets

  # Validate num_buckets
  raise "Invalid num_buckets #{num_buckets}" if num_buckets.to_i <= 0

  # Nothing to histogram; also protects the numeric path, which reads the
  # first and last sorted entries
  return [] if sorted_buckets.empty?

  reduced_buckets = []
  if numeric
    # Numeric histograms use the same sized range for each bucket
    first_value = sorted_buckets[0][0]
    last_value  = sorted_buckets[-1][0]
    integers    = first_value.kind_of?(Integer) && last_value.kind_of?(Integer)
    if integers
      # An inclusive integer span holds (last - first + 1) values. Sizing the
      # buckets from that count guarantees the largest value is covered and
      # that the size is at least 1 when all values are equal.
      bucket_size = ((last_value - first_value + 1).to_f / num_buckets.to_f).ceil
      last_value  = first_value + (bucket_size * num_buckets) - 1
      # Pad every integer in the span with a zero count so that empty buckets
      # are still walked (and therefore emitted) below
      (first_value..last_value).each { |value| counts[value] ||= 0 }
      sorted_buckets = sort_counts.call
    else
      bucket_size = (last_value - first_value).to_f / num_buckets.to_f
    end

    # Lay out the range covered by each bucket; the final bucket always ends
    # at last_value
    current_value = first_value
    bucket_ranges = Array.new(num_buckets) do |bucket_index|
      range_end =
        if bucket_index == (num_buckets - 1)
          last_value
        elsif integers
          current_value + bucket_size - 1
        else
          current_value + bucket_size
        end
      bucket_range = current_value..range_end
      current_value += bucket_size
      bucket_range
    end

    # Build the final buckets by walking the sorted values once
    sorted_index = 0
    bucket_ranges.each do |bucket_range|
      break if sorted_index >= sorted_buckets.length

      sum = 0
      while sorted_index < sorted_buckets.length &&
            bucket_range.include?(sorted_buckets[sorted_index][0])
        sum += sorted_buckets[sorted_index][1]
        sorted_index += 1
      end
      reduced_buckets << [bucket_range.first, bucket_range.last, sum]
    end
  else
    # Non-numeric histograms use the same number of items per bucket
    items_per_bucket = sorted_buckets.length / num_buckets.to_i
    items_per_bucket = 1 if items_per_bucket < 1
    bucket_sizes     = [items_per_bucket] * num_buckets

    # Hand out any remainder one item at a time to the leading buckets
    excess_items = sorted_buckets.length - (items_per_bucket * num_buckets)
    bucket_sizes.each_index do |bucket_size_index|
      break if excess_items <= 0

      bucket_sizes[bucket_size_index] += 1
      excess_items -= 1
    end

    # Build the final buckets
    final_index = sorted_buckets.length - 1
    first_index = 0
    num_buckets.times do |bucket_index|
      break if first_index > final_index

      # The last bucket absorbs whatever remains
      last_index =
        if bucket_index == (num_buckets - 1)
          final_index
        else
          [first_index + bucket_sizes[bucket_index] - 1, final_index].min
        end
      sum = sorted_buckets[first_index..last_index].sum { |_key, count| count }
      reduced_buckets << [sorted_buckets[first_index][0], sorted_buckets[last_index][0], sum]
      first_index += bucket_sizes[bucket_index]
    end
  end
  reduced_buckets
end