Method: Array#histogram
- Defined in:
- lib/openc3/core_ext/array.rb
#histogram(num_buckets = nil, numeric = false) ⇒ Array<Array(first_value, last_value, num_values)>
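Returns an Array of buckets, where each bucket is an array containing the first value in the bucket, the last value in the bucket, and the total number of values in the bucket. For numeric histograms the first and last entries are the bounds of the bucket's range rather than values found in the array.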
293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 |
# File 'lib/openc3/core_ext/array.rb', line 293

# Builds a histogram of the values in this array.
#
# Every distinct value is counted and the (value, count) pairs are sorted,
# using the given block as the comparator when one is supplied. The sorted
# pairs are then optionally reduced to num_buckets buckets:
#
# * no num_buckets: one bucket per distinct value
# * numeric false: each bucket holds (roughly) the same number of distinct
#   values; leftover values are spread over the leading buckets
# * numeric true: each bucket spans the same sized range of values. When the
#   smallest and largest values are both Integers the ranges are integer
#   ranges at least one wide; otherwise adjacent ranges share their boundary.
#
# @param num_buckets [Integer, nil] Number of buckets to reduce to, or nil to
#   keep one bucket per distinct value. Converted with to_i before use.
# @param numeric [Boolean] Whether to bucket by equal value ranges (true) or
#   by equal numbers of distinct values (false)
# @yield [x, y] Optional sort comparator; x and y are [value, count] pairs
# @return [Array<Array(first_value, last_value, num_values)>] Array of
#   buckets. For numeric histograms first_value and last_value are the bounds
#   of the bucket's range.
# @raise [RuntimeError] if num_buckets is given and num_buckets.to_i <= 0
def histogram(num_buckets = nil, numeric = false, &)
  # Count the occurrence of each value
  buckets = tally

  # Sort buckets by value, use block for sorting if given. This is a lambda
  # because integer histograms add zero-count entries and must sort again.
  sort_buckets = lambda do
    block_given? ? buckets.sort { |x, y| yield(x, y) } : buckets.sort
  end
  sorted_buckets = sort_buckets.call

  # Without a bucket count every distinct value is its own bucket
  return sorted_buckets.map { |key, count| [key, key, count] } unless num_buckets

  # Validate num_buckets and use the validated integer from here on
  bucket_count = num_buckets.to_i
  raise "Invalid num_buckets #{num_buckets}" if bucket_count <= 0

  # Nothing to bucket (also avoids indexing into an empty list below)
  return [] if sorted_buckets.empty?

  reduced_buckets = []
  if numeric
    # Numeric histograms use the same sized range for each bucket
    first_value = sorted_buckets.first[0]
    last_value = sorted_buckets.last[0]
    bucket_size = (last_value - first_value).to_f / bucket_count.to_f
    integers = first_value.kind_of?(Integer) && last_value.kind_of?(Integer)
    if integers
      bucket_size = bucket_size.ceil
      # A zero-wide bucket would produce empty ranges and drop every value
      # when all the values are equal
      bucket_size = 1 if bucket_size < 1
      last_value = first_value + (bucket_size * bucket_count) - 1
      # Make sure every integer in the covered span has an entry
      (first_value..last_value).each { |value| buckets[value] ||= 0 }
      sorted_buckets = sort_buckets.call
    end

    # Build the range of each bucket. The start is accumulated so that the
    # end of one float range is exactly the start of the next.
    bucket_ranges = []
    current_value = first_value
    bucket_count.times do |bucket_index|
      range_end =
        if bucket_index == (bucket_count - 1)
          last_value
        elsif integers
          current_value + bucket_size - 1
        else
          current_value + bucket_size
        end
      bucket_ranges << (current_value..range_end)
      current_value += bucket_size
    end

    # Build the final buckets by consuming the sorted values in order
    sorted_index = 0
    bucket_ranges.each do |bucket_range|
      break if sorted_index >= sorted_buckets.length

      sum = 0
      while sorted_index < sorted_buckets.length &&
            bucket_range.cover?(sorted_buckets[sorted_index][0])
        sum += sorted_buckets[sorted_index][1]
        sorted_index += 1
      end
      reduced_buckets << [bucket_range.begin, bucket_range.end, sum]
    end
  else
    # Non-numeric histograms use the same number of items per bucket
    items_per_bucket = sorted_buckets.length / bucket_count
    items_per_bucket = 1 if items_per_bucket < 1
    bucket_sizes = [items_per_bucket] * bucket_count

    # Hand the remainder out one item at a time to the leading buckets
    excess_items = sorted_buckets.length - (items_per_bucket * bucket_count)
    excess_items.times { |index| bucket_sizes[index] += 1 } if excess_items.positive?

    # Build the final buckets
    final_index = sorted_buckets.length - 1
    first_index = 0
    bucket_count.times do |bucket_index|
      break if first_index > final_index

      last_index =
        if bucket_index == (bucket_count - 1)
          final_index
        else
          [first_index + bucket_sizes[bucket_index] - 1, final_index].min
        end
      sum = sorted_buckets[first_index..last_index].sum { |_key, count| count }
      reduced_buckets << [sorted_buckets[first_index][0], sorted_buckets[last_index][0], sum]
      first_index += bucket_sizes[bucket_index]
    end
  end
  reduced_buckets
end