Module: Daru::Category
- Defined in:
- lib/daru/category.rb
Overview
rubocop:disable Metrics/ModuleLength
Constant Summary collapse
- UNDEFINED =
Object.new.freeze
- CODING_SCHEMES =
%i[dummy deviation helmert simple].freeze
Instance Attribute Summary collapse
-
#base_category ⇒ Object
Returns the value of attribute base_category.
-
#coding_scheme ⇒ Object
Returns the value of attribute coding_scheme.
-
#index ⇒ Object
Returns the value of attribute index.
-
#name ⇒ Object
Returns the value of attribute name.
Instance Method Summary collapse
-
#==(other) ⇒ Object
Two categorical vectors are equal if their indexes and corresponding values are the same. Returns true if the two vectors are equal, false otherwise.
-
#[](*indexes) ⇒ Object
Returns vector for indexes/positions specified.
-
#[]=(*indexes, val) ⇒ Object
Modifies values at specified indexes/positions.
-
#add_category(*new_categories) ⇒ Object
Associates a category to the vector.
-
#at(*positions) ⇒ Object
Returns vector for positions specified.
-
#categories ⇒ Array
(also: #order)
Returns all the categories with the inherent order.
-
#categories=(cat_with_order) ⇒ Object
Sets order of the categories.
-
#contrast_code(opts = {}) ⇒ Daru::DataFrame
Contrast code the vector according to the coding scheme set.
-
#count(category = UNDEFINED) ⇒ Object
Returns frequency of given category.
-
#count_values(*values) ⇒ Integer
Count the number of values specified.
-
#describe ⇒ Daru::Vector
Gives a summary of the data using the following parameters: size (size of the data); categories (total number of categories); max_freq (maximum number of times a category occurs); max_category (the category that occurs the most times); min_freq (minimum number of times a category occurs); min_category (the category that occurs the fewest times).
-
#dup ⇒ Daru::Vector
Duplicates a vector.
-
#each ⇒ Enumerator
Returns an enumerator that enumerates on categorical data.
-
#frequencies(type = :count) ⇒ Daru::Vector
Returns a vector storing count/frequency of each category.
-
#include_values?(*values) ⇒ true, false
Check if any one of mentioned values occur in the vector.
-
#indexes(*values) ⇒ Array
Return indexes of values specified.
-
#initialize_category(data, opts = {}) ⇒ Object
Initializes a vector to store categorical data.
-
#max ⇒ object
Returns the maximum category according to the order specified.
-
#min ⇒ object
Returns the minimum category according to the order specified.
-
#ordered=(bool) ⇒ Object
Make categorical data ordered or unordered.
-
#ordered? ⇒ Boolean
Tells whether vector is ordered or not.
-
#plot(*args, **options, &b) ⇒ Object
this method is overwritten: see Daru::Category#plotting_library=.
- #plotting_library=(lib) ⇒ Object
- #positions(*values) ⇒ Object
-
#reindex!(idx) ⇒ Daru::Vector
Sets new index for vector.
-
#reject_values(*values) ⇒ Daru::Vector
Return a vector with specified values removed.
-
#remove_unused_categories ⇒ Daru::Vector
Removes the unused categories.
-
#rename_categories(old_to_new) ⇒ Object
Rename categories.
-
#reorder!(order) ⇒ Object
Reorder the vector with given positions.
-
#replace_values(old_values, new_value) ⇒ Daru::Vector
Replaces specified values with a new value.
-
#set_at(positions, val) ⇒ Object
Modifies values at specified positions.
-
#size ⇒ Object
Size of categorical data.
- #sort ⇒ Object
-
#sort! ⇒ Daru::Vector
Sorts the vector in the order specified.
-
#to_a ⇒ Array
Returns all categorical data.
-
#to_category ⇒ Daru::Vector
Does nothing since it is already of type category.
-
#to_ints ⇒ Array
Returns integer coding for categorical data in the order starting from 0.
-
#to_non_category ⇒ Daru::Vector
Converts a category type vector to non category type vector.
-
#where(bool_array) ⇒ Daru::Vector
For querying the data.
Instance Attribute Details
#base_category ⇒ Object
Returns the value of attribute base_category.
5 6 7 |
# File 'lib/daru/category.rb', line 5
# @return [Object] the base category currently stored on the vector
def base_category
  @base_category
end
#coding_scheme ⇒ Object
Returns the value of attribute coding_scheme.
6 7 8 |
# File 'lib/daru/category.rb', line 6
# @return [Object] the coding scheme currently stored on the vector
def coding_scheme
  @coding_scheme
end
#index ⇒ Object
Returns the value of attribute index.
6 7 8 |
# File 'lib/daru/category.rb', line 6
# @return [Object] the index currently stored on the vector
def index
  @index
end
#name ⇒ Object
Returns the value of attribute name.
6 7 8 |
# File 'lib/daru/category.rb', line 6
# @return [Object] the name currently stored on the vector
def name
  @name
end
Instance Method Details
#==(other) ⇒ Object
Two categorical vectors are equal if their indexes and corresponding values are the same. Returns true if the two vectors are equal, false otherwise.
512 513 514 515 516 |
# File 'lib/daru/category.rb', line 512
# Two categorical vectors are equal when they have the same size, the same
# values in the same positions, and the same index.
#
# @param other [Object] object to compare; must respond to #size, #to_a and #index
# @return [true, false]
def ==(other)
  return false unless size == other.size
  return false unless to_a == other.to_a

  index == other.index
end
#[](*indexes) ⇒ Object
It accepts both indexes and positions. In case of a collision, the argument is treated as an index.
Returns vector for indexes/positions specified
198 199 200 201 202 203 204 205 206 207 208 |
# File 'lib/daru/category.rb', line 198
# Looks up values by index label or position (a label wins on collision).
#
# @param indexes [Array] index labels or positions to fetch
# @return [Object, Daru::Vector] a single category when the lookup resolves
#   to one Integer position, otherwise a categorical vector
def [](*indexes)
  located = @index.pos(*indexes)

  if located.is_a?(Integer)
    category_from_position(located)
  else
    Daru::Vector.new(
      located.map { |position| category_from_position(position) },
      index: @index.subset(*indexes),
      name: @name,
      type: :category,
      ordered: @ordered,
      categories: categories
    )
  end
end
#[]=(*indexes, val) ⇒ Object
In order to add a new category you need to associate it via #add_category
Modifies values at specified indexes/positions.
252 253 254 255 256 257 258 259 260 261 |
# File 'lib/daru/category.rb', line 252
# Overwrites the values at the given index labels/positions.
# A new category must first be registered through #add_category.
#
# @param indexes [Array] index labels or positions to modify
# @param val [Object] category to store at every resolved position
# @return [self]
def []=(*indexes, val)
  targets = @index.pos(*indexes)

  case targets
  when Numeric
    # A single position came back, not a collection.
    modify_category_at(targets, val)
  else
    targets.each { |position| modify_category_at(position, val) }
  end

  self
end
#add_category(*new_categories) ⇒ Object
Associates a category to the vector.
131 132 133 134 |
# File 'lib/daru/category.rb', line 131
# Associates additional categories with the vector; categories that are
# already present are filtered out before being added.
#
# @param new_categories [Array] categories to associate
def add_category(*new_categories)
  add_extra_categories(new_categories - categories)
end
#at(*positions) ⇒ Object
Returns vector for positions specified.
221 222 223 224 225 226 227 228 229 230 231 232 233 234 |
# File 'lib/daru/category.rb', line 221
# Returns the value(s) at the given positions.
#
# @param positions [Array] positions to fetch
# @return [Object, Daru::Vector] a single category when the coerced position
#   is one Integer, otherwise a categorical vector
def at(*positions)
  resolved = coerce_positions(*positions)
  validate_positions(*resolved)

  if resolved.is_a?(Integer)
    category_from_position(resolved)
  else
    Daru::Vector.new(
      resolved.map { |position| category_from_position(position) },
      # The new index is built from the positions exactly as the caller passed them.
      index: @index.at(*positions),
      name: @name,
      type: :category,
      ordered: @ordered,
      categories: categories
    )
  end
end
#categories ⇒ Array Also known as: order
Returns all the categories with the inherent order
322 323 324 |
# File 'lib/daru/category.rb', line 322
# Lists every category in its inherent order (the key order of @cat_hash).
#
# @return [Array] all categories
def categories
  @cat_hash.keys
end
#categories=(cat_with_order) ⇒ Object
If extra categories are specified, they get added too.
Sets order of the categories.
336 337 338 339 340 |
# File 'lib/daru/category.rb', line 336
# Sets the order of the categories. Categories in the argument that are not
# yet known are added as well.
#
# @param cat_with_order [Array] categories in the desired order
def categories=(cat_with_order)
  validate_categories(cat_with_order)
  unseen = cat_with_order - categories
  add_extra_categories(unseen)
  order_with(cat_with_order)
end
#contrast_code(opts = {}) ⇒ Daru::DataFrame
To set the coding scheme use #coding_scheme=
Contrast code the vector according to the coding scheme set.
494 495 496 497 498 499 500 501 |
# File 'lib/daru/category.rb', line 494
# Contrast codes the vector according to the coding scheme that is set.
#
# @param opts [Hash] options
# @option opts [Object] :user_defined when truthy, passed to
#   #user_defined_coding instead of using the configured scheme
# @option opts [Object] :full forwarded to the scheme's coding method
#   (false when not given)
# @return [Daru::DataFrame]
def contrast_code(opts = {})
  custom = opts[:user_defined]
  return user_defined_coding(custom) if custom

  # TODO: Make various coding schemes code DRY
  # Dispatches to "<scheme>_coding", e.g. dummy_coding for the :dummy scheme.
  send("#{coding_scheme}_coding".to_sym, opts[:full] || false)
end
#count(category = UNDEFINED) ⇒ Object
Returns frequency of given category
145 146 147 148 149 150 151 |
# File 'lib/daru/category.rb', line 145
# Returns the frequency of a category, or the total number of values when
# no category is given.
#
# @param category [Object] category to count; omit to count every value
# @return [Integer] number of occurrences (0 when there is nothing to count)
# @raise [ArgumentError] if +category+ is not one of #categories
def count(category = UNDEFINED)
  # UNDEFINED is a sentinel object, so compare by identity rather than by
  # whatever #== the caller's category happens to implement.
  # `sum` returns 0 for an empty hash, where `inject(&:+)` returned nil.
  return @cat_hash.values.sum(&:size) if UNDEFINED.equal?(category)
  raise ArgumentError, "Invalid category #{category}" unless categories.include?(category)

  @cat_hash[category].size
end
#count_values(*values) ⇒ Integer
Count the number of values specified
710 711 712 713 714 |
# File 'lib/daru/category.rb', line 710
# Counts how many times the given values occur in the vector.
# Values that are not categories of the vector contribute 0.
#
# @param values [Array] values to count
# @return [Integer] combined number of occurrences
def count_values(*values)
  # One pass with `sum` instead of map + compact + inject.
  values.sum { |value| @cat_hash.key?(value) ? @cat_hash[value].size : 0 }
end
#describe ⇒ Daru::Vector
Gives the summary of data using following parameters
-
size: size of the data
-
categories: total number of categories
-
max_freq: Max no of times a category occurs
-
max_category: The category which occurs max no of times
-
min_freq: Min no of times a category occurs
-
min_category: The category which occurs min no of times
633 634 635 636 637 638 639 640 641 642 |
# File 'lib/daru/category.rb', line 633
# Summarises the data: its size, the number of categories, and the
# most/least frequent category together with their frequencies.
#
# @return [Daru::Vector]
def describe
  # category => number of occurrences, in category order
  occurrences = @cat_hash.transform_values(&:size)
  max_category, max_freq = occurrences.max_by { |_category, times| times }
  min_category, min_freq = occurrences.min_by { |_category, times| times }

  Daru::Vector.new(
    size: size,
    categories: categories.size,
    max_freq: max_freq,
    max_category: max_category,
    min_freq: min_freq,
    min_category: min_category
  )
end
#dup ⇒ Daru::Vector
Duplicated a vector
115 116 117 118 119 120 121 122 |
# File 'lib/daru/category.rb', line 115
# Duplicates the vector: a new categorical vector is built from copies of
# the data and index, keeping the name, categories and ordered flag.
#
# @return [Daru::Vector]
def dup
  copied_data = to_a.dup
  Daru::Vector.new(
    copied_data,
    name: @name,
    index: @index.dup,
    type: :category,
    categories: categories,
    ordered: ordered?
  )
end
#each ⇒ Enumerator
Returns an enumerator that enumerates on categorical data
88 89 90 91 92 |
# File 'lib/daru/category.rb', line 88
# Iterates over the categorical data, yielding the category for each stored
# integer code.
#
# @yield [Object] each category in turn
# @return [Enumerator, self] an enumerator when no block is given, else self
def each
  return enum_for(:each) unless block_given?

  @array.each do |code|
    yield cat_from_int(code)
  end
  self
end
#frequencies(type = :count) ⇒ Daru::Vector
Returns a vector storing count/frequency of each category
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
# File 'lib/daru/category.rb', line 164
# Returns a vector holding the count/frequency of each category.
#
# @param type [Symbol] :count for raw counts, :fraction for counts divided
#   by the size, :percentage for that fraction times 100
# @return [Daru::Vector] indexed by category
# @raise [ArgumentError] for any other +type+
def frequencies(type = :count)
  counts = @cat_hash.values.map(&:size)

  values =
    case type
    when :count
      counts
    when :fraction
      total = size.to_f
      counts.map { |occurrences| occurrences / total }
    when :percentage
      total = size.to_f
      counts.map { |occurrences| occurrences / total * 100 }
    else
      raise ArgumentError, 'Type should be either :count, :fraction or'\
        " :percentage. #{type} not supported."
    end

  Daru::Vector.new(values, index: categories, name: name)
end
#include_values?(*values) ⇒ true, false
Check if any one of mentioned values occur in the vector
680 681 682 |
# File 'lib/daru/category.rb', line 680
# Checks whether at least one of the given values occurs in the vector.
# A category that is registered but has no positions does not count.
#
# @param values [Array] values to look for
# @return [true, false]
def include_values?(*values)
  values.any? do |value|
    next false unless @cat_hash.key?(value)

    !@cat_hash[value].empty?
  end
end
#indexes(*values) ⇒ Array
Return indexes of values specified
723 724 725 726 |
# File 'lib/daru/category.rb', line 723
# Returns the index labels at which the given values occur.
# Values that are not categories of the vector are ignored.
#
# @param values [Array] values to locate
# @return [Array] index labels, ordered by position
def indexes(*values)
  known = values & categories
  wanted = known.flat_map { |value| @cat_hash[value] }.sort
  index.to_a.values_at(*wanted)
end
#initialize_category(data, opts = {}) ⇒ Object
Base category is set to the first category encountered in the vector.
Initializes a vector to store categorical data.
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/daru/category.rb', line 28
# Initializes a vector to store categorical data.
#
# @param data [Object] data handed to #initialize_core_attributes
# @param opts [Hash] options
# @option opts [Array] :categories categories in the desired order; unknown
#   ones are added
# @option opts [Boolean] :ordered whether the categories are ordered
#   (unordered by default)
# @option opts [Object] :name name of the vector
# @option opts [Object] :index index of the vector, passed through #coerce_index
# @return [self]
def initialize_category(data, opts = {})
  @type = :category
  initialize_core_attributes(data)

  requested = opts[:categories]
  if requested
    validate_categories(requested)
    add_extra_categories(requested - categories)
    order_with(requested)
  end

  @ordered = opts[:ordered] || false
  # Dummy coding is the default coding scheme.
  @coding_scheme = :dummy
  # The base category (absent from the coding) is the first category.
  @base_category = @cat_hash.keys.first
  @name = opts[:name]
  @index = coerce_index(opts[:index])

  self
end
#max ⇒ object
This operation only works if the vector is ordered. To make the vector ordered, do `vector.ordered = true`.
Returns the maximum category according to the order specified.
416 417 418 419 |
# File 'lib/daru/category.rb', line 416
# Returns the maximum category, i.e. the last one in the category order.
# #assert_ordered is called first with :max.
#
# @return [Object]
def max
  assert_ordered(:max)
  categories[-1]
end
#min ⇒ object
This operation only works if the vector is ordered. To make the vector ordered, do `vector.ordered = true`.
Returns the minimum category according to the order specified.
402 403 404 405 |
# File 'lib/daru/category.rb', line 402
# Returns the minimum category, i.e. the first one in the category order.
# #assert_ordered is called first with :min.
#
# @return [Object]
def min
  assert_ordered(:min)
  categories[0]
end
#ordered=(bool) ⇒ Object
Make categorical data ordered or unordered.
310 311 312 |
# File 'lib/daru/category.rb', line 310
# Marks the categorical data as ordered or unordered.
#
# @param bool [Boolean] true for ordered, false for unordered
def ordered=(bool)
  @ordered = bool
end
#ordered? ⇒ Boolean
Tells whether vector is ordered or not.
299 300 301 |
# File 'lib/daru/category.rb', line 299
# Tells whether the vector is ordered.
#
# @return [Boolean] the stored ordered flag
def ordered?
  @ordered
end
#plot(*args, **options, &b) ⇒ Object
this method is overwritten: see Daru::Category#plotting_library=
78 79 80 81 82 |
# File 'lib/daru/category.rb', line 78
# Calls #init_plotting_library and then re-dispatches to #plot with the
# same arguments.
# NOTE(review): presumably #init_plotting_library mixes in a plotting module
# (see #plotting_library=) so the second call reaches that module's #plot
# rather than this method again; confirm.
#
# @param args [Array] positional arguments forwarded unchanged
# @param options [Hash] keyword arguments forwarded unchanged
# @param b [Proc] block forwarded unchanged
def plot(*args, **options, &b)
  init_plotting_library
  plot(*args, **options, &b)
end
#plotting_library=(lib) ⇒ Object
62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/daru/category.rb', line 62
# Selects the plotting library for this vector.
#
# @param lib [Symbol] :gruff or :nyaplot
# @raise [ArgumentError] for any other library
def plotting_library=(lib)
  unless %i[gruff nyaplot].include?(lib)
    raise ArgumentError, "Plotting library #{lib} not supported. "\
      'Supported libraries are :nyaplot and :gruff'
  end

  @plotting_library = lib
  # Only mix the library's module in when Daru reports the library as available.
  return unless Daru.send("has_#{lib}?".to_sym)

  extend Module.const_get(
    "Daru::Plotting::Category::#{lib.to_s.capitalize}Library"
  )
end
#positions(*values) ⇒ Object
751 752 753 754 |
# File 'lib/daru/category.rb', line 751
# Returns the sorted positions at which the given values occur.
# Values that are not categories of the vector are ignored.
#
# @param values [Array] values to locate
# @return [Array<Integer>]
def positions(*values)
  known = values & categories
  known.flat_map { |value| @cat_hash[value] }.sort
end
#reindex!(idx) ⇒ Daru::Vector
Unlike #reorder! which takes positions as input it takes index as an input to reorder the vector
Sets new index for vector. Preserves index->value correspondence.
565 566 567 568 569 570 571 572 573 574 575 576 |
# File 'lib/daru/category.rb', line 565
# Sets a new index for the vector, preserving the index->value
# correspondence. Unlike #reorder!, it takes index labels, not positions.
#
# @param idx [Daru::Index, Object] new index; wrapped in a Daru::Index if needed
# @return [self]
# @raise [ArgumentError] unless +idx+ holds exactly the current index labels
def reindex!(idx)
  idx = Daru::Index.new(idx) unless idx.is_a?(Daru::Index)
  if idx.to_a.sort != index.to_a.sort
    raise ArgumentError, 'Invalid index specified'
  end

  previous_order = categories
  # Read the values in the new label order before the core state is rebuilt.
  rearranged = idx.map { |label| self[label] }
  initialize_core_attributes(rearranged)
  self.categories = previous_order
  self.index = idx
  self
end
#reject_values(*values) ⇒ Daru::Vector
Return a vector with specified values removed
693 694 695 696 697 698 699 700 701 |
# File 'lib/daru/category.rb', line 693
# Returns a vector with the specified values removed and the categories
# that became unused dropped.
#
# @param values [Array] values to remove
# @return [Daru::Vector]
def reject_values(*values)
  all_positions = size.times.to_a
  rejected = values.flat_map { |value| @cat_hash[value] }
  kept = all_positions - rejected

  remaining = at(*kept)
  unless remaining.is_a?(Daru::Vector)
    # #at returned a bare value instead of a vector; ask again with a
    # one-element range so a vector comes back.
    only = kept.first
    remaining = at(only..only)
  end
  remaining.remove_unused_categories
end
#remove_unused_categories ⇒ Daru::Vector
If the base category is removed, the first category occurring in the data is taken as the base category. The order of the remaining categories is preserved.
Removes the unused categories
383 384 385 386 387 388 389 390 391 |
# File 'lib/daru/category.rb', line 383
# Removes categories that no longer occur in the data. The order of the
# surviving categories is preserved; if the base category is removed, the
# first remaining category becomes the base category.
#
# @return [self]
def remove_unused_categories
  previous_order = categories
  # Rebuilding from the data leaves only categories that actually occur.
  initialize_core_attributes(to_a)
  self.categories = previous_order & categories

  unless categories.include?(base_category)
    self.base_category = @cat_hash.keys.first
  end
  self
end
#rename_categories(old_to_new) ⇒ Object
The order of categories after renaming is preserved but new categories are added at the end in the order. Also the base-category is reassigned to new value if it is renamed
Rename categories.
358 359 360 361 362 363 364 365 366 367 368 369 |
# File 'lib/daru/category.rb', line 358
# Renames categories. The order of untouched categories is preserved and
# the new names are appended after them. The base category follows its
# rename.
#
# @param old_to_new [Hash] mapping of old category => new category
# @return [self]
def rename_categories(old_to_new)
  previous_order = categories
  renamed = to_a.map { |category| old_to_new.fetch(category, category) }

  initialize_core_attributes(renamed)
  self.categories = (previous_order - old_to_new.keys) | old_to_new.values
  if old_to_new.include?(base_category)
    self.base_category = old_to_new[base_category]
  end
  self
end
#reorder!(order) ⇒ Object
Unlike #reindex! which takes index as input, it takes positions as an input to reorder the vector
Reorder the vector with given positions
543 544 545 546 547 548 549 550 551 |
# File 'lib/daru/category.rb', line 543
# Reorders the vector by positions. Unlike #reindex!, it takes positions,
# not index labels.
#
# @param order [Array<Integer>] a permutation of 0...size
# @return [self]
# @raise [ArgumentError] unless +order+ is such a permutation
def reorder!(order)
  unless order.sort == (0...size).to_a
    raise ArgumentError, 'Invalid order specified'
  end

  # TODO: Room for optimization
  snapshot = to_a
  initialize_core_attributes(order.map { |position| snapshot[position] })
  self
end
#replace_values(old_values, new_value) ⇒ Daru::Vector
It performs the replace in place.
Replaces specified values with a new value
745 746 747 748 749 |
# File 'lib/daru/category.rb', line 745
# Replaces the specified values with a new value, in place, by renaming the
# matching categories.
#
# @param old_values [Array, Object] value or values to replace
# @param new_value [Object] replacement value
# @return [Daru::Vector]
def replace_values(old_values, new_value)
  targets = old_values.is_a?(Array) ? old_values : [old_values]
  renames = targets.each_with_object({}) { |value, mapping| mapping[value] = new_value }
  rename_categories(renames)
end
#set_at(positions, val) ⇒ Object
Modifies values at specified positions.
277 278 279 280 281 |
# File 'lib/daru/category.rb', line 277
# Overwrites the values at the given positions.
#
# @param positions [Array<Integer>] positions to modify
# @param val [Object] category to store at each position
# @return [self]
def set_at(positions, val)
  validate_positions(*positions)
  positions.each { |position| modify_category_at(position, val) }
  self
end
#size ⇒ Object
Size of categorical data.
289 290 291 |
# File 'lib/daru/category.rb', line 289
# Size of the categorical data.
#
# @return [Integer] number of stored values
def size
  @array.length
end
#sort ⇒ Object
459 460 461 |
# File 'lib/daru/category.rb', line 459
# Non-destructive counterpart of #sort!: sorts a duplicate and returns it.
def sort
  duplicate = dup
  duplicate.sort!
end
#sort! ⇒ Daru::Vector
This operation only works if the vector is ordered. To make the vector ordered, do `vector.ordered = true`.
Sorts the vector in the order specified.
436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 |
# File 'lib/daru/category.rb', line 436
# Sorts the vector in place following the category order.
# #assert_ordered is called first with :sort.
#
# @return [self]
def sort! # rubocop:disable Metrics/AbcSize
  # TODO: Simplify the code
  assert_ordered :sort

  # Sorted index: the labels of each category's positions, category by category.
  labels = @index.to_a
  grouped_labels = @cat_hash.values.map { |positions| labels.values_at(*positions) }
  @index = @index.class.new(grouped_labels.flatten)

  # Sorted data: each category gets one contiguous run of positions.
  offset = 0
  rebuilt = {}
  categories.each do |cat|
    run_length = @cat_hash[cat].size
    run_length.times { |i| @array[offset + i] = int_from_cat(cat) }
    rebuilt[cat] = (offset...(offset + run_length)).to_a
    offset += run_length
  end
  # Swap the hash in only after every run is written; the loop reads the old one.
  @cat_hash = rebuilt

  self
end
#to_a ⇒ Array
Returns all categorical data
100 101 102 |
# File 'lib/daru/category.rb', line 100
# Returns all the categorical data as an array, in the order #each yields it.
#
# @return [Array]
def to_a
  each.entries
end
#to_category ⇒ Daru::Vector
Does nothing since it is already of type category.
646 647 648 |
# File 'lib/daru/category.rb', line 646
# No conversion is needed: the vector is already categorical.
#
# @return [self]
def to_category
  self
end
#to_ints ⇒ Array
Returns integer coding for categorical data in the order starting from 0. For example if order is [:a, :b, :c], then :a, will be coded as 0, :b as 1 and :c as 2
527 528 529 |
# File 'lib/daru/category.rb', line 527
# Returns the integer coding of the categorical data, starting from 0.
# For example, with order [:a, :b, :c], :a is coded 0, :b 1 and :c 2.
# The internal array itself is returned, not a copy.
#
# @return [Array<Integer>]
def to_ints
  @array
end
#to_non_category ⇒ Daru::Vector
Converts a category type vector to non category type vector
652 653 654 |
# File 'lib/daru/category.rb', line 652
# Converts the categorical vector into a non-category vector that keeps the
# same data, name and index.
#
# @return [Daru::Vector]
def to_non_category
  Daru::Vector.new(to_a, name: name, index: index)
end
#where(bool_array) ⇒ Daru::Vector
For querying the data
610 611 612 |
# File 'lib/daru/category.rb', line 610
# Queries the data by delegating to Daru::Core::Query.vector_where.
#
# @param bool_array [Object] selector passed through to the query helper
# @return [Daru::Vector]
def where(bool_array)
  Daru::Core::Query.vector_where(self, bool_array)
end