Module: DaruLite::Category

Defined in:
lib/daru_lite/category.rb

Overview

rubocop:disable Metrics/ModuleLength

Constant Summary collapse

# Unique frozen sentinel object; #count uses it as the default argument so
# that "no argument given" can be told apart from an explicit nil category.
UNDEFINED =
Object.new.freeze
# Names of the contrast coding schemes. #contrast_code dispatches to the
# method named "<coding_scheme>_coding".
# NOTE(review): presumably this is the list coding_scheme= validates against —
# the setter is not shown here, confirm.
CODING_SCHEMES =
%i[dummy deviation helmert simple].freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#base_categoryObject

Returns the value of attribute base_category.



5
6
7
# File 'lib/daru_lite/category.rb', line 5

# Reader for the base category.
#
# #initialize_category seeds it with the first key of @cat_hash and describes
# it as the category "which won't be present in the coding".
#
# @return [Object] the current value of @base_category
def base_category
  @base_category
end

#coding_schemeObject

Returns the value of attribute coding_scheme.



6
7
8
# File 'lib/daru_lite/category.rb', line 6

# Reader for the coding scheme used by #contrast_code.
#
# #initialize_category sets it to :dummy; #contrast_code turns the value into
# a "<coding_scheme>_coding" method name.
#
# @return [Symbol] the current value of @coding_scheme
def coding_scheme
  @coding_scheme
end

#indexObject

Returns the value of attribute index.



6
7
8
# File 'lib/daru_lite/category.rb', line 6

# Reader for the vector's index.
#
# #initialize_category assigns it from `coerce_index opts[:index]`.
#
# @return [Object] the current value of @index
def index
  @index
end

#nameObject

Returns the value of attribute name.



6
7
8
# File 'lib/daru_lite/category.rb', line 6

# Reader for the vector's name.
#
# #initialize_category assigns it from opts[:name], so it is nil when no name
# was supplied.
#
# @return [Object, nil] the current value of @name
def name
  @name
end

Instance Method Details

#==(other) ⇒ Object

Two categorical vectors are equal if their indexes and their corresponding values are the same. Returns true if the two vectors are equal, false otherwise.

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
other = DaruLite::Vector.new [:a, 1, :a, 1, :c],
  type: :category,
  index: 1..5
dv == other
# => false


492
493
494
495
496
# File 'lib/daru_lite/category.rb', line 492

# Compares this categorical vector with +other+.
#
# Checks are made in order of increasing cost and stop at the first mismatch:
# size, then the values (#to_a), then the index.
#
# @param other [#size, #to_a, #index] object to compare against
# @return [Boolean] true only when size, values and index all match
def ==(other)
  return false unless size == other.size
  return false unless to_a == other.to_a

  index == other.index
end

#[](*indexes) ⇒ Object

Note:

It accepts both indexes and positions. In case of a collision, the argument is treated as an index.

Returns vector for indexes/positions specified

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c],
  type: :category,
  index: 'a'..'e'
dv[:a, 1]
# => #<DaruLite::Vector(2)>
#   a   a
#   b   1
dv[0, 1]
# => #<DaruLite::Vector(2)>
#   a   a
#   b   1


177
178
179
180
181
182
183
184
185
186
187
# File 'lib/daru_lite/category.rb', line 177

# Fetches the value(s) stored at the given index labels / positions.
#
# The lookup is delegated to @index.pos. When that yields a single Integer the
# bare category at that position is returned; otherwise a new category vector
# is built that carries over name, ordering flag and the full category list.
#
# @param indexes [Array<Object>] index labels or positions, forwarded to @index.pos
# @return [Object, DaruLite::Vector] a single category, or a category vector
def [](*indexes)
  located = @index.pos(*indexes)
  return category_from_position(located) if located.is_a?(Integer)

  values = located.map { |position| category_from_position(position) }
  DaruLite::Vector.new(
    values,
    index: @index.subset(*indexes),
    name: @name,
    type: :category,
    ordered: @ordered,
    categories: categories
  )
end

#[]=(*indexes, val) ⇒ Object

Note:

In order to add a new category you need to associate it via #add_category

Modifies values at specified indexes/positions.

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.add_category :b
dv[0] = :b
dv
# => #<DaruLite::Vector(5)>
#   0   b
#   1   1
#   2   a
#   3   1
#   4   c


231
232
233
234
235
236
237
238
239
240
# File 'lib/daru_lite/category.rb', line 231

# Assigns +val+ to every element addressed by the given index labels / positions.
#
# @index.pos may return a single Numeric position or a collection of
# positions; a single position is wrapped so both cases share one loop.
#
# @param indexes [Array<Object>] index labels or positions, forwarded to @index.pos
# @param val [Object] category to store (handled by modify_category_at)
# @return [self]
def []=(*indexes, val)
  targets = @index.pos(*indexes)
  targets = [targets] if targets.is_a?(Numeric)
  targets.each { |position| modify_category_at(position, val) }
  self
end

#add_category(*new_categories) ⇒ Object

Associates a category to the vector.

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.add_category :b
dv.categories
# => [:a, :b, :c, 1]


110
111
112
113
# File 'lib/daru_lite/category.rb', line 110

# Registers additional categories on the vector.
#
# Categories that are already present are filtered out before the remainder
# is handed to add_extra_categories.
#
# @param new_categories [Array<Object>] categories to associate with the vector
# @return [Object] whatever add_extra_categories returns
def add_category(*new_categories)
  add_extra_categories(new_categories - categories)
end

#at(*positions) ⇒ Object

Returns vector for positions specified.

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.at 0..-2
# => #<DaruLite::Vector(4)>
#   0   a
#   1   1
#   2   a
#   3   1


200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'lib/daru_lite/category.rb', line 200

# Fetches the value(s) stored at the given positions.
#
# The raw arguments are normalised by coerce_positions and checked by
# validate_positions. A single Integer position yields the bare category;
# anything else yields a new category vector whose index is taken from
# @index.at called with the original (un-coerced) arguments.
#
# @param positions [Array<Integer, Range>] positions to read
# @return [Object, DaruLite::Vector] a single category, or a category vector
def at(*positions)
  resolved = coerce_positions(*positions)
  validate_positions(*resolved)
  return category_from_position(resolved) if resolved.is_a?(Integer)

  picked = resolved.map { |position| category_from_position(position) }
  DaruLite::Vector.new(
    picked,
    index: @index.at(*positions),
    name: @name,
    type: :category,
    ordered: @ordered,
    categories: categories
  )
end

#categoriesArray Also known as: order

Returns all the categories with the inherent order

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c],
  type: :category,
  categories: [:a, :b, :c, 1]
dv.categories
# => [:a, :b, :c, 1]


301
302
303
# File 'lib/daru_lite/category.rb', line 301

# Lists every category known to the vector.
#
# The list is the key set of @cat_hash, so the order of the result is the
# insertion order of that hash.
#
# @return [Array<Object>] the categories
def categories
  @cat_hash.keys
end

#categories=(cat_with_order) ⇒ Object

Note:

If extra categories are specified, they get added too.

Sets order of the categories.

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.categories = [:a, :b, :c, 1]
dv.categories
# => [:a, :b, :c, 1]


315
316
317
318
319
# File 'lib/daru_lite/category.rb', line 315

# Sets the order of the categories, adding any that are not yet known.
#
# Steps: validate the list, register the categories missing from the vector,
# then apply the requested order via order_with.
#
# @param cat_with_order [Array<Object>] categories in the desired order
# @return [Object] whatever order_with returns (an assignment expression
#   itself still evaluates to +cat_with_order+)
def categories=(cat_with_order)
  validate_categories cat_with_order
  missing = cat_with_order - categories
  add_extra_categories missing
  order_with(cat_with_order)
end

#contrast_code(opts = {}) ⇒ DaruLite::DataFrame

Note:

To set the coding scheme use #coding_scheme=

Contrast-codes the vector according to the coding scheme that is set.

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.contrast_code full: false
# => #<DaruLite::DataFrame(5x2)>
#         daru_1 daru_c
#       0      0      0
#       1      1      0
#       2      0      0
#       3      1      0
#       4      0      1

Options Hash (opts):

  • :full (TrueClass, FalseClass) — default: false

    True if you want k variables for k categories, false if you want k-1 variables for k categories.



474
475
476
477
478
479
480
481
# File 'lib/daru_lite/category.rb', line 474

# Contrast-codes the vector.
#
# A truthy opts[:user_defined] is passed to user_defined_coding. Otherwise
# the method named "<coding_scheme>_coding" is invoked with the :full flag
# (false when the option is absent or falsy).
#
# @param opts [Hash] options
# @option opts [Object] :user_defined custom coding, forwarded to user_defined_coding
# @option opts [Boolean] :full (false) forwarded to the scheme's coding method
# @return [Object] the result of the selected coding method
def contrast_code(opts = {})
  return user_defined_coding(opts[:user_defined]) if opts[:user_defined]

  # TODO: Make various coding schemes code DRY
  send(:"#{coding_scheme}_coding", opts[:full] || false)
end

#count(category = UNDEFINED) ⇒ Object

Returns frequency of given category

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.count :a
# => 2
dv.count
# => 5

Raises:

  • (ArgumentError)


124
125
126
127
128
129
130
# File 'lib/daru_lite/category.rb', line 124

# Returns how often +category+ occurs, or the total number of elements when
# called without an argument.
#
# The "no argument" case is detected by object identity against the
# UNDEFINED sentinel, so a category object with a custom #== is never asked
# to compare itself with the sentinel.
#
# Membership is checked with Hash#key? on @cat_hash — the same lookup
# semantics as the subsequent @cat_hash[category]. A value that merely
# compares == to a category (e.g. 1.0 vs. 1) is therefore rejected with
# ArgumentError instead of failing later on a nil lookup.
#
# @param category [Object] category to count; omit to count all elements
# @return [Integer] number of occurrences
# @raise [ArgumentError] if +category+ is not one of the vector's categories
def count(category = UNDEFINED)
  return @cat_hash.each_value.sum(&:size) if UNDEFINED.equal?(category) # count all
  raise ArgumentError, "Invalid category #{category}" unless
    @cat_hash.key?(category)

  @cat_hash[category].size
end

#count_values(*values) ⇒ Integer

Count the number of values specified

Examples:

dv = DaruLite::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
dv.count_values nil
# => 2


691
692
693
694
# File 'lib/daru_lite/category.rb', line 691

# Counts how many elements of the vector equal any of the given values.
#
# Values that are not categories contribute nothing; a value listed twice is
# counted twice.
#
# @param values [Array<Object>] values to look for
# @return [Integer] total number of occurrences
def count_values(*values)
  values.sum { |value| @cat_hash.key?(value) ? @cat_hash[value].size : 0 }
end

#describeDaruLite::Vector

Gives the summary of data using following parameters

  • size: size of the data

  • categories: total number of categories

  • max_freq: Max no of times a category occurs

  • max_category: The category which occurs max no of times

  • min_freq: Min no of times a category occurs

  • min_category: The category which occurs min no of times

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.describe
# => #<DaruLite::Vector(6)>
#         size            5
#   categories            3
#     max_freq            2
# max_category            a
#     min_freq            1
# min_category            c


614
615
616
617
618
619
620
621
622
623
# File 'lib/daru_lite/category.rb', line 614

# Summarises the vector: element count, number of categories, and the most /
# least frequent category together with their frequencies.
#
# Frequencies are the lengths of the position lists in @cat_hash. On ties the
# first category in @cat_hash order wins (Enumerable#max_by / #min_by).
#
# @return [DaruLite::Vector] vector built from a Hash with the keys :size,
#   :categories, :max_freq, :max_category, :min_freq and :min_category
def describe
  freq = @cat_hash.transform_values(&:size)
  counts = freq.values

  DaruLite::Vector.new(
    size: size,
    categories: categories.size,
    max_freq: counts.max,
    max_category: freq.keys.max_by { |cat| freq[cat] },
    min_freq: counts.min,
    min_category: freq.keys.min_by { |cat| freq[cat] }
  )
end

#dupDaruLite::Vector

Duplicates a vector

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.dup
# => #<DaruLite::Vector(5)>
#   0   a
#   1   1
#   2   a
#   3   1
#   4   c


94
95
96
97
98
99
100
101
# File 'lib/daru_lite/category.rb', line 94

# Builds an independent category vector with the same data.
#
# The values array and the index are duplicated; name, category list and the
# ordering flag are carried over.
#
# @return [DaruLite::Vector] the copy
def dup
  attributes = {
    name: @name,
    index: @index.dup,
    type: :category,
    categories: categories,
    ordered: ordered?
  }
  DaruLite::Vector.new(to_a.dup, **attributes)
end

#eachEnumerator

Returns an enumerator that enumerates on categorical data



66
67
68
69
70
71
# File 'lib/daru_lite/category.rb', line 66

# Iterates over the categorical values in storage order.
#
# @array holds one integer code per element; each code is translated back to
# its category by cat_from_int before being yielded.
#
# @yieldparam category [Object] the category of the current element
# @return [Enumerator] when no block is given
# @return [self] when a block is given
def each
  return to_enum(:each) unless block_given?

  @array.each do |code|
    yield cat_from_int(code)
  end
  self
end

#frequencies(type = :count) ⇒ DaruLite::Vector

Returns a vector storing count/frequency of each category

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.frequencies
# => #<DaruLite::Vector(4)>
#   a   2
#   b   0
#   c   1
#   1   2


143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/daru_lite/category.rb', line 143

# Builds a vector holding the frequency of every category, indexed by category.
#
# @param type [Symbol] :count for absolute counts, :fraction for counts
#   divided by #size, :percentage for the fraction multiplied by 100
# @return [DaruLite::Vector] frequencies, with the categories as index and
#   this vector's name
# @raise [ArgumentError] if +type+ is none of the three supported symbols
def frequencies(type = :count)
  tallies = @cat_hash.each_value.map(&:size)
  scaled =
    case type
    when :count then tallies
    when :fraction
      total = size.to_f
      tallies.map { |tally| tally / total }
    when :percentage
      total = size.to_f
      tallies.map { |tally| tally / total * 100 }
    else
      raise ArgumentError, 'Type should be either :count, :fraction or ' \
                           ":percentage. #{type} not supported."
    end

  DaruLite::Vector.new(scaled, index: categories, name: name)
end

#include_values?(*values) ⇒ true, false

Check if any one of mentioned values occur in the vector

Examples:

dv = DaruLite::Vector.new [1, 2, 3, 4, nil]
dv.include_values? nil, Float::NAN
# => true


661
662
663
# File 'lib/daru_lite/category.rb', line 661

# Tells whether at least one of the given values actually occurs in the vector.
#
# A value only counts when it is a category AND that category has at least
# one position; a registered but unused category does not count.
#
# @param values [Array<Object>] values to look for
# @return [Boolean]
def include_values?(*values)
  values.any? do |value|
    !@cat_hash.fetch(value, []).empty?
  end
end

#indexes(*values) ⇒ Array

Return indexes of values specified

Examples:

dv = DaruLite::Vector.new [1, 2, nil, Float::NAN], index: 11..14
dv.indexes nil, Float::NAN
# => [13, 14]


703
704
705
706
# File 'lib/daru_lite/category.rb', line 703

# Returns the index labels of all elements equal to any of the given values.
#
# Values that are not categories are ignored. The matching positions are
# sorted, so labels come back in vector order.
#
# @param values [Array<Object>] values to look for
# @return [Array<Object>] index labels of the matching elements
def indexes(*values)
  known = values & categories
  hits = known.flat_map { |value| @cat_hash[value] }.sort
  index.to_a.values_at(*hits)
end

#initialize_category(data, opts = {}) ⇒ Object

Note:

Base category is set to the first category encountered in the vector.

Initializes a vector to store categorical data.

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c],
  type: :category,
  ordered: true,
  categories: [:a, :b, :c, 1]
# => #<DaruLite::Vector(5)>
#   0   a
#   1   1
#   2   a
#   3   1
#   4   c

Options Hash (opts):

  • :ordered (Boolean)

    true if data is ordered, false otherwise

  • :categories (Array)

    categories to associate with the vector. It adds extra categories if specified and also provides the order of the categories.

  • :index (object)

    gives an index to the vector. By default it runs from 0 to size-1



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/daru_lite/category.rb', line 28

# Sets up the receiver as a categorical vector for +data+.
#
# The core storage is built by initialize_core_attributes. When
# opts[:categories] is given, the list is validated, categories not found in
# the data are added, and the list's order is applied. The base category is
# chosen afterwards, so it is the first category of the final order.
#
# @param data [Array<Object>] the categorical values
# @param opts [Hash] options
# @option opts [Array<Object>] :categories categories (and their order) to
#   associate with the vector
# @option opts [Boolean] :ordered (false) whether the categories are ordered
# @option opts [Object] :name name of the vector
# @option opts [Object] :index index, passed through coerce_index
# @return [self]
def initialize_category(data, opts = {})
  @type = :category
  initialize_core_attributes(data)

  requested = opts[:categories]
  if requested
    validate_categories(requested)
    add_extra_categories(requested - categories)
    order_with(requested)
  end

  # Unordered unless explicitly requested.
  @ordered = opts[:ordered] || false

  # Dummy coding is the default coding scheme.
  @coding_scheme = :dummy

  # The base category won't be present in the coding.
  @base_category = @cat_hash.keys.first

  @name = opts[:name]
  @index = coerce_index(opts[:index])

  self
end

#maxobject

Note:

This operation will only work if the vector is ordered. To make the vector ordered, do `vector.ordered = true`

Returns the maximum category according to the order specified.

Examples:

dv = DaruLite::Vector.new ['second', 'second', 'third', 'first'],
  categories: ['first', 'second', 'third']
dv.max
# => 'third'


395
396
397
398
# File 'lib/daru_lite/category.rb', line 395

# Returns the greatest category, i.e. the last entry of #categories.
#
# assert_ordered is called first with the operation name.
# NOTE(review): presumably it raises when the vector is not ordered — the
# helper is not shown here, confirm.
#
# @return [Object] the last category in the category order
def max
  assert_ordered :max
  categories.last
end

#minobject

Note:

This operation will only work if the vector is ordered. To make the vector ordered, do `vector.ordered = true`

Returns the minimum category according to the order specified.

Examples:

dv = DaruLite::Vector.new ['second', 'second', 'third', 'first'],
  categories: ['first', 'second', 'third']
dv.min
# => 'first'


381
382
383
384
# File 'lib/daru_lite/category.rb', line 381

# Returns the smallest category, i.e. the first entry of #categories.
#
# assert_ordered is called first with the operation name.
# NOTE(review): presumably it raises when the vector is not ordered — the
# helper is not shown here, confirm.
#
# @return [Object] the first category in the category order
def min
  assert_ordered :min
  categories.first
end

#ordered=(bool) ⇒ Object

Make categorical data ordered or unordered.

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.ordered = true
dv.ordered?
# => true


289
290
291
# File 'lib/daru_lite/category.rb', line 289

# Marks the categorical data as ordered or unordered.
#
# The value is stored as-is in @ordered; no coercion or validation happens here.
#
# @param bool [Boolean] true for ordered, false for unordered
# @return [Boolean] the assigned value
def ordered=(bool)
  @ordered = bool
end

#ordered?Boolean

Tells whether vector is ordered or not.

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.ordered?
# => false


278
279
280
# File 'lib/daru_lite/category.rb', line 278

# Tells whether the vector is ordered.
#
# Returns @ordered unchanged; #initialize_category sets it to
# `opts[:ordered] || false`.
#
# @return [Boolean] the current value of @ordered
def ordered?
  @ordered
end

#positions(*values) ⇒ Object



731
732
733
734
# File 'lib/daru_lite/category.rb', line 731

# Returns the sorted positions of all elements equal to any of the given values.
#
# Values that are not categories are ignored, and repeated values are
# considered only once.
#
# @param values [Array<Object>] values to look for
# @return [Array<Integer>] positions in ascending order
def positions(*values)
  (values & categories).flat_map { |value| @cat_hash[value] }.sort
end

#reindex!(idx) ⇒ DaruLite::Vector

Note:

Unlike #reorder! which takes positions as input it takes index as an input to reorder the vector

Sets new index for vector. Preserves index->value correspondence.

Examples:

dv = DaruLite::Vector.new [3, 2, 1], index: ['c', 'b', 'a'], type: :category
dv.reindex! ['a', 'b', 'c']
# => #<DaruLite::Vector(3)>
#   a   1
#   b   2
#   c   3

Raises:

  • (ArgumentError)


546
547
548
549
550
551
552
553
554
555
556
557
# File 'lib/daru_lite/category.rb', line 546

# Re-arranges the vector in place to follow the new index +idx+, keeping each
# value attached to its index label.
#
# +idx+ must contain exactly the labels of the current index (compared after
# sorting both). The values are read label by label in the new order, the
# core storage is rebuilt from them, and the previous category order is then
# restored.
#
# @param idx [DaruLite::Index, Object] new index; anything that is not a
#   DaruLite::Index is wrapped with DaruLite::Index.new
# @return [self]
# @raise [ArgumentError] if +idx+ does not hold the same labels as the current index
def reindex!(idx)
  idx = DaruLite::Index.new(idx) unless idx.is_a?(DaruLite::Index)
  unless idx.to_a.sort == index.to_a.sort
    raise ArgumentError, 'Invalid index specified'
  end

  previous_categories = categories
  reordered = idx.map { |label| self[label] }
  initialize_core_attributes(reordered)
  self.categories = previous_categories
  self.index = idx
  self
end

#reject_values(*values) ⇒ DaruLite::Vector

Return a vector with specified values removed

Examples:

dv = DaruLite::Vector.new [1, 2, nil, Float::NAN], type: :category
dv.reject_values nil, Float::NAN
# => #<DaruLite::Vector(2)>
#   0   1
#   1   2


674
675
676
677
678
679
680
681
682
# File 'lib/daru_lite/category.rb', line 674

# Returns a vector without the elements equal to any of the given values.
#
# The positions to keep are all positions minus those recorded in @cat_hash
# for the rejected values. #at returns a bare value rather than a vector when
# a single position is requested; in that case the position is requested
# again as a one-element range so the result is a vector. Unused categories
# are removed from the result.
#
# @param values [Array<Object>] values to drop
# @return [DaruLite::Vector] the filtered vector
def reject_values(*values)
  rejected = values.flat_map { |value| @cat_hash[value] }
  kept = size.times.to_a - rejected

  result = at(*kept)
  unless result.is_a?(DaruLite::Vector)
    only = kept.first
    result = at(only..only)
  end
  result.remove_unused_categories
end

#remove_unused_categoriesDaruLite::Vector

Note:

If the base category is removed, then the first occurring category in the data is taken as the base category. The order of the remaining categories is preserved.

Removes the unused categories

Examples:

dv = DaruLite::Vector.new [:one, :two, :one], type: :category,
  categories: [:three, :two, :one]
dv.remove_unused_categories
dv.categories
# => [:two, :one]


362
363
364
365
366
367
368
369
370
# File 'lib/daru_lite/category.rb', line 362

# Drops every category that no element of the vector uses.
#
# The core storage is rebuilt from the current values, then the surviving
# categories are put back into their previous relative order. If the base
# category did not survive, the first remaining category becomes the base.
#
# @return [self]
def remove_unused_categories
  previous_order = categories

  initialize_core_attributes(to_a)
  self.categories = previous_order & categories
  self.base_category = @cat_hash.keys.first unless categories.include?(base_category)
  self
end

#rename_categories(old_to_new) ⇒ Object

Note:

The order of categories after renaming is preserved but new categories are added at the end in the order. Also the base-category is reassigned to new value if it is renamed

Rename categories.

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.rename_categories :a => :b
dv
# => #<DaruLite::Vector(5)>
#   0   b
#   1   1
#   2   b
#   3   1
#   4   c


337
338
339
340
341
342
343
344
345
346
347
348
# File 'lib/daru_lite/category.rb', line 337

# Renames categories according to the +old_to_new+ mapping.
#
# Every value with an entry in the mapping is replaced and the core storage
# is rebuilt. Categories that were not renamed keep their relative order; the
# new names are appended after them. A renamed base category follows its new
# name.
#
# @param old_to_new [Hash{Object => Object}] old category => new category
# @return [self]
def rename_categories(old_to_new)
  previous = categories
  renamed = to_a.map { |cat| old_to_new.fetch(cat, cat) }

  initialize_core_attributes(renamed)
  self.categories = (previous - old_to_new.keys) | old_to_new.values
  self.base_category = old_to_new[base_category] if old_to_new.key?(base_category)
  self
end

#reorder!(order) ⇒ Object

Note:

Unlike #reindex! which takes index as input, it takes positions as an input to reorder the vector

Reorder the vector with given positions

Examples:

dv = DaruLite::Vector.new [3, 2, 1], index: ['c', 'b', 'a'], type: :category
dv.reorder! [2, 1, 0]
# => #<DaruLite::Vector(3)>
#   a   1
#   b   2
#   c   3

Raises:

  • (ArgumentError)


523
524
525
526
527
528
529
530
531
532
# File 'lib/daru_lite/category.rb', line 523

# Re-arranges the values in place: the element at position order[i] moves to
# position i.
#
# +order+ must be a permutation of 0...size. The core storage is rebuilt from
# the re-arranged values.
# NOTE(review): this method does not assign @index itself, and the rebuild
# helper is not shown here — confirm whether the index is expected to follow.
#
# @param order [Array<Integer>] permutation of all positions
# @return [self]
# @raise [ArgumentError] if +order+ is not a permutation of 0...size
def reorder!(order)
  unless order.sort == size.times.to_a
    raise ArgumentError, 'Invalid order specified'
  end

  # TODO: Room for optimization
  snapshot = to_a
  rearranged = order.map { |position| snapshot[position] }
  initialize_core_attributes(rearranged)
  self
end

#replace_values(old_values, new_value) ⇒ DaruLite::Vector

Note:

It performs the replace in place.

Replaces specified values with a new value

Examples:

dv = DaruLite::Vector.new [1, 2, :a, :b]
dv.replace_values [:a, :b], nil
dv
# =>
# #<DaruLite::Vector:19903200 @name = nil @metadata = {} @size = 4 >
#     nil
#   0   1
#   1   2
#   2 nil
#   3 nil


725
726
727
728
729
# File 'lib/daru_lite/category.rb', line 725

# Replaces every occurrence of the given value(s) with +new_value+, in place.
#
# Implemented as a category rename: each old value is mapped to +new_value+
# and the mapping is handed to #rename_categories.
#
# @param old_values [Array<Object>, Object] value or values to replace; a
#   non-Array argument (including nil) is treated as one single value
# @param new_value [Object] replacement value
# @return [Object] whatever #rename_categories returns
def replace_values(old_values, new_value)
  targets = old_values.is_a?(Array) ? old_values : [old_values]
  rename_categories(targets.to_h { |value| [value, new_value] })
end

#set_at(positions, val) ⇒ Object

Modifies values at specified positions.

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.add_category :b
dv.set_at [0, 1], :b
# => #<DaruLite::Vector(5)>
#   0   b
#   1   b
#   2   a
#   3   1
#   4   c


256
257
258
259
260
# File 'lib/daru_lite/category.rb', line 256

# Assigns +val+ to every given position.
#
# Positions are checked with validate_positions first. A single scalar
# position is accepted as well as a collection: Array() wraps a scalar and
# expands a Range, so the loop below handles every shape that the splat in
# the validation call handles.
#
# @param positions [Array<Integer>, Range, Integer] position(s) to modify
# @param val [Object] category to store (handled by modify_category_at)
# @return [self]
def set_at(positions, val)
  validate_positions(*positions)
  # each, not map: the calls are made for their side effect only.
  Array(positions).each { |pos| modify_category_at pos, val }
  self
end

#sizeObject

Size of categorical data.

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.size
# => 5


268
269
270
# File 'lib/daru_lite/category.rb', line 268

# Number of elements in the vector, taken from the internal @array (one
# entry per element).
#
# @return [Integer]
def size
  @array.size
end

#sortObject



438
439
440
# File 'lib/daru_lite/category.rb', line 438

# Returns a sorted copy of the vector: the vector is duplicated with #dup and
# the copy is sorted in place with #sort!.
#
# @return [DaruLite::Vector] the sorted copy
def sort
  dup.sort!
end

#sort!DaruLite::Vector

Note:

This operation will only work if the vector is ordered. To make the vector ordered, do `vector.ordered = true`

Sorts the vector in the order specified.

Examples:

dv = DaruLite::Vector.new ['second', 'second', 'third', 'first'],
  categories: ['first', 'second', 'third'],
  type: :category,
  ordered: true
dv.sort!
# => #<DaruLite::Vector(4)>
#       3  first
#       0 second
#       1 second
#       2  third


415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
# File 'lib/daru_lite/category.rb', line 415

# Sorts the vector in place according to the category order.
#
# assert_ordered is called first with the operation name. The index is then
# rebuilt by concatenating, category by category, the labels of the elements
# of each category. Finally @array is overwritten with consecutive runs of
# each category's integer code, and @cat_hash is replaced by a hash mapping
# each category to its new contiguous range of positions.
#
# @return [self]
def sort!
  # TODO: Simplify the code
  assert_ordered :sort

  # Build sorted index
  labels = @index.to_a
  sorted_labels = @cat_hash.values.map { |positions| labels.values_at(*positions) }.flatten
  @index = @index.class.new sorted_labels

  # Build sorted data. @cat_hash is swapped only after the loop, because the
  # loop still reads the old position lists.
  offset = 0
  sorted_hash = {}
  categories.each do |cat|
    cat_count = @cat_hash[cat].size
    cat_count.times { |i| @array[offset + i] = int_from_cat(cat) }
    sorted_hash[cat] = (offset...(offset + cat_count)).to_a
    offset += cat_count
  end
  @cat_hash = sorted_hash

  self
end

#to_aArray

Returns all categorical data

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c], type: :category
dv.to_a
# => [:a, 1, :a, 1, :c]


79
80
81
# File 'lib/daru_lite/category.rb', line 79

# Returns all categorical values as an Array, by materialising the
# enumerator that #each returns when called without a block.
#
# @return [Array<Object>] the values in storage order
def to_a
  each.to_a
end

#to_categoryDaruLite::Vector

Does nothing, since the vector is already of type category.



627
628
629
# File 'lib/daru_lite/category.rb', line 627

# No-op conversion: returns the receiver itself, not a copy.
#
# @return [self]
def to_category
  self
end

#to_intsArray

Returns integer coding for categorical data in the order starting from 0. For example if order is [:a, :b, :c], then :a, will be coded as 0, :b as 1 and :c as 2

Examples:

dv = DaruLite::Vector.new [:a, 1, :a, 1, :c],
  type: :category,
  categories: [:a, :b, :c, 1]
dv.to_ints
# => [0, 1, 0, 1, 2]


507
508
509
# File 'lib/daru_lite/category.rb', line 507

# Returns the integer coding of the data.
#
# This is the internal @array object itself, not a copy.
#
# @return [Array<Integer>] one integer code per element
def to_ints
  @array
end

#to_non_categoryDaruLite::Vector

Converts a category type vector to non category type vector



633
634
635
# File 'lib/daru_lite/category.rb', line 633

# Builds a new vector from the values, name and index of this one, without
# passing `type: :category`. The category list and the ordering flag are not
# passed on.
#
# @return [DaruLite::Vector] the new vector
def to_non_category
  DaruLite::Vector.new to_a, name: name, index: index
end

#where(bool_array) ⇒ DaruLite::Vector

For querying the data

Examples:

dv = DaruLite::Vector.new ['I', 'II', 'I', 'III', 'I', 'II'],
  type: :category,
  ordered: true,
  categories: ['I', 'II', 'III']
dv.where(dv.mt('I') & dv.lt('III'))
# => #<DaruLite::Vector(2)>
#   1  II
#   5  II


591
592
593
# File 'lib/daru_lite/category.rb', line 591

# Filters the vector with a boolean mask by delegating to
# DaruLite::Core::Query.vector_where.
#
# @param bool_array [Object] the mask, forwarded unchanged
#   (NOTE(review): presumably a DaruLite::Core::Query::BoolArray — confirm)
# @return [DaruLite::Vector] whatever vector_where returns
def where(bool_array)
  DaruLite::Core::Query.vector_where self, bool_array
end