Class: Rumale::Preprocessing::OrdinalEncoder

Inherits:
Object
  • Object
show all
Includes:
Base::BaseEstimator, Base::Transformer
Defined in:
lib/rumale/preprocessing/ordinal_encoder.rb

Overview

Transform categorical features to integer values.

Examples:

encoder = Rumale::Preprocessing::OrdinalEncoder.new
training_samples = [['left', 10], ['right', 15], ['right', 20]]
training_samples = Numo::NArray.asarray(training_samples)
encoder.fit(training_samples)
p encoder.categories
# [["left", "right"], [10, 15, 20]]
testing_samples = [['left', 20], ['right', 10]]
testing_samples = Numo::NArray.asarray(testing_samples)
encoded = encoder.transform(testing_samples)
p encoded
# Numo::DFloat#shape=[2,2]
# [[0, 2],
#  [1, 0]]
p encoder.inverse_transform(encoded)
# Numo::RObject#shape=[2,2]
# [["left", 20],
#  ["right", 10]]

Instance Attribute Summary collapse

Attributes included from Base::BaseEstimator

#params

Instance Method Summary collapse

Constructor Details

#initialize(categories: nil) ⇒ OrdinalEncoder

Create a new encoder that transforms categorical features to integer values.

Parameters:

  • categories (Nil/Array) (defaults to: nil)

    The category list for each feature. If nil is given, the categories extracted from the training data by the fit method are used.



40
41
42
43
# File 'lib/rumale/preprocessing/ordinal_encoder.rb', line 40

# Create a new encoder that maps categorical features to integer values.
#
# @param categories [Array, nil] The category list for each feature. When nil is given,
#   the categories are expected to be extracted from the training data by calling #fit.
# NOTE(review): check_params_type_or_nil is defined outside this listing; presumably it
#   raises when categories is neither an Array nor nil — confirm against its definition.
def initialize(categories: nil)
  check_params_type_or_nil(Array, categories: categories)
  @categories = categories
end

Instance Attribute Details

#categories ⇒ Array (readonly)

Return the array of categorical values for each feature.

Returns:

  • (Array)

    (size: n_features)



34
35
36
# File 'lib/rumale/preprocessing/ordinal_encoder.rb', line 34

# Return the category lists currently held by the encoder.
#
# @return [Array, nil] One list of categories per feature (size: n_features). This is the
#   value given to the constructor until #fit or #marshal_load replaces it.
def categories
  @categories
end

Instance Method Details

#fit(x) ⇒ OrdinalEncoder

Fit encoder by extracting the category for each feature.

Parameters:

  • x (Numo::NArray)

    (shape: [n_samples, n_features]) The samples consisting of categorical features.

Returns:

  • (OrdinalEncoder)

    The fitted encoder itself.

Raises:

  • (TypeError)


51
52
53
54
55
56
57
# File 'lib/rumale/preprocessing/ordinal_encoder.rb', line 51

# Learn the category list of every feature column from the given samples.
#
# Each column is reduced to its distinct values, which are then sorted; the position of a
# value in that sorted list is the integer later used to encode it.
#
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The samples consisting of categorical features.
# @param _y [Object] Accepted but never read by this method.
# @return [OrdinalEncoder] The encoder itself, so that calls can be chained.
# @raise [TypeError] If x is not a Numo::NArray.
# @raise [ArgumentError] If x is not a 2-D array.
def fit(x, _y = nil)
  unless x.is_a?(Numo::NArray)
    raise TypeError, 'Expect class of sample matrix to be Numo::NArray'
  end
  unless x.shape.size == 2
    raise ArgumentError, 'Expect sample matrix to be 2-D array'
  end

  # One sorted list of distinct values per column.
  @categories = (0...x.shape[1]).map do |col|
    x[true, col].to_a.uniq.sort
  end
  self
end

#fit_transform(x) ⇒ Numo::DFloat

Fit the encoder, then return the categorical features encoded as integer values.

Parameters:

  • x (Numo::NArray)

    (shape: [n_samples, n_features]) The samples consisting of categorical features.

Returns:

  • (Numo::DFloat)

    The categorical features encoded as integer values.

Raises:

  • (TypeError)


65
66
67
68
69
# File 'lib/rumale/preprocessing/ordinal_encoder.rb', line 65

# Fit the encoder on the given samples, then encode those same samples.
#
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The samples consisting of categorical features.
# @param _y [Object] Accepted but never read by this method.
# @return [Numo::DFloat] Whatever #transform returns for x after fitting.
# @raise [TypeError] If x is not a Numo::NArray.
# @raise [ArgumentError] If x is not a 2-D array.
def fit_transform(x, _y = nil)
  unless x.is_a?(Numo::NArray)
    raise TypeError, 'Expect class of sample matrix to be Numo::NArray'
  end
  unless x.shape.size == 2
    raise ArgumentError, 'Expect sample matrix to be 2-D array'
  end

  # Encode with the object returned by #fit.
  fitted = fit(x)
  fitted.transform(x)
end

#inverse_transform(x) ⇒ Numo::NArray

Decode values to categorical features.

Parameters:

  • x (Numo::DFloat)

    (shape: [n_samples, n_features]) The samples consisting of values transformed from categorical features.

Returns:

  • (Numo::NArray)

    The decoded features.

Raises:

  • (ArgumentError)


93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/rumale/preprocessing/ordinal_encoder.rb', line 93

# Decode integer codes back into the categorical values they stand for.
#
# Every entry is truncated with #to_i and used as an index into the category list of its
# column.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples consisting of values
#   transformed from categorical features.
# @return [Numo::NArray] The decoded features.
# @raise [ArgumentError] If the number of columns of x differs from the number of category lists.
# NOTE(review): check_sample_array is defined outside this listing; presumably it validates
#   the class and dimensionality of x — confirm against its definition.
def inverse_transform(x)
  check_sample_array(x)

  n_columns = x.shape[1]
  unless n_columns == @categories.size
    raise ArgumentError, 'Expect the number of features and the number of categories to be equal'
  end

  # Decode column by column, then flip back to row-major sample order.
  decoded_columns = (0...n_columns).map do |col|
    labels = @categories[col]
    x[true, col].to_a.map { |code| labels[code.to_i] }
  end

  Numo::NArray.asarray(decoded_columns.transpose)
end

#marshal_dump ⇒ Hash

Dump marshal data.

Returns:

  • (Hash)

    The marshal data about OrdinalEncoder.



108
109
110
# File 'lib/rumale/preprocessing/ordinal_encoder.rb', line 108

# Build the data that represents this encoder when it is marshalled.
#
# @return [Hash] A hash whose only entry, :categories, holds the current category lists.
def marshal_dump
  { categories: @categories }
end

#marshal_load(obj) ⇒ nil

Load marshal data.

Returns:

  • (nil)


114
115
116
117
# File 'lib/rumale/preprocessing/ordinal_encoder.rb', line 114

# Restore the encoder state from marshalled data.
#
# @param obj [Hash] The marshalled data; only the :categories entry is read.
# @return [nil]
def marshal_load(obj)
  @categories = obj[:categories]
  nil
end

#transform(x) ⇒ Numo::DFloat

Encode categorical features.

Parameters:

  • x (Numo::NArray)

    (shape: [n_samples, n_features]) The samples consisting of categorical features.

Returns:

  • (Numo::DFloat)

    The categorical features encoded as integer values.

Raises:

  • (TypeError)


75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/rumale/preprocessing/ordinal_encoder.rb', line 75

# Encode categorical features as the positions of their values in the category lists.
#
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The samples consisting of categorical features.
# @return [Numo::DFloat] The categorical features encoded as integer values.
# @raise [TypeError] If x is not a Numo::NArray.
# @raise [ArgumentError] If x is not a 2-D array, if its number of columns differs from the
#   number of category lists, or if a value is not present in the category list of its column.
def transform(x)
  raise TypeError, 'Expect class of sample matrix to be Numo::NArray' unless x.is_a?(Numo::NArray)
  raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2

  n_features = x.shape[1]
  raise ArgumentError, 'Expect the number of features and the number of categories to be equal' if n_features != @categories.size

  transformed = Array.new(n_features) do |n|
    feature_categories = @categories[n]
    x[true, n].to_a.map do |v|
      code = feature_categories.index(v)
      # Array#index returns nil for a value missing from the category list. nil is not a
      # valid ordinal code, so fail loudly instead of passing it on to the numeric array.
      raise ArgumentError, "Unknown category #{v.inspect} found in feature #{n}" if code.nil?

      code
    end
  end

  # The codes were collected per column; transpose back to [n_samples, n_features].
  Numo::DFloat.asarray(transformed.transpose)
end