Class: ClusterKit::Dimensionality::SVD

Inherits:
Object
  • Object
show all
Defined in:
lib/clusterkit/dimensionality/svd.rb

Overview

Singular Value Decomposition: decomposes a matrix into its U, S, and V^T components.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(n_components: nil, n_iter: 2, random_seed: nil) ⇒ SVD

Initialize a new SVD instance

Parameters:

  • n_components (Integer) (defaults to: nil)

    Number of components to compute

  • n_iter (Integer) (defaults to: 2)

    Number of iterations for the randomized algorithm (default: 2)

  • random_seed (Integer, nil) (defaults to: nil)

    Random seed for reproducibility



18
19
20
21
22
23
# File 'lib/clusterkit/dimensionality/svd.rb', line 18

# Builds an unfitted SVD model; no computation happens here.
#
# @param n_components [Integer, nil] number of components to compute
#   (nil leaves the choice to fit time)
# @param n_iter [Integer] number of iterations for the randomized algorithm
# @param random_seed [Integer, nil] random seed for reproducibility
def initialize(n_components: nil, n_iter: 2, random_seed: nil)
  # A fresh instance has no decomposition yet.
  @fitted = false

  @random_seed = random_seed
  @n_iter = n_iter
  @n_components = n_components
end

Instance Attribute Details

#n_components ⇒ Object (readonly)

Returns the value of attribute n_components.



11
12
13
# File 'lib/clusterkit/dimensionality/svd.rb', line 11

# Reader for the component count given to the constructor.
# May be nil, in which case fit_transform picks min(rows, columns) of its input.
def n_components
  @n_components
end

#n_features ⇒ Object (readonly)

Returns the value of attribute n_features.



12
13
14
# File 'lib/clusterkit/dimensionality/svd.rb', line 12

# Reader for the column count of the data last passed to fit_transform
# (taken from the first row). The constructor does not set it, so it is
# nil until the model has been fitted.
def n_features
  @n_features
end

#n_iter ⇒ Object (readonly)

Returns the value of attribute n_iter.



11
12
13
# File 'lib/clusterkit/dimensionality/svd.rb', line 11

# Reader for the iteration count given to the constructor (default 2);
# fit_transform forwards it to the class-level randomized_svd.
def n_iter
  @n_iter
end

#random_seed ⇒ Object (readonly)

Returns the value of attribute random_seed.



11
12
13
# File 'lib/clusterkit/dimensionality/svd.rb', line 11

# Reader for the random seed given to the constructor (nil when none was supplied).
# NOTE(review): none of the methods visible in this file pass the seed on to
# the decomposition call — confirm whether it is used anywhere.
def random_seed
  @random_seed
end

#s ⇒ Object (readonly)

Returns the value of attribute s.



12
13
14
# File 'lib/clusterkit/dimensionality/svd.rb', line 12

# Raw reader for the singular values stored by fit_transform.
# Unlike singular_values, it does not raise on an unfitted model; the
# constructor never sets @s, so it is nil before fitting.
def s
  @s
end

#u ⇒ Object (readonly)

Returns the value of attribute u.



12
13
14
# File 'lib/clusterkit/dimensionality/svd.rb', line 12

# Raw reader for the U matrix stored by fit_transform.
# Unlike components_u, it does not raise on an unfitted model; the
# constructor never sets @u, so it is nil before fitting.
def u
  @u
end

#vt ⇒ Object (readonly)

Returns the value of attribute vt.



12
13
14
# File 'lib/clusterkit/dimensionality/svd.rb', line 12

# Raw reader for the V^T matrix stored by fit_transform.
# Unlike components_vt, it does not raise on an unfitted model; the
# constructor never sets @vt, so it is nil before fitting.
def vt
  @vt
end

Class Method Details

.randomized_svd(matrix, k, n_iter: 2) ⇒ Array

Class method for randomized SVD (kept for compatibility)

Parameters:

  • matrix (Array<Array<Numeric>>)

    Input matrix

  • k (Integer)

    Number of components

  • n_iter (Integer) (defaults to: 2)

    Number of iterations

Returns:

  • (Array)

    Returns [U, S, Vt]



128
129
130
# File 'lib/clusterkit/dimensionality/svd.rb', line 128

# Thin class-level wrapper: forwards matrix, k and n_iter (positionally) to
# ::ClusterKit::SVD.randomized_svd_rust and returns its result unchanged.
# NOTE(review): the target lives in ::ClusterKit::SVD rather than this class's
# ClusterKit::Dimensionality namespace — presumably the native extension
# module; confirm.
#
# @param matrix [Array<Array<Numeric>>] input matrix
# @param k [Integer] number of components
# @param n_iter [Integer] number of iterations
# @return [Array] [U, S, Vt]
def self.randomized_svd(matrix, k, n_iter: 2)
  ::ClusterKit::SVD.randomized_svd_rust(matrix, k, n_iter)
end

Instance Method Details

#components_u ⇒ Array<Array<Float>>

Get the U matrix (left singular vectors)

Returns:

  • (Array<Array<Float>>)

    U matrix

Raises:

  • (RuntimeError)


55
56
57
58
# File 'lib/clusterkit/dimensionality/svd.rb', line 55

# Left singular vectors (U) of the fitted decomposition.
#
# @return [Array<Array<Float>>] U matrix
# @raise [RuntimeError] if the model has not been fitted
def components_u
  return @u if fitted?

  # A bare string raise produces RuntimeError.
  raise "Model must be fitted first"
end

#components_vt ⇒ Array<Array<Float>>

Get the V^T matrix (right singular vectors, transposed)

Returns:

  • (Array<Array<Float>>)

    V^T matrix

Raises:

  • (RuntimeError)


69
70
71
72
# File 'lib/clusterkit/dimensionality/svd.rb', line 69

# Right singular vectors, transposed (V^T), of the fitted decomposition.
#
# @return [Array<Array<Float>>] V^T matrix
# @raise [RuntimeError] if the model has not been fitted
def components_vt
  return @vt if fitted?

  # A bare string raise produces RuntimeError.
  raise "Model must be fitted first"
end

#fit(data) ⇒ self

Fit the model to data

Parameters:

  • data (Array<Array<Numeric>>)

    Input data

Returns:

  • (self)


48
49
50
51
# File 'lib/clusterkit/dimensionality/svd.rb', line 48

# Fits the model to +data+, discarding the matrices that fit_transform
# returns so calls can be chained on the model itself.
#
# @param data [Array<Array<Numeric>>] input data
# @return [self]
def fit(data)
  tap { fit_transform(data) }
end

#fit_transform(data) ⇒ Array

Fit the model and transform data in one step

Parameters:

  • data (Array<Array<Numeric>>)

    Input data

Returns:

  • (Array)

    Returns [U, S, Vt] matrices



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/clusterkit/dimensionality/svd.rb', line 28

# Fits the model and returns the decomposition in one step.
#
# Records the input's column count and object identity (transform uses the
# latter to recognise the data it was fitted on), then delegates the
# factorisation to the class-level randomized_svd.
#
# @param data [Array<Array<Numeric>>] input data
# @return [Array] the [U, S, Vt] matrices
def fit_transform(data)
  validate_input(data)

  # Remember what we were fitted on for later transform calls.
  @n_features = data.first.size
  @original_data_id = data.object_id

  # Without an explicit component count, use the smaller matrix dimension.
  rank = @n_components || [data.size, @n_features].min

  decomposition = self.class.randomized_svd(data, rank, n_iter: @n_iter)
  @u, @s, @vt = decomposition
  @fitted = true

  [@u, @s, @vt]
end

#fitted? ⇒ Boolean

Check if the model has been fitted

Returns:

  • (Boolean)


76
77
78
# File 'lib/clusterkit/dimensionality/svd.rb', line 76

# Reports the @fitted flag: false after construction, true once
# fit_transform has completed.
#
# @return [Boolean]
def fitted?
  @fitted
end

#inverse_transform(transformed_data) ⇒ Array<Array<Float>>

Inverse transform (reconstruct from components)

Parameters:

  • transformed_data (Array<Array<Float>>)

    Transformed data

Returns:

  • (Array<Array<Float>>)

    Reconstructed data

Raises:

  • (RuntimeError)


103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/clusterkit/dimensionality/svd.rb', line 103

# Reconstructs data from its projection by multiplying each row with V^T.
#
# Each input row is expected to hold one coefficient per component (i.e. a
# row of U * S); output rows have as many entries as the first row of V^T.
#
# @param transformed_data [Array<Array<Float>>] transformed data
# @return [Array<Array<Float>>] reconstructed data
# @raise [RuntimeError] if the model has not been fitted
def inverse_transform(transformed_data)
  raise "Model must be fitted first" unless fitted?

  transformed_data.map do |coefficients|
    blank_row = Array.new(@vt.first.size, 0.0)

    # Accumulate component by component: rebuilt += coefficient * V^T[k].
    coefficients.each_with_index.each_with_object(blank_row) do |(coefficient, k), rebuilt|
      @vt[k].each_with_index do |weight, j|
        rebuilt[j] += coefficient * weight
      end
    end
  end
end

#singular_values ⇒ Array<Float>

Get the singular values

Returns:

  • (Array<Float>)

    Singular values

Raises:

  • (RuntimeError)


62
63
64
65
# File 'lib/clusterkit/dimensionality/svd.rb', line 62

# Singular values (S) of the fitted decomposition.
#
# @return [Array<Float>] singular values
# @raise [RuntimeError] if the model has not been fitted
def singular_values
  return @s if fitted?

  # A bare string raise produces RuntimeError.
  raise "Model must be fitted first"
end

#transform(data) ⇒ Array<Array<Float>>

Transform data using fitted SVD (project onto components)

Parameters:

  • data (Array<Array<Numeric>>)

    Data to transform

Returns:

  • (Array<Array<Float>>)

    Transformed data projected onto SVD components

Raises:

  • (RuntimeError)


83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/clusterkit/dimensionality/svd.rb', line 83

# Projects data onto the fitted SVD components.
#
# When +data+ is the very object the model was fitted on (matched by
# object_id), the stored factors are reused and U * S is returned; any other
# input is handed to transform_new_data for projection.
#
# @param data [Array<Array<Numeric>>] data to transform
# @return [Array<Array<Float>>] transformed data projected onto SVD components
# @raise [RuntimeError] if the model has not been fitted
def transform(data)
  raise "Model must be fitted first" unless fitted?
  validate_transform_input(data)

  # Unseen data: delegate the projection to the helper.
  return transform_new_data(data) unless data.object_id == @original_data_id

  # Fitted data: scale column j of U by the j-th singular value.
  @u.map do |u_row|
    u_row.each_with_index.map { |entry, j| entry * @s[j] }
  end
end