Class: Statsample::Factor::PCA

Inherits:
Object
  • Object
show all
Includes:
Summarizable
Defined in:
lib/statsample/factor/pca.rb

Overview

Principal Component Analysis (PCA) of a covariance or correlation matrix..

NOTE: Sign of second and later eigenvalues could be different using Ruby or GSL, so values for PCs and component matrix should differ, because extendmatrix and gsl’s methods to calculate eigenvectors are different. Using R is worse, cause first eigenvector could have negative values! For Principal Axis Analysis, use Statsample::Factor::PrincipalAxis

Usage:

require 'statsample'
a = Daru::Vector.new([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1])
b = Daru::Vector.new([2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9])
ds = Daru::DataFrame.new({:a => a,:b => b})
cor_matrix = Statsample::Bivariate.correlation_matrix(ds)
pca=  Statsample::Factor::PCA.new(cor_matrix)
pca.m
=> 1
pca.eigenvalues
=> [1.92592927269225, 0.0740707273077545]
pca.component_matrix
=> GSL::Matrix
[  9.813e-01 
  9.813e-01 ]
pca.communalities
=> [0.962964636346122, 0.962964636346122]

References:

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Summarizable

#summary

Constructor Details

#initialize(matrix, opts = Hash.new) ⇒ PCA

Returns a new instance of PCA.



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/statsample/factor/pca.rb', line 54

def initialize(matrix, opts=Hash.new)
  @use_gsl = opts[:use_gsl]
  opts.delete :use_gsl

  @name=_("Principal Component Analysis")
  @matrix=matrix
  @n_variables=@matrix.column_size      
  @variables_names=(@matrix.respond_to? :fields) ? @matrix.fields : @n_variables.times.map {|i| "VAR_#{i+1}".to_sym }
  
  @matrix_type = @matrix.respond_to?(:_type) ? @matrix._type : :correlation
  
  @m=nil
  
  @rotation_type=Statsample::Factor::Varimax
  
  opts.each{|k,v|
    self.send("#{k}=",v) if self.respond_to? k
  }

  if @use_gsl.nil?
    @use_gsl=Statsample.has_gsl?
  end
  if @matrix.respond_to? :fields
    @variables_names=@matrix.fields
  else
    @variables_names=@n_variables.times.map {|i| "V#{i+1}".to_sym}
  end
  calculate_eigenpairs
  
  if @m.nil?
    # Set number of factors with eigenvalues > 1
    @m=@eigenpairs.find_all {|ev,ec| ev>=1.0}.size
  end
end

Instance Attribute Details

#mObject

Number of factors. Set by default to the number of factors with eigen values > 1



44
45
46
# File 'lib/statsample/factor/pca.rb', line 44

def m
  @m
end

#matrix_typeObject

Returns the value of attribute matrix_type.



53
54
55
# File 'lib/statsample/factor/pca.rb', line 53

def matrix_type
  @matrix_type
end

#nameObject

Name of analysis



40
41
42
# File 'lib/statsample/factor/pca.rb', line 40

def name
  @name
end

#rotation_typeObject

Type of rotation. By default, Statsample::Factor::Rotation::Varimax



52
53
54
# File 'lib/statsample/factor/pca.rb', line 52

def rotation_type
  @rotation_type
end

#summary_parallel_analysisObject

Add to the summary a parallel analysis report



50
51
52
# File 'lib/statsample/factor/pca.rb', line 50

def summary_parallel_analysis
  @summary_parallel_analysis
end

#summary_rotationObject

Add to the summary a rotation report



48
49
50
# File 'lib/statsample/factor/pca.rb', line 48

def summary_rotation
  @summary_rotation
end

#use_gslObject

Use GSL if available



46
47
48
# File 'lib/statsample/factor/pca.rb', line 46

def use_gsl
  @use_gsl
end

Instance Method Details

#communalities(m = nil) ⇒ Object



188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/statsample/factor/pca.rb', line 188

def communalities(m=nil)
  m||=@m
  h=[]
  @n_variables.times do |i|
    sum=0
    m.times do |j|
      sum += (@eigenpairs[j][0].abs*@eigenpairs[j][1][i]**2)
    end
    h.push(sum)
  end
  h
end

#component_matrix(m = nil) ⇒ Object



145
146
147
148
# File 'lib/statsample/factor/pca.rb', line 145

def component_matrix(m=nil)
  var="component_matrix_#{matrix_type}"
  send(var,m)
end

#component_matrix_correlation(m = nil) ⇒ Object

Matrix with correlations between components and variables



170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/statsample/factor/pca.rb', line 170

def component_matrix_correlation(m=nil)
  m||=@m
  raise "m should be > 0" if m<1
  omega_m=::Matrix.build(@n_variables, m) {0}
  gammas=[]
  m.times {|i|
    omega_m.column=i, @eigenpairs[i][1]
    gammas.push(Math::sqrt(@eigenpairs[i][0]))
  }
  gamma_m=::Matrix.diagonal(*gammas)
  cm=(omega_m*(gamma_m)).to_matrix
  
  cm.extend CovariateMatrix
  cm.name=_("Component matrix")
  cm.fields_x = @variables_names
  cm.fields_y = m.times.map { |i| "PC_#{i+1}".to_sym }
  cm
end

#component_matrix_covariance(m = nil) ⇒ Object

Matrix with correlations between components and variables. Based on Härdle & Simar (2003, p.243)



151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/statsample/factor/pca.rb', line 151

def component_matrix_covariance(m=nil)
  m||=@m
  raise "m should be > 0" if m<1
  ff=feature_matrix(m)
  cm=::Matrix.build(@n_variables, m) {0}
  @n_variables.times {|i|
    m.times {|j|
      cm[i,j]=ff[i,j] * Math.sqrt(eigenvalues[j] / @matrix[i,i])
    }
  }
  cm.extend NamedMatrix
  cm.name=_("Component matrix (from covariance)")
  cm.fields_x = @variables_names
  cm.fields_y = m.times.map {|i| "PC_#{i+1}".to_sym }
  
  cm
end

#eigenvaluesObject

Array with eigenvalues



201
202
203
# File 'lib/statsample/factor/pca.rb', line 201

def eigenvalues
  @eigenpairs.collect {|c| c[0] }
end

#eigenvectorsObject



204
205
206
207
208
# File 'lib/statsample/factor/pca.rb', line 204

def eigenvectors
  @eigenpairs.collect {|c| 
    @use_gsl ? c[1].to_gsl : Daru::Vector.new(c[1])
  }
end

#feature_matrix(m = nil) ⇒ Object

Feature matrix for m factors Returns m eigenvectors as columns. So, i=variable, j=component



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/statsample/factor/pca.rb', line 106

def feature_matrix(m=nil)
  m||=@m
  if @use_gsl
    omega_m=GSL::Matrix.zeros(@n_variables,m)
    ev=eigenvectors
    m.times do |i|
      omega_m.set_column(i,ev[i])
    end
    omega_m
  else
    omega_m=::Matrix.build(@n_variables, m) {0}
    m.times do |i|
      omega_m.column= i, @eigenpairs[i][1]
    end
    omega_m
  end
end

#principal_components(input, m = nil) ⇒ Object

Returns Principal Components for input matrix or dataset The number of PC to return is equal to parameter m. If m isn’t set, m set to number of PCs selected at object creation. Use covariance matrix



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/statsample/factor/pca.rb', line 128

def principal_components(input, m=nil)
  if @use_gsl
    data_matrix=input.to_gsl
  else
    data_matrix=input.to_matrix
  end
  m||=@m
  
  raise "data matrix variables<>pca variables" if data_matrix.column_size!=@n_variables
  
  fv=feature_matrix(m)
  pcs=(fv.transpose*data_matrix.transpose).transpose
  
  pcs.extend Statsample::NamedMatrix
  pcs.fields_y = m.times.map { |i| "PC_#{i+1}".to_sym }
  pcs.to_dataframe
end

#report_building(builder) ⇒ Object

:nodoc:



214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/statsample/factor/pca.rb', line 214

def report_building(builder) # :nodoc:
  builder.section(:name=>@name) do |generator|
    generator.text _("Number of factors: %d") % m
    generator.table(:name=>_("Communalities"), :header=>[_("Variable"),_("Initial"),_("Extraction"), _("%")]) do |t|
      communalities(m).each_with_index {|com, i|
        perc=com*100.quo(@matrix[i,i])
        t.row([@variables_names[i], "%0.3f" % @matrix[i,i]  , "%0.3f" % com, "%0.3f" % perc])
      }
    end
    te=total_eigenvalues
    generator.table(:name=>_("Total Variance Explained"), :header=>[_("Component"), _("E.Total"), _("%"), _("Cum. %")]) do |t|
      ac_eigen=0
      eigenvalues.each_with_index {|eigenvalue,i|
        ac_eigen+=eigenvalue
        t.row([_("Component %d") % (i+1), sprintf("%0.3f",eigenvalue), sprintf("%0.3f%%", eigenvalue*100.quo(te)), sprintf("%0.3f",ac_eigen*100.quo(te))])
      }
    end
    
    generator.parse_element(component_matrix(m))
              
    if (summary_rotation)
      generator.parse_element(rotation)
    end
  end
end

#rotationObject



88
89
90
# File 'lib/statsample/factor/pca.rb', line 88

def rotation
  @rotation_type.new(component_matrix)
end

#total_eigenvaluesObject



91
92
93
# File 'lib/statsample/factor/pca.rb', line 91

def total_eigenvalues
  eigenvalues.inject(0) {|ac,v| ac+v}
end