Class: Statsample::Factor::PCA
- Inherits:
-
Object
- Object
- Statsample::Factor::PCA
- Includes:
- GetText
- Defined in:
- lib/statsample/factor/pca.rb
Overview
Principal Component Analysis (PCA) of a covariance or correlation matrix.
For Principal Axis Analysis, use Statsample::Factor::PrincipalAxis
Usage:
require 'statsample'
a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
ds={'a'=>a,'b'=>b}.to_dataset
cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
pca=Statsample::Factor::PCA.new(cor_matrix)
pca.m
=> 1
pca.eigenvalues
=> [1.92592927269225, 0.0740707273077545]
pca.component_matrix
=> GSL::Matrix
[ 9.813e-01
9.813e-01 ]
pca.communalities
=> [0.962964636346122, 0.962964636346122]
References:
-
SPSS manual
-
Smith, L. (2002). A tutorial on Principal Component Analysis. Available at courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
Instance Attribute Summary collapse
-
#m ⇒ Object
Number of factors.
-
#name ⇒ Object
Name of analysis.
Instance Method Summary collapse
-
#communalities(m = nil) ⇒ Object
Communalities for all variables given m factors.
-
#component_matrix(m = nil) ⇒ Object
Component matrix for m factors.
-
#data_transformation(data_matrix, m) ⇒ Object
Projects a data matrix onto the feature vector for m factors (data transformation).
-
#eigenvalues ⇒ Object
Array with eigenvalues.
-
#feature_vector(m = nil) ⇒ Object
Feature vector for m factors.
-
#initialize(matrix, opts = Hash.new) ⇒ PCA
constructor
A new instance of PCA.
-
#report_building(generator) ⇒ Object
:nodoc:.
- #summary ⇒ Object
Constructor Details
#initialize(matrix, opts = Hash.new) ⇒ PCA
Returns a new instance of PCA.
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/statsample/factor/pca.rb', line 40
# Builds a PCA from a covariance or correlation matrix.
#
# matrix:: the matrix to analyze; converted with #to_gsl when it responds
#          to that method.
# opts::   Hash of attribute values. Each key k is assigned through the
#          matching writer (k=) when this object exposes one, and is
#          silently ignored otherwise.
#
# When no number of factors is supplied through opts, @m defaults to the
# number of eigenpairs whose eigenvalue is >= 1.0.
def initialize(matrix, opts = Hash.new)
  matrix = matrix.to_gsl if matrix.respond_to?(:to_gsl)
  @name = ""
  @matrix = matrix
  @n_variables = @matrix.size1
  @m = nil
  opts.each do |key, value|
    writer = "#{key}="
    # Probe the writer itself: probing the reader let reader-only names
    # through and then failed with NoMethodError on the missing writer.
    send(writer, value) if respond_to?(writer)
  end
  calculate_eigenpairs
  # Default the number of factors to those with eigenvalue >= 1.0
  @m = @eigenpairs.count { |pair| pair[0] >= 1.0 } if @m.nil?
end
Instance Attribute Details
#m ⇒ Object
Number of factors. Set by default to the number of factors with eigenvalues >= 1
36 37 38 |
# File 'lib/statsample/factor/pca.rb', line 36 def m @m end |
#name ⇒ Object
Name of analysis
33 34 35 |
# File 'lib/statsample/factor/pca.rb', line 33 def name @name end |
Instance Method Details
#communalities(m = nil) ⇒ Object
Communalities for all variables given m factors
97 98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/statsample/factor/pca.rb', line 97
# Communalities for all variables given m factors.
#
# For variable i the value is the sum, over the first m eigenpairs, of
# |eigenvalue| * eigenvector[i]**2.
#
# m:: number of factors to use; defaults to @m when nil.
#
# Returns an Array with @n_variables entries, one per variable.
# Raises ArgumentError when m exceeds the number of stored eigenpairs
# (previously this surfaced as a NoMethodError on nil).
def communalities(m = nil)
  m ||= @m
  available = @eigenpairs.size
  if m > available
    raise ArgumentError, "m (#{m}) should be <= number of eigenpairs (#{available})"
  end
  Array.new(@n_variables) do |i|
    # Plain left-to-right accumulation starting at 0, as in the original loop.
    (0...m).inject(0) do |sum, j|
      pair = @eigenpairs[j]
      sum + pair[0].abs * pair[1][i]**2
    end
  end
end
#component_matrix(m = nil) ⇒ Object
Component matrix for m factors
84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/statsample/factor/pca.rb', line 84
# Component matrix for m factors.
#
# Places the eigenvectors of the first m eigenpairs in the columns of an
# @n_variables x m matrix and multiplies it by the diagonal matrix of the
# square roots of the matching eigenvalues.
#
# m:: number of factors to use; defaults to @m when nil.
#
# Returns the product converted with #to_matrix.
# Raises RuntimeError ("m should be > 0") when m < 1.
# NOTE(review): Math.sqrt is applied to the raw eigenvalue here, while
# #communalities uses its absolute value - confirm which is intended.
def component_matrix(m = nil)
  m ||= @m
  raise "m should be > 0" if m < 1
  vectors = GSL::Matrix.zeros(@n_variables, m)
  roots = []
  m.times do |col|
    vectors.set_col(col, @eigenpairs[col][1])
    roots << Math.sqrt(@eigenpairs[col][0])
  end
  scaling = GSL::Matrix.diagonal(roots)
  scaled = vectors * scaling
  scaled.to_matrix
end
#data_transformation(data_matrix, m) ⇒ Object
Projects a data matrix onto the feature vector for m factors (data transformation)
77 78 79 80 81 82 |
# File 'lib/statsample/factor/pca.rb', line 77
# Projects a data matrix onto the feature vector for m factors.
#
# Computes (feature_vector(m)' * data_matrix')', i.e. one row per row of
# data_matrix and one column per retained factor.
#
# data_matrix:: matrix whose number of columns (size2) must equal the
#               number of variables of the analyzed matrix.
# m::           number of factors to use; defaults to @m when nil or
#               omitted (it was a required positional before, although
#               nil was already accepted).
#
# Raises RuntimeError when the column count of data_matrix differs from
# @n_variables.
def data_transformation(data_matrix, m = nil)
  m ||= @m
  if data_matrix.size2 != @n_variables
    raise "Data variables number should be equal to original variable number"
  end
  fv = feature_vector(m)
  (fv.transpose * data_matrix.transpose).transpose
end
#eigenvalues ⇒ Object
Array with eigenvalues
110 111 112 |
# File 'lib/statsample/factor/pca.rb', line 110
# Array with the eigenvalues: the first element of every entry of
# @eigenpairs, in stored order.
def eigenvalues
  @eigenpairs.map do |pair|
    pair[0]
  end
end
#feature_vector(m = nil) ⇒ Object
Feature vector for m factors
68 69 70 71 72 73 74 75 |
# File 'lib/statsample/factor/pca.rb', line 68
# Feature vector for m factors.
#
# Builds an @n_variables x m matrix whose column k is the eigenvector
# (second element) of the k-th stored eigenpair.
#
# m:: number of factors to use; defaults to @m when nil.
def feature_vector(m = nil)
  m ||= @m
  basis = GSL::Matrix.zeros(@n_variables, m)
  m.times { |col| basis.set_col(col, @eigenpairs[col][1]) }
  basis
end
#report_building(generator) ⇒ Object
:nodoc:
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/statsample/factor/pca.rb', line 127
# Writes this analysis into a report generator: a TOC entry and opening
# div, the number of factors, then three tables (communalities,
# eigenvalues, component matrix) and the closing div.
#
# generator:: object receiving toc_entry, html, text and parse_element calls.
def report_building(generator) # :nodoc:
  anchor = generator.toc_entry(_("PCA: ") + name)
  generator.html "<div class='pca'>" + _("PCA") + " #{@name}<a name='#{anchor}'></a>"
  generator.text "Number of factors: #{m}"

  # Communalities: the "Initial" column is always reported as 1.0.
  communality_table = ReportBuilder::Table.new(:name => _("Communalities"),
                                               :header => ["Variable", "Initial", "Extraction"])
  communalities(m).each_with_index do |value, idx|
    communality_table.row([idx, 1.0, format("%0.3f", value)])
  end
  generator.parse_element(communality_table)

  eigenvalue_table = ReportBuilder::Table.new(:name => _("Eigenvalues"),
                                              :header => ["Variable", "Value"])
  eigenvalues.each_with_index do |value, idx|
    eigenvalue_table.row([idx, format("%0.3f", value)])
  end
  generator.parse_element(eigenvalue_table)

  # One header column per factor, numbered from 1.
  factor_labels = m.times.map { |idx| idx + 1 }
  component_table = ReportBuilder::Table.new(:name => _("Component Matrix"),
                                             :header => ["Variable"] + factor_labels)
  component_matrix(m).to_a.each_with_index do |loadings, idx|
    component_table.row([idx] + loadings.map { |loading| format("%0.3f", loading) })
  end
  generator.parse_element(component_table)

  generator.html("</div>")
end
#summary ⇒ Object
122 123 124 125 126 |
# File 'lib/statsample/factor/pca.rb', line 122
# Adds this analysis to a fresh ReportBuilder and returns the builder's
# #to_text output.
def summary
  report = ReportBuilder.new
  report.add(self)
  report.to_text
end