Class: Statsample::Factor::PCA

Inherits:
Object
  • Object
show all
Includes:
GetText
Defined in:
lib/statsample/factor/pca.rb

Overview

Principal Component Analysis (PCA) of a covariance or correlation matrix.

For Principal Axis Analysis, use Statsample::Factor::PrincipalAxis

Usage:

require 'statsample'
a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
b=[2.4,0.7,2.9,2.2,3.0,2.7,1.6,1.1,1.6,0.9].to_scale
ds={'a'=>a,'b'=>b}.to_dataset
cor_matrix=Statsample::Bivariate.correlation_matrix(ds)
pca=Statsample::Factor::PCA.new(cor_matrix)
pca.m
=> 1
pca.eigenvalues
=> [1.92592927269225, 0.0740707273077545]
pca.component_matrix
=> GSL::Matrix
[  9.813e-01 
  9.813e-01 ]
pca.communalities
=> [0.962964636346122, 0.962964636346122]

References:

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(matrix, opts = Hash.new) ⇒ PCA

Returns a new instance of PCA.



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/statsample/factor/pca.rb', line 40

# Creates the analysis for +matrix+ and calls +calculate_eigenpairs+ right away.
#
# matrix:: covariance or correlation matrix; converted with +to_gsl+ when it
#          responds to that method, otherwise used as given.
# opts::   hash of attribute values. Every key for which this object has a
#          public writer (<tt>key=</tt>) is assigned; other keys are ignored.
#
# When +m+ is still nil after applying +opts+, it is set to the number of
# eigenpairs whose eigenvalue is >= 1.0.
def initialize(matrix, opts = Hash.new)
  matrix = matrix.to_gsl if matrix.respond_to? :to_gsl
  @name = ""
  @matrix = matrix
  @n_variables = @matrix.size1
  @m = nil
  opts.each do |k, v|
    # Probe the writer itself. Probing the reader instead would raise
    # NoMethodError for any key that only has a reader method.
    setter = "#{k}="
    send(setter, v) if respond_to?(setter)
  end
  calculate_eigenpairs
  # Default number of factors: eigenpairs with an eigenvalue of at least 1.
  @m = @eigenpairs.find_all { |v| v[0] >= 1.0 }.size if @m.nil?
end

Instance Attribute Details

#m ⇒ Object

Number of factors. Defaults to the number of components whose eigenvalue is at least 1 (>= 1.0).



36
37
38
# File 'lib/statsample/factor/pca.rb', line 36

# Reader for the number of factors stored in @m.
def m; @m; end

#name ⇒ Object

Name of analysis



33
34
35
# File 'lib/statsample/factor/pca.rb', line 33

# Reader for the analysis name stored in @name.
def name; @name; end

Instance Method Details

#communalities(m = nil) ⇒ Object

Communalities for all variables given m factors



97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/statsample/factor/pca.rb', line 97

# Communality of every variable when the first +m+ eigenpairs are used.
#
# m:: number of factors; falls back to @m when nil.
#
# Returns an array with one entry per variable (@n_variables entries). Each
# entry is the sum, over the first +m+ eigenpairs, of the absolute eigenvalue
# times the squared eigenvector component for that variable.
def communalities(m=nil)
  factors = m || @m
  @n_variables.times.map do |var|
    factors.times.inject(0) do |acc, k|
      pair = @eigenpairs[k]
      acc + pair[0].abs * pair[1][var]**2
    end
  end
end

#component_matrix(m = nil) ⇒ Object

Component matrix for m factors



84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/statsample/factor/pca.rb', line 84

# Component matrix for the first +m+ factors.
#
# m:: number of factors; falls back to @m when nil.
#
# Each of the first +m+ eigenvectors is stored as a column, and that matrix
# is multiplied by a diagonal matrix holding the square roots of the
# matching eigenvalues. The product is returned after +to_matrix+.
#
# Raises RuntimeError ("m should be > 0") when m is below 1.
def component_matrix(m=nil)
  m ||= @m
  raise "m should be > 0" if m < 1
  vectors = GSL::Matrix.zeros(@n_variables, m)
  roots = m.times.map do |col|
    pair = @eigenpairs[col]
    vectors.set_col(col, pair[1])
    Math.sqrt(pair[0])
  end
  (vectors * GSL::Matrix.diagonal(roots)).to_matrix
end

#data_transformation(data_matrix, m) ⇒ Object

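Transforms data_matrix using the feature vector for m factors. The second dimension of data_matrix (size2) must equal the number of variables of the analysed matrix; otherwise an error is raised.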



77
78
79
80
81
82
# File 'lib/statsample/factor/pca.rb', line 77

# Transforms +data_matrix+ with the feature vector for +m+ factors.
#
# data_matrix:: matrix whose +size2+ equals the number of variables of the
#               analysed matrix; it must respond to +transpose+.
# m::           number of factors; falls back to @m when nil or omitted
#               (optional, like the other methods taking +m+).
#
# Returns the transpose of (feature_vector(m) transposed * data_matrix
# transposed).
#
# Raises RuntimeError when +data_matrix.size2+ differs from the number of
# variables.
def data_transformation(data_matrix, m=nil)
  m ||= @m
  if data_matrix.size2 != @n_variables
    raise "Data variables number should be equal to original variable number"
  end
  fv = feature_vector(m)
  (fv.transpose * data_matrix.transpose).transpose
end

#eigenvalues ⇒ Object

Array with eigenvalues



110
111
112
# File 'lib/statsample/factor/pca.rb', line 110

# Eigenvalues only: element 0 of each entry in @eigenpairs, in stored order.
def eigenvalues
  @eigenpairs.map { |pair| pair[0] }
end

#feature_vector(m = nil) ⇒ Object

Feature vector for m factors



68
69
70
71
72
73
74
75
# File 'lib/statsample/factor/pca.rb', line 68

# Feature vector for +m+ factors: a GSL matrix with @n_variables rows whose
# columns are the eigenvectors of the first +m+ eigenpairs.
#
# m:: number of factors; falls back to @m when nil.
def feature_vector(m=nil)
  m ||= @m
  GSL::Matrix.zeros(@n_variables, m).tap do |vectors|
    m.times { |col| vectors.set_col(col, @eigenpairs[col][1]) }
  end
end

#report_building(generator) ⇒ Object

:nodoc:



127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/statsample/factor/pca.rb', line 127

# Emits the PCA report through +generator+: a TOC entry and opening div, the
# number of factors, then three tables (communalities, eigenvalues,
# component matrix), and finally the closing div.
def report_building(generator) # :nodoc:
  toc_anchor = generator.toc_entry(_("PCA: ") + name)
  opening = "<div class='pca'>" + _("PCA") + " #{@name}<a name='#{toc_anchor}'></a>"
  generator.html(opening)

  generator.text("Number of factors: #{m}")

  # One row per variable; the "Initial" column is the constant 1.0.
  com_table = ReportBuilder::Table.new(:name => _("Communalities"), :header => ["Variable", "Initial", "Extraction"])
  communalities(m).each_with_index do |value, idx|
    com_table.row([idx, 1.0, format("%0.3f", value)])
  end
  generator.parse_element(com_table)

  eigen_table = ReportBuilder::Table.new(:name => _("Eigenvalues"), :header => ["Variable", "Value"])
  eigenvalues.each_with_index do |value, idx|
    eigen_table.row([idx, format("%0.3f", value)])
  end
  generator.parse_element(eigen_table)

  # Header is "Variable" followed by the factor numbers 1..m.
  cm_table = ReportBuilder::Table.new(:name => _("Component Matrix"), :header => ["Variable"] + (1..m).to_a)
  component_matrix(m).to_a.each_with_index do |cells, idx|
    cm_table.row([idx] + cells.map { |cell| format("%0.3f", cell) })
  end
  generator.parse_element(cm_table)
  generator.html("</div>")
end

#summary ⇒ Object



122
123
124
125
126
# File 'lib/statsample/factor/pca.rb', line 122

# Adds this object to a new ReportBuilder and returns the builder's
# +to_text+ output.
def summary
  ReportBuilder.new.tap { |report| report.add(self) }.to_text
end