Class: Statsample::Regression::Multiple::BaseEngine
- Inherits:
-
Object
- Object
- Statsample::Regression::Multiple::BaseEngine
- Includes:
- GetText
- Defined in:
- lib/statsample/regression/multiple/baseengine.rb
Overview
Base class for Multiple Regression Engines
Direct Known Subclasses
Instance Attribute Summary collapse
-
#name ⇒ Object
Name of analysis.
Class Method Summary collapse
Instance Method Summary collapse
- #assign_names(c) ⇒ Object
-
#coeffs_se ⇒ Object
Standard Error for coefficients.
-
#coeffs_t ⇒ Object
T values for coeffs.
-
#coeffs_tolerances ⇒ Object
Tolerances for each coefficient.
-
#constant_se ⇒ Object
Standard error for constant.
-
#constant_t ⇒ Object
T for constant.
-
#df_e ⇒ Object
Degrees of freedom for error.
-
#df_r ⇒ Object
Degrees of freedom for regression.
-
#estimated_variance_covariance_matrix ⇒ Object
Estimated variance-covariance matrix; used to calculate the standard error of the constant.
-
#f ⇒ Object
Fisher's F statistic for the ANOVA.
-
#initialize(ds, y_var, opts = Hash.new) ⇒ BaseEngine
constructor
A new instance of BaseEngine.
-
#mse ⇒ Object
Mean Square Error.
-
#msr ⇒ Object
Mean square Regression.
-
#predicted ⇒ Object
Retrieves a vector with predicted values for y.
- #process(v) ⇒ Object
-
#r ⇒ Object
R Multiple.
- #report_building(b) ⇒ Object
-
#residuals ⇒ Object
Retrieves a vector with the residual values for y.
-
#se_r2 ⇒ Object
Standard error of R^2.
-
#significance ⇒ Object
Significance (p-value) of Fisher's F statistic.
-
#sse ⇒ Object
Sum of squares (Error).
- #sse_direct ⇒ Object
-
#ssr ⇒ Object
Sum of squares (regression).
-
#ssr_direct ⇒ Object
Sum of squares of regression using the predicted value minus y mean.
-
#sst ⇒ Object
Sum of squares Total.
-
#standarized_predicted ⇒ Object
Retrieves a vector with the standardized predicted values for y.
- #summary ⇒ Object
-
#tolerance(var) ⇒ Object
Tolerance for a given variable talkstats.com/showthread.php?t=5056.
Constructor Details
#initialize(ds, y_var, opts = Hash.new) ⇒ BaseEngine
Returns a new instance of BaseEngine.
18 19 20 21 22 23 24 25 26 27 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 18
#
# Builds a new engine for a dataset and a dependent variable.
#
# Stores the dataset, its case count and the dependent variable name,
# resets the cached R^2 and sets a default analysis name built from the
# dataset fields and +y_var+.
#
# @param ds    dataset; must respond to +cases+ and +fields+
# @param y_var name of the dependent variable
# @param opts  [Hash] attribute name => value pairs. Each pair is applied
#   through the matching writer (<tt>key=</tt>) when the engine exposes
#   one; keys without a public writer are ignored.
def initialize(ds, y_var, opts = Hash.new)
  @ds = ds
  @cases = @ds.cases
  @y_var = y_var
  @r2 = nil
  @name = _("Multiple Regression: %s over %s") % [ds.fields.join(","), @y_var]
  opts.each do |key, value|
    writer = "#{key}="
    # Test for the writer that is actually invoked. Testing the reader
    # raised NoMethodError for read-only attributes and skipped
    # attributes that only expose a writer.
    send(writer, value) if respond_to?(writer)
  end
end
Instance Attribute Details
#name ⇒ Object
Name of analysis
10 11 12 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 10
#
# Name of the analysis.
#
# @return the value stored in +@name+
def name
  @name
end
Class Method Details
.univariate? ⇒ Boolean
12 13 14 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 12
#
# Whether this engine is univariate.
#
# @return [Boolean] always +true+ for the base engine
def self.univariate?
  true
end
Instance Method Details
#assign_names(c) ⇒ Object
191 192 193 194 195 196 197 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 191
#
# Pairs each name in +@fields+ with the element of +c+ found at the same
# position.
#
# @param c indexable collection, read with +c[i]+ for every field index
# @return [Hash] field name => +c[i]+
def assign_names(c)
  @fields.each_with_index.each_with_object({}) do |(field, position), named|
    named[field] = c[position]
  end
end
#coeffs_se ⇒ Object
Standard Error for coefficients
124 125 126 127 128 129 130 131 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 124
#
# Standard error for each coefficient.
#
# For every key of +coeffs+ the value is
# sqrt(MSE / (sum of squares of that variable * its tolerance)),
# where MSE is +sse+ divided by +df_e+.
#
# @return [Hash] coefficient key => standard error
def coeffs_se
  error_mean_square = sse.quo(df_e)
  coeffs.each_with_object({}) do |(field, _coefficient), errors|
    spread = @ds[field].sum_of_squares * tolerance(field)
    errors[field] = Math.sqrt(error_mean_square / spread)
  end
end
#coeffs_t ⇒ Object
T values for coeffs
74 75 76 77 78 79 80 81 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 74
#
# T values for the coefficients: each coefficient divided by its standard
# error from +coeffs_se+.
#
# @return [Hash] coefficient key => t value
def coeffs_t
  standard_errors = coeffs_se
  coeffs.each_with_object({}) do |(field, coefficient), t_values|
    t_values[field] = coefficient / standard_errors[field]
  end
end
#coeffs_tolerances ⇒ Object
Tolerances for each coefficient
117 118 119 120 121 122 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 117
#
# Tolerance of every field in +@fields+.
#
# @return [Hash] field name => +tolerance(field)+
def coeffs_tolerances
  @fields.each_with_object({}) do |field, tolerances|
    tolerances[field] = tolerance(field)
  end
end
#constant_se ⇒ Object
Standard error for constant
155 156 157 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 155
#
# Standard error for the constant: the top-left cell of
# +estimated_variance_covariance_matrix+.
def constant_se
  matrix = estimated_variance_covariance_matrix
  matrix[0, 0]
end
#constant_t ⇒ Object
T for constant
151 152 153 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 151
#
# T value for the constant: the constant (as a float) divided by its
# standard error.
def constant_t
  intercept = constant.to_f
  intercept / constant_se
end
#df_e ⇒ Object
Degrees of freedom for error
95 96 97 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 95
#
# Degrees of freedom for error: valid cases minus the number of
# +@dep_columns+ minus one.
def df_e
  valid_cases = @ds_valid.cases
  column_count = @dep_columns.size
  valid_cases - column_count - 1
end
#df_r ⇒ Object
Degrees of freedom for regression
91 92 93 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 91
#
# Degrees of freedom for regression: the number of +@dep_columns+.
def df_r
  column_count = @dep_columns.size
  column_count
end
#estimated_variance_covariance_matrix ⇒ Object
Estimated variance-covariance matrix; used to calculate the standard error of the constant
139 140 141 142 143 144 145 146 147 148 149 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 139
#
# Estimated variance-covariance matrix, used to calculate the standard
# error of the constant.
#
# Builds the design matrix X from a leading column of ones plus the data
# of every vector in +@ds_valid+ except +@y_var+, computes
# (X'X)^-1 * MSE and returns the element-wise square root of the result.
#
# @return [Matrix] square roots of the positive cells; cells that are not
#   greater than zero come back as +nil+
def estimated_variance_covariance_matrix
  # Evaluate the mean square error once and reuse it; the value was
  # previously stored and then recomputed instead of being used.
  mse_p = mse
  columns = []
  @ds_valid.each_vector do |k, v|
    columns.push(v.data) unless k == @y_var
  end
  # Leading column of ones for the constant term.
  columns.unshift([1.0] * @ds_valid.cases)
  x = Matrix.columns(columns)
  matrix = (x.t * x).inverse * mse_p
  matrix.collect { |i| Math.sqrt(i) if i > 0 }
end
#f ⇒ Object
Fisher's F statistic for the ANOVA
99 100 101 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 99
#
# Fisher's F for the ANOVA: (ssr / df_r) divided by (sse / df_e).
def f
  regression_mean_square = ssr.quo(df_r)
  error_mean_square = sse.quo(df_e)
  regression_mean_square.quo(error_mean_square)
end
#mse ⇒ Object
Mean Square Error
87 88 89 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 87
#
# Mean square error: +sse+ divided by +df_e+.
def mse
  error_sum = sse
  error_sum.quo(df_e)
end
#msr ⇒ Object
Mean square Regression
83 84 85 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 83
#
# Mean square regression: +ssr+ divided by +df_r+.
def msr
  regression_sum = ssr
  regression_sum.quo(df_r)
end
#predicted ⇒ Object
Retrieves a vector with predicted values for y
30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 30
#
# Retrieves a vector with the predicted values for y.
#
# Walks every case index of +@ds+, gathers that case's value from each of
# the +@dep_columns+ and feeds the row to +process+. A case with any
# missing (nil) value yields nil instead of a prediction.
#
# @return the collected values converted with +to_vector(:scale)+
def predicted
  estimates = (0...@ds.cases).map do |idx|
    row = @dep_columns.map { |column| column[idx] }
    row.any?(&:nil?) ? nil : process(row)
  end
  estimates.to_vector(:scale)
end
#process(v) ⇒ Object
219 220 221 222 223 224 225 226 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 219
#
# Evaluates the regression equation for one row of predictor values.
#
# @param v values read by position, in the same order as +@fields+
# @return the constant plus the sum of coefficient * value for each field
def process(v)
  weights = coeffs
  @fields.each_with_index.inject(constant) do |sum, (field, position)|
    sum + weights[field] * v[position]
  end
end
#r ⇒ Object
R Multiple
58 59 60 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 58
#
# Multiple R. The base engine has no implementation.
#
# @raise [RuntimeError] always
def r
  raise RuntimeError, "You should implement this"
end
#report_building(b) ⇒ Object
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 163
#
# Writes the regression report into the given builder.
#
# Opens one section holding: the engine class, case counts, R and R^2,
# the fitted equation, an "ANOVA" table (regression / error / total rows)
# and a "Beta coefficients" table (constant plus one row per field).
#
# @param b report builder; must respond to +section+ and yield an object
#   that responds to +text+ and +table+
def report_building(b)
  three = ->(value) { sprintf("%0.3f", value) }
  b.section(:name => _("Multiple Regression: ") + @name) do |section|
    coefficients = coeffs
    section.text(_("Engine: %s") % self.class)
    section.text(_("Cases(listwise)=%d(%d)") % [@ds.cases, @ds_valid.cases])
    section.text("R=#{three.call(r)}")
    section.text("R^2=#{three.call(r2)}")

    equation = _("Equation") + "=" + three.call(constant) + " + "
    equation += @fields.collect { |name| sprintf('%0.3f%s', coefficients[name], name) }.join(' + ')
    section.text(equation)

    section.table(:name => "ANOVA", :header => %w{source ss df ms f s}) do |anova|
      anova.row([_("Regression"), three.call(ssr), df_r, three.call(msr), three.call(f), three.call(significance)])
      anova.row([_("Error"), three.call(sse), df_e, three.call(mse), "", ""])
      anova.row([_("Total"), three.call(sst), df_r + df_e, "", "", ""])
    end

    betas = standarized_coeffs
    errors = coeffs_se
    headers = %w{coeff b beta se t}.collect { |label| _(label) }
    section.table(:name => "Beta coefficients", :header => headers) do |table|
      table.row([_("Constant"), three.call(constant), "-", three.call(constant_se), three.call(constant_t)])
      # The loop variable is deliberately not called `f`, which is also the
      # name of the F-statistic method.
      @fields.each do |name|
        table.row([name,
                   three.call(coefficients[name]),
                   three.call(betas[name]),
                   three.call(errors[name]),
                   three.call(coefficients[name].quo(errors[name]))])
      end
    end
  end
end
#residuals ⇒ Object
Retrieves a vector with the residual values for y
46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 46
#
# Retrieves a vector with the residual values for y.
#
# For every case index of +@ds+ the residual is the observed value of
# +@y_var+ minus +process+ applied to that case's predictor values. A case
# yields nil when any predictor value or the observed y value is nil.
#
# @return the collected values converted with +to_vector(:scale)+
def residuals
  differences = (0...@ds.cases).map do |idx|
    row = @dep_columns.map { |column| column[idx] }
    next nil if row.any?(&:nil?)

    observed = @ds[@y_var][idx]
    observed.nil? ? nil : observed - process(row)
  end
  differences.to_vector(:scale)
end
#se_r2 ⇒ Object
Standard error of R^2
133 134 135 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 133
#
# Standard error of R^2, computed as
# sqrt( 4 * r2 * (1 - r2)^2 * df_e^2 / ((n^2 - 1) * (n + 3)) )
# where n is +@cases+.
def se_r2
  numerator = 4 * r2 * (1 - r2)**2 * (df_e)**2
  denominator = (@cases**2 - 1) * (@cases + 3)
  Math.sqrt(numerator.quo(denominator))
end
#significance ⇒ Object
Significance (p-value) of Fisher's F statistic
103 104 105 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 103
#
# Significance of Fisher's F: the absolute value of one minus the F
# cumulative distribution evaluated at +f+ with +df_r+ and +df_e+.
def significance
  cumulative = Distribution::F.cdf(f, df_r, df_e)
  (1.0 - cumulative).abs
end
#sse ⇒ Object
Sum of squares (Error)
70 71 72 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 70
#
# Sum of squares (error): +sst+ minus +ssr+.
def sse
  total = sst
  total - ssr
end
#sse_direct ⇒ Object
216 217 218 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 216
#
# Sum of squares (error), computed here as +sst+ minus +ssr+.
def sse_direct
  total = sst
  total - ssr
end
#ssr ⇒ Object
Sum of squares (regression)
66 67 68 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 66
#
# Sum of squares (regression): +r2+ multiplied by +sst+.
def ssr
  explained_share = r2
  explained_share * sst
end
#ssr_direct ⇒ Object
Sum of squares of regression using the predicted value minus y mean
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 201
#
# Sum of squares of regression, computed from the predicted values:
# the sum over every complete case of (predicted value - mean of y)^2.
#
# Cases where any of the +@dep_columns+ holds nil are skipped.
# The mean is read from +@dy+ and predictions come from +process+.
#
# @return the accumulated sum (0 when no case is complete)
def ssr_direct
  mean = @dy.mean
  (0...@ds.cases).inject(0) do |acc, i|
    row = @dep_columns.collect { |c| c[i] }
    if row.any?(&:nil?)
      acc
    else
      acc + ((process(row) - mean)**2)
    end
  end
end
#sst ⇒ Object
Sum of squares Total
62 63 64 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 62
#
# Sum of squares total. The base engine has no implementation.
#
# @raise [RuntimeError] always
def sst
  raise RuntimeError, "You should implement this"
end
#standarized_predicted ⇒ Object
Retrieves a vector with the standardized predicted values for y
42 43 44 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 42
#
# Retrieves the predicted values for y after calling +standarized+ on the
# vector returned by +predicted+.
def standarized_predicted
  fitted = predicted
  fitted.standarized
end
#summary ⇒ Object
158 159 160 161 162 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 158
#
# Text summary of the analysis: adds this engine to a fresh ReportBuilder
# and returns the builder's +to_text+ output.
def summary
  ReportBuilder.new.tap { |report| report.add(self) }.to_text
end
#tolerance(var) ⇒ Object
Tolerance for a given variable talkstats.com/showthread.php?t=5056
108 109 110 111 112 113 114 115 |
# File 'lib/statsample/regression/multiple/baseengine.rb', line 108
#
# Tolerance for a given variable (talkstats.com/showthread.php?t=5056).
#
# Builds a dataset from the named +@dep_columns+, runs a new engine of the
# same class with +var+ as the dependent variable and returns one minus
# that engine's +r2+.
#
# @param var name of the variable whose tolerance is wanted
def tolerance(var)
  predictors = assign_names(@dep_columns)
  predictors.each_key do |field|
    predictors[field] = predictors[field].to_vector(:scale)
  end
  auxiliary = self.class.new(predictors.to_dataset, var)
  1 - auxiliary.r2
end