Class: Statsample::Regression::Multiple::BaseEngine

Inherits:
Object
  • Object
show all
Includes:
Summarizable
Defined in:
lib/statsample/regression/multiple/baseengine.rb

Overview

Base class for Multiple Regression Engines

Direct Known Subclasses

AlglibEngine, GslEngine, MatrixEngine

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Summarizable

#summary

Constructor Details

#initialize(ds, y_var, opts = Hash.new) ⇒ BaseEngine

Returns a new instance of BaseEngine.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/statsample/regression/multiple/baseengine.rb', line 20

# Sets up the state shared by the multiple regression engines.
#
# ds    - dataset; must respond to #fields and #cases.
# y_var - name of the dependent variable.
# opts  - Hash of options, merged over the defaults ({:digits=>3}).
#         Every key that has a public writer method on the engine is
#         assigned through that writer; other keys are only kept in @opts.
def initialize(ds, y_var, opts = Hash.new)
  @ds = ds
  # Field count minus one (the dependent variable is one of the fields).
  @predictors_n = @ds.fields.size - 1
  @total_cases = @ds.cases
  @cases = @ds.cases
  @y_var = y_var
  @r2 = nil
  @name = _("Multiple Regression:  %s over %s") % [ds.fields.join(","), @y_var]

  opts_default = {:digits => 3}
  @opts = opts_default.merge(opts)

  @opts.each do |key, value|
    writer = "#{key}="
    # Check for the writer, not the reader: an option named after a
    # read-only attribute would otherwise be dispatched to a writer that
    # does not exist and raise NoMethodError.
    send(writer, value) if respond_to?(writer)
  end
end

Instance Attribute Details

#casesObject (readonly)

Minimum number of valid cases for pairs of correlation



10
11
12
# File 'lib/statsample/regression/multiple/baseengine.rb', line 10

# Reader for @cases (set from the dataset's case count in the constructor).
def cases; @cases; end

#digitsObject

Returns the value of attribute digits.



16
17
18
# File 'lib/statsample/regression/multiple/baseengine.rb', line 16

# Reader for @digits; report_building uses it as the number of decimals
# in its float format string.
def digits; @digits; end

#nameObject

Name of analysis



8
9
10
# File 'lib/statsample/regression/multiple/baseengine.rb', line 8

# Reader for @name, the name of the analysis.
def name; @name; end

#total_casesObject (readonly)

Number of total cases (dataset.cases)



14
15
16
# File 'lib/statsample/regression/multiple/baseengine.rb', line 14

# Reader for @total_cases, the total number of cases in the dataset.
def total_cases; @total_cases; end

#valid_casesObject (readonly)

Number of valid cases (listwise)



12
13
14
# File 'lib/statsample/regression/multiple/baseengine.rb', line 12

# Reader for @valid_cases, the number of valid cases (listwise).
def valid_cases; @valid_cases; end

Class Method Details

.univariate?Boolean

Returns:

  • (Boolean)


17
18
19
# File 'lib/statsample/regression/multiple/baseengine.rb', line 17

# Class-level predicate; this implementation always answers true.
def self.univariate?; true; end

Instance Method Details

#anovaObject

Calculate F Test



39
40
41
# File 'lib/statsample/regression/multiple/baseengine.rb', line 39

# F test for the regression. The OneWay object is built on first use from
# the regression/error sums of squares and degrees of freedom, then cached
# in @anova.
def anova
  return @anova if @anova

  @anova = Statsample::Anova::OneWay.new(
    :ss_num => ssr,
    :ss_den => sse,
    :df_num => df_r,
    :df_den => df_e,
    :name_numerator => _("Regression"),
    :name_denominator => _("Error"),
    :name => "ANOVA"
  )
end

#assign_names(c) ⇒ Object



212
213
214
215
216
217
218
# File 'lib/statsample/regression/multiple/baseengine.rb', line 212

# Pairs each entry of @fields with the element of +c+ at the same index.
#
# c - indexable collection, aligned with @fields.
#
# Returns a Hash of field name => c[index].
def assign_names(c)
  @fields.each_with_index.each_with_object({}) do |(field, idx), named|
    named[field] = c[idx]
  end
end

#coeffs_seObject

Standard Error for coefficients



149
150
151
152
153
154
155
156
# File 'lib/statsample/regression/multiple/baseengine.rb', line 149

# Standard error of every coefficient:
#   sqrt(MSE / (sum of squares of the predictor * tolerance of the predictor))
#
# Returns a Hash keyed like #coeffs.
def coeffs_se
  mean_sq_error = sse.quo(df_e)
  coeffs.each_with_object({}) do |(predictor, _coef), errors|
    denominator = @ds[predictor].sum_of_squares * tolerance(predictor)
    errors[predictor] = Math.sqrt(mean_sq_error / denominator)
  end
end

#coeffs_tObject

T values for coeffs



99
100
101
102
103
104
105
106
# File 'lib/statsample/regression/multiple/baseengine.rb', line 99

# T value of every coefficient: the coefficient divided by its standard
# error. Returns a Hash keyed like #coeffs.
def coeffs_t
  std_errors = coeffs_se
  coeffs.each_with_object({}) do |(predictor, coef), t_values|
    t_values[predictor] = coef / std_errors[predictor]
  end
end

#coeffs_tolerancesObject

Tolerances for each coefficient



142
143
144
145
146
147
# File 'lib/statsample/regression/multiple/baseengine.rb', line 142

# Tolerance of every field in @fields.
# Returns a Hash of field name => tolerance(field).
def coeffs_tolerances
  @fields.each_with_object({}) do |field, tolerances|
    tolerances[field] = tolerance(field)
  end
end

#constant_seObject

Standard error for constant



182
183
184
# File 'lib/statsample/regression/multiple/baseengine.rb', line 182

# Standard error of the constant: the [0, 0] cell of
# estimated_variance_covariance_matrix.
def constant_se
  covariance = estimated_variance_covariance_matrix
  covariance[0, 0]
end

#constant_tObject

T for constant



178
179
180
# File 'lib/statsample/regression/multiple/baseengine.rb', line 178

# T value of the constant: the constant divided by its standard error.
#
# Returns nil when constant_se is nil (the variance-covariance matrix
# yields nil for negative cells). Dividing by nil would raise TypeError,
# while report_building expects a nil here to print an empty cell.
def constant_t
  std_error = constant_se
  return nil if std_error.nil?

  constant.to_f / std_error
end

#df_eObject

Degrees of freedom for error



120
121
122
# File 'lib/statsample/regression/multiple/baseengine.rb', line 120

# Degrees of freedom for error: valid cases - number of predictors - 1.
def df_e
  without_predictors = @valid_cases - @predictors_n
  without_predictors - 1
end

#df_rObject

Degrees of freedom for regression



116
117
118
# File 'lib/statsample/regression/multiple/baseengine.rb', line 116

# Degrees of freedom for regression: the number of predictors.
def df_r; @predictors_n; end

#estimated_variance_covariance_matrixObject

Estimated variance-covariance matrix. Used to calculate the standard error of the constant.



165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/statsample/regression/multiple/baseengine.rb', line 165

# Builds the design matrix X (a leading column of 1.0 followed by every
# field of @ds_valid except the dependent variable), computes
# (X'X)^-1 * mse and returns that matrix with each non-negative cell
# replaced by its square root. Negative cells become nil.
def estimated_variance_covariance_matrix
  intercept_column = [1.0] * @valid_cases
  predictor_columns = @ds_valid.fields
                               .reject { |field| field == @y_var }
                               .map { |field| @ds_valid[field].data }
  design = Matrix.columns([intercept_column] + predictor_columns)
  scaled = (design.t * design).inverse * mse
  scaled.collect { |cell| cell >= 0 ? Math.sqrt(cell) : nil }
end

#fObject

Fisher for Anova



124
125
126
# File 'lib/statsample/regression/multiple/baseengine.rb', line 124

# F statistic, delegated to the ANOVA object.
def f
  test = anova
  test.f
end

#mseObject

Mean Square Error



112
113
114
# File 'lib/statsample/regression/multiple/baseengine.rb', line 112

# Mean square error: error sum of squares over error degrees of freedom.
def mse
  error_ss = sse
  error_ss.quo(df_e)
end

#msrObject

Mean square Regression



108
109
110
# File 'lib/statsample/regression/multiple/baseengine.rb', line 108

# Mean square regression: regression sum of squares over regression
# degrees of freedom.
def msr
  regression_ss = ssr
  regression_ss.quo(df_r)
end

#predictedObject

Retrieves a vector with predicted values for y



47
48
49
50
51
52
53
54
55
56
57
# File 'lib/statsample/regression/multiple/baseengine.rb', line 47

# Predicted value of y for every case, as a numeric vector.
# A case with a nil in any of @dep_columns gets nil instead of a prediction.
def predicted
  fitted = (0...@total_cases).map do |idx|
    row = @dep_columns.map { |column| column[idx] }
    row.any?(&:nil?) ? nil : process(row)
  end
  fitted.to_vector(:numeric)
end

#probabilityObject

p-value of Fisher



128
129
130
# File 'lib/statsample/regression/multiple/baseengine.rb', line 128

# p-value of the F test, delegated to the ANOVA object.
def probability
  test = anova
  test.probability
end

#process(v) ⇒ Object



240
241
242
243
244
245
246
247
# File 'lib/statsample/regression/multiple/baseengine.rb', line 240

# Evaluates the regression equation for one case.
#
# v - predictor values, in the same order as @fields.
#
# Returns the constant plus the sum of coefficient * value over @fields.
def process(v)
  weights = coeffs
  @fields.each_with_index.inject(constant) do |sum, (field, idx)|
    sum + weights[field] * v[idx]
  end
end

#rObject

R Multiple



75
76
77
# File 'lib/statsample/regression/multiple/baseengine.rb', line 75

# Multiple R. This base implementation only raises a RuntimeError asking
# for the method to be implemented.
def r
  raise RuntimeError, "You should implement this"
end

#r2_adjustedObject

R^2 Adjusted. Estimates the population R^2 using the Ezekiel formula. Always lower than the sample R^2.

Reference:

  • Leach, L. & Henson, R. (2007). The Use and Impact of Adjusted R2 Effects in Published Regression Research. Multiple Linear Regression Viewpoints, 33(1), 1-11.



87
88
89
# File 'lib/statsample/regression/multiple/baseengine.rb', line 87

# Adjusted R^2: r2 - ((1 - r2) * number of predictors) / df_e.
def r2_adjusted
  shrinkage = ((1 - r2) * @predictors_n).quo(df_e)
  r2 - shrinkage
end

#report_building(b) ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/statsample/regression/multiple/baseengine.rb', line 185

# Writes the regression report into a report builder.
#
# b - builder responding to #section; the section object it yields must
#     respond to #text, #parse_element and #table, and the table object
#     to #row.
#
# The section contains, in order: engine class, case counts, R, R^2,
# adjusted R^2, standard error of estimate, the fitted equation, the
# ANOVA element and a table with one row for the constant and one per
# field (b, beta, se, t). Floats are printed with +digits+ decimals.
def report_building(b)
  number = "%0.#{digits}f"
  b.section(:name => @name) do |section|
    betas = coeffs
    section.text(_("Engine: %s") % self.class)
    section.text(_("Cases(listwise)=%d(%d)") % [@total_cases, @valid_cases])
    section.text(_("R=") + format(number, r))
    section.text(_("R^2=") + format(number, r2))
    section.text(_("R^2 Adj=") + format(number, r2_adjusted))
    section.text(_("Std.Error R=") + format(number, se_estimate))

    equation_head = _("Equation") + "=" + format(number, constant)
    terms = @fields.map { |field| format("#{number}%s", betas[field], field) }
    section.text(equation_head + " + " + terms.join(' + '))

    section.parse_element(anova)
    std_betas = standarized_coeffs

    std_errors = coeffs_se
    section.table(:name => _("Beta coefficients"), :header => %w{coeff b beta se t}.map { |label| _(label) }) do |table|
      # The constant's se and t may be nil; those cells are left empty.
      const_se = constant_se
      const_t = constant_t
      table.row([
        _("Constant"),
        format(number, constant),
        "-",
        const_se.nil? ? "" : format(number, const_se),
        const_t.nil? ? "" : format(number, const_t)
      ])
      @fields.each do |field|
        table.row([
          field,
          format(number, betas[field]),
          format(number, std_betas[field]),
          format(number, std_errors[field]),
          format(number, betas[field].quo(std_errors[field]))
        ])
      end
    end
  end
end

#residualsObject

Retrieves a vector with residuals values for y



63
64
65
66
67
68
69
70
71
72
73
# File 'lib/statsample/regression/multiple/baseengine.rb', line 63

# Residual (observed y minus predicted y) for every case, as a numeric
# vector. A case gets nil when any of @dep_columns or the observed y
# value is nil for it.
def residuals
  errors = @total_cases.times.map do |idx|
    row = @dep_columns.map { |column| column[idx] }
    observed = @ds[@y_var][idx]
    next nil if row.any?(&:nil?) || observed.nil?

    observed - process(row)
  end
  errors.to_vector(:numeric)
end

#se_estimateObject

Standard error of estimate



43
44
45
# File 'lib/statsample/regression/multiple/baseengine.rb', line 43

# Standard error of estimate: square root of sse / df_e.
def se_estimate
  error_variance = sse.quo(df_e)
  Math.sqrt(error_variance)
end

#se_r2Object

Standard error of R^2 (the original author marked this formula as uncertain).



159
160
161
# File 'lib/statsample/regression/multiple/baseengine.rb', line 159

# Standard error of R^2, computed as
#   sqrt(4 * r2 * (1 - r2)^2 * df_e^2 / ((cases^2 - 1) * (cases + 3)))
# where cases is @cases.
def se_r2
  numerator = 4 * r2 * (1 - r2)**2 * df_e**2
  denominator = (@cases**2 - 1) * (@cases + 3)
  Math.sqrt(numerator.quo(denominator))
end

#sseObject

Sum of squares (Error)



95
96
97
# File 'lib/statsample/regression/multiple/baseengine.rb', line 95

# Error sum of squares: total sum of squares minus regression sum of squares.
def sse
  total = sst
  total - ssr
end

#sse_directObject



237
238
239
# File 'lib/statsample/regression/multiple/baseengine.rb', line 237

# Total sum of squares minus regression sum of squares (the same
# expression that #sse evaluates).
def sse_direct
  total = sst
  total - ssr
end

#ssrObject

Sum of squares (regression)



91
92
93
# File 'lib/statsample/regression/multiple/baseengine.rb', line 91

# Regression sum of squares: R^2 times the total sum of squares.
def ssr
  explained_share = r2
  explained_share * sst
end

#ssr_directObject

Sum of squares of regression using the predicted value minus y mean



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/statsample/regression/multiple/baseengine.rb', line 222

# Regression sum of squares computed case by case: the sum of
# (predicted value - mean of @dy)^2 over every case that has no nil in
# @dep_columns. Cases with a nil are skipped.
def ssr_direct
  center = @dy.mean
  (0...@ds.cases).inject(0) do |sum, idx|
    row = @dep_columns.map { |column| column[idx] }
    next sum if row.any?(&:nil?)

    sum + (process(row) - center)**2
  end
end

#sstObject

Sum of squares Total



79
80
81
# File 'lib/statsample/regression/multiple/baseengine.rb', line 79

# Total sum of squares. This base implementation only raises a
# RuntimeError asking for the method to be implemented.
def sst
  raise RuntimeError, "You should implement this"
end

#standarized_predictedObject

Retrieves a vector with standarized values for y



59
60
61
# File 'lib/statsample/regression/multiple/baseengine.rb', line 59

# Result of calling #standarized on the vector of predicted values.
def standarized_predicted
  fitted = predicted
  fitted.standarized
end

#tolerance(var) ⇒ Object

Tolerance for a given variable. See talkstats.com/showthread.php?t=5056



133
134
135
136
137
138
139
140
# File 'lib/statsample/regression/multiple/baseengine.rb', line 133

# Tolerance of a variable: 1 - R^2 of a regression, run with this same
# engine class, on a dataset built from @dep_columns with +var+ as the
# dependent variable.
#
# var - name of the field whose tolerance is wanted.
def tolerance(var)
  vectors = assign_names(@dep_columns).each_with_object({}) do |(field, column), converted|
    converted[field] = column.to_vector(:numeric)
  end
  sub_model = self.class.new(vectors.to_dataset, var)
  1 - sub_model.r2
end