Class: Statsample::Regression::Multiple::BaseEngine

Inherits:
Object
  • Object
show all
Includes:
GetText
Defined in:
lib/statsample/regression/multiple/baseengine.rb

Overview

Base class for Multiple Regression Engines

Direct Known Subclasses

AlglibEngine, GslEngine, MatrixEngine

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ds, y_var, opts = Hash.new) ⇒ BaseEngine

Returns a new instance of BaseEngine.



18
19
20
21
22
23
24
25
26
27
# File 'lib/statsample/regression/multiple/baseengine.rb', line 18

# Sets up the state shared by the multiple regression engines.
#
# Stores the dataset, its case count and the y variable, clears the cached
# R^2 and builds a default (translated) analysis name. Each entry of +opts+
# is then applied through the matching public writer ("key="); entries that
# have no such writer are ignored.
#
# @param ds [Object] dataset; must respond to +cases+ and +fields+
# @param y_var [Object] identifier of the y variable inside +ds+
# @param opts [Hash] attribute overrides keyed by attribute name
def initialize(ds, y_var, opts = Hash.new)
  @ds = ds
  @cases = @ds.cases
  @y_var = y_var
  @r2 = nil
  @name = _("Multiple Regression:  %s over %s") % [ds.fields.join(","), @y_var]
  opts.each do |key, value|
    # Test for the writer, because the writer is what gets invoked. Testing
    # the reader would drop write-only options and raise on read-only ones.
    writer = "#{key}="
    send(writer, value) if respond_to?(writer)
  end
end

Instance Attribute Details

#nameObject

Name of analysis



10
11
12
# File 'lib/statsample/regression/multiple/baseengine.rb', line 10

# Reader for the analysis name held in @name (the constructor assigns a
# default title to it).
#
# @return [Object] current value of @name
def name
  @name
end

Class Method Details

.univariate?Boolean

Returns:

  • (Boolean)


12
13
14
# File 'lib/statsample/regression/multiple/baseengine.rb', line 12

# Class-level predicate; this implementation always answers true.
#
# @return [Boolean] true
def self.univariate?
  true
end

Instance Method Details

#assign_names(c) ⇒ Object



191
192
193
194
195
196
197
# File 'lib/statsample/regression/multiple/baseengine.rb', line 191

# Pairs every entry of @fields with the element of +c+ found at the same
# position.
#
# @param c [#[]] positional collection (indexed by integer)
# @return [Hash] field => c[position], in @fields order
def assign_names(c)
  @fields.each_with_index.inject({}) do |named, (field, position)|
    named[field] = c[position]
    named
  end
end

#coeffs_seObject

Standard Error for coefficients



124
125
126
127
128
129
130
131
# File 'lib/statsample/regression/multiple/baseengine.rb', line 124

# Standard error of each coefficient:
# sqrt(MSE / (sum of squares of the column * tolerance of the column)).
#
# @return [Hash] coefficient key => standard error
def coeffs_se
  error_ms = sse.quo(df_e)
  coeffs.each_with_object({}) do |(field, _estimate), errors|
    spread = @ds[field].sum_of_squares * tolerance(field)
    errors[field] = Math.sqrt(error_ms / spread)
  end
end

#coeffs_tObject

T values for coeffs



74
75
76
77
78
79
80
81
# File 'lib/statsample/regression/multiple/baseengine.rb', line 74

# t value of each coefficient: the estimate divided by its standard error.
#
# @return [Hash] coefficient key => t value
def coeffs_t
  errors = coeffs_se
  coeffs.inject({}) do |t_values, (field, estimate)|
    t_values[field] = estimate / errors[field]
    t_values
  end
end

#coeffs_tolerancesObject

Tolerances for each coefficient



117
118
119
120
121
122
# File 'lib/statsample/regression/multiple/baseengine.rb', line 117

# Tolerance of every field listed in @fields.
#
# @return [Hash] field => tolerance(field)
def coeffs_tolerances
  tolerances = {}
  @fields.each { |field| tolerances[field] = tolerance(field) }
  tolerances
end

#constant_seObject

Standard error for constant



155
156
157
# File 'lib/statsample/regression/multiple/baseengine.rb', line 155

# Standard error of the constant: the top-left cell of the matrix returned by
# estimated_variance_covariance_matrix.
#
# @return [Object] element [0, 0] of that matrix
def constant_se
  se_matrix = estimated_variance_covariance_matrix
  se_matrix[0, 0]
end

#constant_tObject

T for constant



151
152
153
# File 'lib/statsample/regression/multiple/baseengine.rb', line 151

# t value of the constant: the constant (as a Float) over its standard error.
#
# @return [Float]
def constant_t
  intercept = constant.to_f
  intercept / constant_se
end

#df_eObject

Degrees of freedom for error



95
96
97
# File 'lib/statsample/regression/multiple/baseengine.rb', line 95

# Degrees of freedom for error: cases in @ds_valid minus the number of
# columns in @dep_columns, minus one.
#
# @return [Integer]
def df_e
  valid_cases = @ds_valid.cases
  column_count = @dep_columns.size
  valid_cases - column_count - 1
end

#df_rObject

Degrees of freedom for regression



91
92
93
# File 'lib/statsample/regression/multiple/baseengine.rb', line 91

# Degrees of freedom for regression: the number of columns in @dep_columns.
#
# @return [Integer]
def df_r
  column_count = @dep_columns.size
  column_count
end

#estimated_variance_covariance_matrixObject

Estimated variance-covariance matrix. Used to calculate the standard error of the constant.



139
140
141
142
143
144
145
146
147
148
149
# File 'lib/statsample/regression/multiple/baseengine.rb', line 139

# Builds X from the vectors of @ds_valid (every vector except @y_var, with a
# leading column of ones), computes (X'X)^-1 * MSE and returns the
# element-wise square root of that matrix.
#
# Entries that are not strictly positive have no real square root here and
# are returned as nil.
#
# @return [Matrix] square roots of the positive entries, nil elsewhere
def estimated_variance_covariance_matrix
  # Evaluate the mean square error once and reuse it as the scale factor.
  mse_p = mse
  columns = []
  @ds_valid.each_vector do |k, v|
    columns.push(v.data) unless k == @y_var
  end
  # Column of ones for the constant term goes first.
  columns.unshift([1.0] * @ds_valid.cases)
  x = Matrix.columns(columns)
  matrix = (x.t * x).inverse * mse_p
  matrix.collect { |i| Math.sqrt(i) if i > 0 }
end

#fObject

Fisher's F statistic for the ANOVA



99
100
101
# File 'lib/statsample/regression/multiple/baseengine.rb', line 99

# F ratio for the ANOVA: (ssr / df_r) over (sse / df_e).
#
# @return [Numeric]
def f
  regression_ms = ssr.quo(df_r)
  error_ms = sse.quo(df_e)
  regression_ms.quo(error_ms)
end

#mseObject

Mean Square Error



87
88
89
# File 'lib/statsample/regression/multiple/baseengine.rb', line 87

# Mean square error: sse divided by df_e.
#
# @return [Numeric]
def mse
  error_ss = sse
  error_ss.quo(df_e)
end

#msrObject

Mean square Regression



83
84
85
# File 'lib/statsample/regression/multiple/baseengine.rb', line 83

# Mean square of the regression: ssr divided by df_r.
#
# @return [Numeric]
def msr
  regression_ss = ssr
  regression_ss.quo(df_r)
end

#predictedObject

Retrieves a vector with predicted values for y



30
31
32
33
34
35
36
37
38
39
40
# File 'lib/statsample/regression/multiple/baseengine.rb', line 30

# Predicted y for every case index of @ds.
#
# For each case the values at that index are read from every column in
# @dep_columns and handed to process. A case with any nil value yields nil.
#
# @return [Object] the collected values converted with to_vector(:scale)
def predicted
  estimates = (0...@ds.cases).map do |row|
    values = @dep_columns.map { |column| column[row] }
    values.any?(&:nil?) ? nil : process(values)
  end
  estimates.to_vector(:scale)
end

#process(v) ⇒ Object



219
220
221
222
223
224
225
226
# File 'lib/statsample/regression/multiple/baseengine.rb', line 219

# Evaluates the regression equation for one case: the constant plus, for each
# field in @fields, its coefficient times the value of +v+ at the same
# position.
#
# @param v [#[]] case values, ordered like @fields
# @return [Numeric]
def process(v)
  weights = coeffs
  @fields.each_with_index.inject(constant) do |sum, (field, position)|
    sum + weights[field] * v[position]
  end
end

#rObject

R Multiple



58
59
60
# File 'lib/statsample/regression/multiple/baseengine.rb', line 58

# Multiple R. Placeholder in this base class.
#
# @raise [RuntimeError] always, with "You should implement this"
def r
  raise RuntimeError, "You should implement this"
end

#report_building(b) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/statsample/regression/multiple/baseengine.rb', line 163

# Writes the regression report into the builder +b+.
#
# Inside one section it emits, in order: engine class, case counts, R, R^2,
# the fitted equation, an "ANOVA" table (regression / error / total rows) and
# a "Beta coefficients" table (the constant, then one row per field).
# Every numeric cell is formatted with '%0.3f'.
#
# @param b [#section] builder whose section yields an object responding to
#   text and table; table yields an object responding to row
def report_building(b)
  fmt = lambda { |number| sprintf('%0.3f', number) }
  b.section(:name => _("Multiple Regression: ") + @name) do |section|
    estimates = coeffs
    section.text(_("Engine: %s") % self.class)
    section.text(_("Cases(listwise)=%d(%d)") % [@ds.cases, @ds_valid.cases])
    section.text("R=" + fmt.call(r))
    section.text("R^2=" + fmt.call(r2))

    equation = _("Equation") + "=" + fmt.call(constant) + " + " +
               @fields.collect { |field| sprintf('%0.3f%s', estimates[field], field) }.join(' + ')
    section.text(equation)

    section.table(:name => "ANOVA", :header => %w{source ss df ms f s}) do |anova|
      anova.row([_("Regression"), fmt.call(ssr), df_r, fmt.call(msr), fmt.call(f), fmt.call(significance)])
      anova.row([_("Error"), fmt.call(sse), df_e, fmt.call(mse), "", ""])
      anova.row([_("Total"), fmt.call(sst), df_r + df_e, "", "", ""])
    end

    betas = standarized_coeffs
    errors = coeffs_se
    header = %w{coeff b beta se t}.collect { |label| _(label) }
    section.table(:name => "Beta coefficients", :header => header) do |table|
      table.row([_("Constant"), fmt.call(constant), "-", fmt.call(constant_se), fmt.call(constant_t)])
      @fields.each do |field|
        estimate = estimates[field]
        # The t value is recomputed here as estimate / standard error.
        table.row([field, fmt.call(estimate), fmt.call(betas[field]), fmt.call(errors[field]), fmt.call(estimate.quo(errors[field]))])
      end
    end
  end
end

#residualsObject

Retrieves a vector with the residual values for y



46
47
48
49
50
51
52
53
54
55
56
# File 'lib/statsample/regression/multiple/baseengine.rb', line 46

# Residual (observed y minus processed value) for every case index of @ds.
#
# A case yields nil when any column in @dep_columns, or the y variable
# itself, is nil at that index.
#
# @return [Object] the collected values converted with to_vector(:scale)
def residuals
  differences = (0...@ds.cases).map do |row|
    values = @dep_columns.map { |column| column[row] }
    next nil if values.any?(&:nil?)
    next nil if @ds[@y_var][row].nil?
    @ds[@y_var][row] - process(values)
  end
  differences.to_vector(:scale)
end

#se_r2Object

Standard error of R^2



133
134
135
# File 'lib/statsample/regression/multiple/baseengine.rb', line 133

# Standard error computed from r2:
# sqrt(4 * r2 * (1 - r2)^2 * df_e^2 / ((cases^2 - 1) * (cases + 3))),
# where cases is the @cases value captured by the constructor.
#
# @return [Float]
def se_r2
  r_squared = r2
  numerator = 4 * r_squared * (1 - r_squared)**2 * df_e**2
  denominator = (@cases**2 - 1) * (@cases + 3)
  Math.sqrt(numerator.quo(denominator))
end

#significanceObject

Significance of Fisher's F statistic



103
104
105
# File 'lib/statsample/regression/multiple/baseengine.rb', line 103

# Significance of f: absolute value of one minus the F cumulative
# distribution evaluated at f with (df_r, df_e) degrees of freedom.
#
# @return [Float]
def significance
  cumulative = Distribution::F.cdf(f, df_r, df_e)
  (1.0 - cumulative).abs
end

#sseObject

Sum of squares (Error)



70
71
72
# File 'lib/statsample/regression/multiple/baseengine.rb', line 70

# Error sum of squares: sst minus ssr.
#
# @return [Numeric]
def sse
  total_ss = sst
  total_ss - ssr
end

#sse_directObject



216
217
218
# File 'lib/statsample/regression/multiple/baseengine.rb', line 216

# Error sum of squares, computed here as sst minus ssr (the same expression
# sse uses).
#
# @return [Numeric]
def sse_direct
  total_ss = sst
  total_ss - ssr
end

#ssrObject

Sum of squares (regression)



66
67
68
# File 'lib/statsample/regression/multiple/baseengine.rb', line 66

# Regression sum of squares: r2 times sst.
#
# @return [Numeric]
def ssr
  explained_share = r2
  explained_share * sst
end

#ssr_directObject

Sum of squares of regression using the predicted value minus y mean



201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/statsample/regression/multiple/baseengine.rb', line 201

# Regression sum of squares computed case by case: the sum of
# (process(values) - mean of @dy)^2 over every case index of @ds whose
# values in @dep_columns are all non-nil.
#
# @return [Numeric] 0 when no case qualifies
def ssr_direct
  y_mean = @dy.mean
  total = 0
  (0...@ds.cases).each do |row|
    values = @dep_columns.map { |column| column[row] }
    next if values.any?(&:nil?)
    total += (process(values) - y_mean)**2
  end
  total
end

#sstObject

Sum of squares Total



62
63
64
# File 'lib/statsample/regression/multiple/baseengine.rb', line 62

# Total sum of squares. Placeholder in this base class.
#
# @raise [RuntimeError] always, with "You should implement this"
def sst
  raise RuntimeError, "You should implement this"
end

#standarized_predictedObject

Retrieves a vector with the standardized predicted values for y



42
43
44
# File 'lib/statsample/regression/multiple/baseengine.rb', line 42

# Result of calling standarized on the vector returned by predicted.
#
# @return [Object]
def standarized_predicted
  estimates = predicted
  estimates.standarized
end

#summaryObject



158
159
160
161
162
# File 'lib/statsample/regression/multiple/baseengine.rb', line 158

# Text report for this object: adds self to a fresh ReportBuilder and returns
# the builder's to_text output.
#
# @return [Object] whatever ReportBuilder#to_text returns
def summary
  ReportBuilder.new.tap { |builder| builder.add(self) }.to_text
end

#tolerance(var) ⇒ Object

Tolerance for a given variable. See talkstats.com/showthread.php?t=5056



108
109
110
111
112
113
114
115
# File 'lib/statsample/regression/multiple/baseengine.rb', line 108

# Tolerance of +var+: one minus the r2 of a new engine of the same class,
# built from the named columns of @dep_columns with +var+ as its y variable.
#
# @param var [Object] field whose tolerance is wanted
# @return [Numeric] 1 - r2 of that auxiliary engine
def tolerance(var)
  vectors = assign_names(@dep_columns).inject({}) do |named, (field, column)|
    named[field] = column.to_vector(:scale)
    named
  end
  auxiliary = self.class.new(vectors.to_dataset, var)
  1 - auxiliary.r2
end