Class: Statsample::Regression::Multiple::RubyEngine

Inherits:
MatrixEngine show all
Defined in:
lib/statsample/regression/multiple/rubyengine.rb

Overview

Pure Ruby class for multiple regression analysis. Slower than AlglibEngine, but it is pure Ruby and can use a pairwise approach for missing values. Coefficient calculation uses the correlation matrix between the vectors. If you need a listwise approach for missing values, use AlglibEngine, because it is faster.

Example:

# Three predictor vectors (@a, @b, @c) and the response vector (@y), ten values each.
@a = Daru::Vector.new([1, 3, 2, 4, 3, 5, 4, 6, 5, 7])
@b = Daru::Vector.new([3, 3, 4, 4, 5, 5, 6, 6, 4, 4])
@c = Daru::Vector.new([11, 22, 30, 40, 50, 65, 78, 79, 99, 100])
@y = Daru::Vector.new([3, 4, 5, 6, 7, 8, 9, 10, 20, 30])
# Bundle the vectors into a single data frame keyed by variable name.
ds = Daru::DataFrame.new({ a: @a, b: @b, c: @c, y: @y })
# Build the engine, naming :y as the dependent variable.
lr = Statsample::Regression::Multiple::RubyEngine.new(ds, :y)

Instance Attribute Summary

Attributes inherited from MatrixEngine

#cases, #digits, #x_mean, #x_sd, #y_mean, #y_sd

Attributes inherited from BaseEngine

#cases, #digits, #name, #total_cases, #valid_cases

Instance Method Summary collapse

Methods inherited from MatrixEngine

#coeffs, #coeffs_se, #constant, #constant_t, #df_e, #df_r, #r, #r2, #sst, #standarized_coeffs, #tolerance

Methods inherited from BaseEngine

#anova, #assign_names, #coeffs_se, #coeffs_t, #coeffs_tolerances, #constant_t, #df_e, #df_r, #estimated_variance_covariance_matrix, #f, #mse, #msr, #predicted, #probability, #process, #r, #r2_adjusted, #report_building, #residuals, #se_estimate, #se_r2, #sse, #sse_direct, #ssr, #ssr_direct, #sst, #standarized_predicted, #tolerance, univariate?

Methods included from Summarizable

#summary

Constructor Details

#initialize(ds, y_var, opts = Hash.new) ⇒ RubyEngine


19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/statsample/regression/multiple/rubyengine.rb', line 19

# Builds the engine from a dataset and the name of its dependent variable.
#
# ds    - dataset holding the dependent and independent vectors.
# y_var - name of the dependent vector inside ds.
# opts  - extra options forwarded to the parent constructor. The entries
#         computed here (:y_mean, :x_mean, :y_sd, :x_sd, :cases) replace
#         any caller-supplied entries with the same keys.
def initialize(ds, y_var, opts = Hash.new)
  correlations = Statsample::Bivariate.correlation_matrix(ds)
  predictors   = ds.vectors.to_a - [y_var]

  computed = {
    :y_mean => ds[y_var].mean,
    :x_mean => predictors.each_with_object({}) { |field, acc| acc[field] = ds[field].mean },
    :y_sd   => ds[y_var].sd,
    :x_sd   => predictors.each_with_object({}) { |field, acc| acc[field] = ds[field].sd },
    :cases  => Statsample::Bivariate.min_n_valid(ds)
  }

  # The receiver of merge is opts, so computed values win on key clashes.
  super(correlations, y_var, opts.merge(computed))

  @ds = ds
  # NOTE(review): @y_var is not assigned in this method; presumably the
  # parent constructor sets it - confirm.
  @dy          = ds[@y_var]
  @ds_valid    = ds.dup_only_valid
  @total_cases = @ds.nrows
  @valid_cases = @ds_valid.nrows
  # Copy of the dataset restricted to the independent vectors.
  @ds_indep    = ds.dup(ds.vectors.to_a - [y_var])
  set_dep_columns
end

Instance Method Details

#constant_se ⇒ Object

Standard error for constant


82
83
84
# File 'lib/statsample/regression/multiple/rubyengine.rb', line 82

# Standard error for the constant: the [0, 0] entry of
# estimated_variance_covariance_matrix.
def constant_se
  vcov = estimated_variance_covariance_matrix
  vcov[0, 0]
end

#fix_with_mean ⇒ Object


45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/statsample/regression/multiple/rubyengine.rb', line 45

# Fills isolated gaps in the independent variables with the field mean.
#
# Walks the rows of @ds_indep. When exactly one independent field of a row
# is nil, that cell is overwritten with the mean of the same field taken
# from @ds. Rows with no nil cell, or with two or more, are left untouched.
# Afterwards @ds_indep.update is called and the cached column arrays are
# rebuilt through set_dep_columns.
def fix_with_mean
  fields = @ds_indep.vectors.to_a
  # Running row position, used as the cell index when writing the mean back.
  i = 0
  @ds_indep.each(:row) do |row|
    # Look cells up by field name rather than destructuring |k, v| from
    # row.each: a row that enumerates bare values leaves v nil for every
    # cell, so the missing field could never be identified.
    empty = fields.select { |f| row[f].nil? }
    @ds_indep[empty[0]][i] = @ds[empty[0]].mean if empty.size == 1
    i += 1
  end
  @ds_indep.update
  set_dep_columns
end

#fix_with_regression ⇒ Object


61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/statsample/regression/multiple/rubyengine.rb', line 61

# Fills isolated gaps in the independent variables with a regression estimate.
#
# Walks the rows of @ds_indep. When exactly one independent field of a row
# is nil, a MultipleRegression is built on @ds_indep with that field as the
# dependent variable, and the cell is overwritten with the result of
# process applied to the row's remaining values (in vector order). Rows
# with no nil cell, or with two or more, are left untouched. Afterwards
# @ds_indep.update is called and the cached column arrays are rebuilt
# through set_dep_columns.
#
# NOTE(review): MultipleRegression is not defined in the visible source;
# confirm which class this constant resolves to from this scope.
def fix_with_regression
  names = @ds_indep.vectors.to_a
  # Running row position, used as the cell index when writing the estimate.
  i = 0
  @ds_indep.each(:row) do |row|
    # Look cells up by field name rather than destructuring |k, v| from
    # row.each: a row that enumerates bare values leaves v nil for every
    # cell, so the missing field could never be identified.
    empty = names.select { |f| row[f].nil? }
    if empty.size == 1
      field = empty[0]
      # Built per row on purpose: earlier iterations may already have
      # written estimates into @ds_indep.
      lr = MultipleRegression.new(@ds_indep, field)
      fields = names.reject { |f| f == field }.map { |f| row[f] }
      @ds_indep[field][i] = lr.process(fields)
    end
    i += 1
  end
  @ds_indep.update
  set_dep_columns
end

#set_dep_columns ⇒ Object


40
41
42
43
# File 'lib/statsample/regression/multiple/rubyengine.rb', line 40

# Rebuilds @dep_columns from scratch: one entry per vector of @ds_indep,
# each being that vector converted with to_a.
def set_dep_columns
  @dep_columns = []
  @ds_indep.each_vector do |vector|
    @dep_columns << vector.to_a
  end
end