Class: MLAI::MultipleLinearRegression

Inherits:
Object
  • Object
show all
Defined in:
lib/ml_ai/multiple_linear_regression.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(alpha = 1e-8, regularization: 0.0) ⇒ MultipleLinearRegression

Returns a new instance of MultipleLinearRegression.



10
11
12
13
14
15
# File 'lib/ml_ai/multiple_linear_regression.rb', line 10

# Builds an unfitted model.
#
# @param alpha [Numeric] small constant intended to guard against a singular
#   matrix during inversion. It is only stored here; no method visible in
#   this listing reads it back.
# @param regularization [Numeric] ridge penalty strength added to the
#   diagonal of X^T * X when the model is fitted
def initialize(alpha = 1e-8, regularization: 0.0)
  # Learned parameters stay nil until #fit assigns them; #predict relies on
  # that to detect an unfitted model.
  @coefficients = @intercept = nil

  # Caller-supplied hyperparameters.
  @alpha = alpha
  @regularization = regularization
end

Instance Attribute Details

#coefficients ⇒ Object (readonly)

Returns the value of attribute coefficients.



8
9
10
# File 'lib/ml_ai/multiple_linear_regression.rb', line 8

# Reader for the fitted feature weights.
#
# Returns nil until #fit has run; afterwards an Array holding every element
# of the solved parameter vector except the first (the intercept).
def coefficients
  @coefficients
end

#intercept ⇒ Object (readonly)

Returns the value of attribute intercept.



8
9
10
# File 'lib/ml_ai/multiple_linear_regression.rb', line 8

# Reader for the fitted intercept (bias) term.
#
# Returns nil until #fit has run; afterwards the first element of the solved
# parameter vector.
def intercept
  @intercept
end

#regularization ⇒ Object (readonly)

Returns the value of attribute regularization.



8
9
10
# File 'lib/ml_ai/multiple_linear_regression.rb', line 8

# Reader for the ridge regularization strength given to the constructor
# (0.0 by default). #fit adds this value to the diagonal of X^T * X.
def regularization
  @regularization
end

Instance Method Details

#cross_validate(x_values: nil, y_values: nil, dataset: nil, feature_columns: nil, target_column: nil, k: 5) ⇒ Object

Cross-validation method to evaluate model performance



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/ml_ai/multiple_linear_regression.rb', line 79

# Estimates prediction error with k-fold cross-validation.
#
# Samples are split, in the order given (no shuffling), into +k+ contiguous
# folds. Each fold is held out once while the model is refitted on the
# remaining samples through #fit, and the held-out fold is scored with
# #mean_squared_error. When the sample count is not divisible by +k+, the
# first (count % k) folds receive one extra sample so that every sample is
# tested exactly once.
#
# Side effect: the receiver is refitted for every fold, so afterwards its
# intercept and coefficients are those of the last fold's training set.
#
# @param x_values [Array<Array<Numeric>>, nil] feature rows (ignored when
#   +dataset+ is given)
# @param y_values [Array<Numeric>, nil] targets (ignored when +dataset+ is given)
# @param dataset [#headers, #data, nil] object exposing column names and rows
# @param feature_columns [Array, nil] header names of the feature columns
# @param target_column [Object, nil] header name of the target column
# @param k [Integer] number of folds, between 2 and the number of samples
# @return [Float] unweighted mean of the per-fold mean squared errors
# @raise [ArgumentError] if inputs are missing, a column is unknown, or +k+
#   is out of range
# @raise [RuntimeError] if x_values and y_values differ in length
def cross_validate(x_values: nil, y_values: nil, dataset: nil, feature_columns: nil, target_column: nil, k: 5)
  if dataset
    if feature_columns.nil? || target_column.nil?
      raise ArgumentError, "feature_columns and target_column are required when a dataset is given"
    end

    headers = dataset.headers
    unknown = [*feature_columns, target_column].reject { |col| headers.include?(col) }
    raise ArgumentError, "Unknown column(s): #{unknown.join(', ')}" unless unknown.empty?

    # Extract feature and target columns from the dataset
    feature_indices = feature_columns.map { |col| headers.index(col) }
    target_index = headers.index(target_column)

    x_values = dataset.data.map { |row| feature_indices.map { |i| row[i] } }
    y_values = dataset.data.map { |row| row[target_index] }
  end

  if x_values.nil? || y_values.nil?
    raise ArgumentError, "Provide x_values and y_values, or a dataset with feature_columns and target_column"
  end
  raise "Input arrays must have the same length" unless x_values.length == y_values.length

  sample_count = x_values.length
  # k == 1 would leave an empty training set; k > sample_count would create
  # empty test folds whose error is NaN.
  unless k.is_a?(Integer) && k.between?(2, sample_count)
    raise ArgumentError, "k must be an integer between 2 and the number of samples (#{sample_count})"
  end

  base_size, extra = sample_count.divmod(k)
  test_start = 0

  errors = Array.new(k) do |fold|
    # The first `extra` folds absorb the remainder, one sample each.
    test_end = test_start + base_size + (fold < extra ? 1 : 0)

    x_train = x_values[0...test_start] + x_values[test_end..-1]
    y_train = y_values[0...test_start] + y_values[test_end..-1]

    fit(x_values: x_train, y_values: y_train)
    predictions = predict(x_values[test_start...test_end])
    fold_error = mean_squared_error(y_values[test_start...test_end], predictions)

    test_start = test_end
    fold_error
  end

  errors.sum / errors.size.to_f
end

#fit(x_values: nil, y_values: nil, dataset: nil, feature_columns: nil, target_column: nil) ⇒ Object

Fit method accepts either x_values and y_values or a Dataset object with specified columns



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/ml_ai/multiple_linear_regression.rb', line 18

# Fits the model with the regularized normal equation
# theta = (X^T * X + lambda * I)^-1 * X^T * y, where X is the feature matrix
# with a leading column of ones for the intercept.
#
# Accepts either x_values and y_values, or a dataset together with the names
# of its feature and target columns.
#
# NOTE(review): the penalty matrix covers the whole diagonal, so the
# intercept term is regularized too; ridge regression conventionally leaves
# the intercept unpenalized - confirm this is intended.
# NOTE(review): @alpha is not referenced in this method.
#
# @param x_values [Array<Array<Numeric>>, nil] feature rows (ignored when
#   +dataset+ is given)
# @param y_values [Array<Numeric>, nil] targets (ignored when +dataset+ is given)
# @param dataset [#headers, #data, nil] object exposing column names and rows
# @param feature_columns [Array, nil] header names of the feature columns
# @param target_column [Object, nil] header name of the target column
# @return [Array] the fitted coefficients (intercept excluded)
# @raise [ArgumentError] if inputs are missing or empty, or a column is unknown
# @raise [RuntimeError] if the input lengths differ or the matrix is singular
def fit(x_values: nil, y_values: nil, dataset: nil, feature_columns: nil, target_column: nil)
  if dataset
    if feature_columns.nil? || target_column.nil?
      raise ArgumentError, "feature_columns and target_column are required when a dataset is given"
    end

    # Fail early with the offending names instead of indexing rows with nil.
    headers = dataset.headers
    unknown = [*feature_columns, target_column].reject { |col| headers.include?(col) }
    raise ArgumentError, "Unknown column(s): #{unknown.join(', ')}" unless unknown.empty?

    # Extract feature and target columns from the dataset
    feature_indices = feature_columns.map { |col| headers.index(col) }
    target_index = headers.index(target_column)

    x_values = dataset.data.map { |row| feature_indices.map { |i| row[i] } }
    y_values = dataset.data.map { |row| row[target_index] }
  end

  if x_values.nil? || y_values.nil?
    raise ArgumentError, "Provide x_values and y_values, or a dataset with feature_columns and target_column"
  end
  raise "Input arrays must have the same length" unless x_values.length == y_values.length
  # An empty design matrix would otherwise leave nil parameters behind.
  raise ArgumentError, "Input arrays must not be empty" if x_values.empty?

  # Convert x_values to a matrix and add a column of ones for the intercept
  x_matrix = Matrix[*x_values.map { |x| [1] + x }]
  y_vector = Vector.elements(y_values)

  x_transpose = x_matrix.transpose
  regularization_matrix = Matrix.build(x_matrix.column_count) { |i, j| i == j ? @regularization : 0 }

  xtx = x_transpose * x_matrix + regularization_matrix

  begin
    # Multiply X^T by y first so the inverse is applied to a vector rather
    # than to a (features x samples) matrix.
    theta = xtx.inverse * (x_transpose * y_vector)
  rescue ExceptionForMatrix::ErrNotRegular
    raise "Matrix is singular or nearly singular, consider increasing regularization"
  end

  @intercept = theta[0]
  @coefficients = theta.to_a[1..-1]
end

#mean_squared_error(y_true, y_pred) ⇒ Object



60
61
62
63
64
65
66
# File 'lib/ml_ai/multiple_linear_regression.rb', line 60

# Mean of the squared differences between observed and predicted values.
#
# @param y_true [Array<Numeric>] observed values
# @param y_pred [Array<Numeric>] predicted values, aligned with y_true
# @return [Float] sum of squared errors divided by the number of samples
#   (NaN when both arrays are empty, since that is 0 / 0.0)
# @raise [RuntimeError] if the two arrays differ in length
def mean_squared_error(y_true, y_pred)
  sample_count = y_true.length
  raise "Input arrays must have the same length" unless sample_count == y_pred.length

  squared_errors = y_true.zip(y_pred).map do |actual, predicted|
    (actual - predicted) ** 2
  end

  squared_errors.sum / sample_count.to_f
end

#predict(x_values) ⇒ Object



50
51
52
53
54
55
56
57
58
# File 'lib/ml_ai/multiple_linear_regression.rb', line 50

# Predicts one target value per feature row with the fitted parameters.
#
# Every prediction starts at the intercept and adds coefficient * feature for
# each fitted coefficient, in coefficient order.
#
# @param x_values [Array<Array<Numeric>>] feature rows; presumably in the
#   same column order used for fitting - verify against callers
# @return [Array<Numeric>] one prediction per row
# @raise [RuntimeError] if the coefficients or the intercept are still nil
def predict(x_values)
  raise "Model has not been fitted yet" if [@coefficients, @intercept].any?(&:nil?)

  x_values.map do |row|
    prediction = @intercept
    @coefficients.each_with_index do |weight, column|
      prediction += weight * row[column]
    end
    prediction
  end
end

#r_squared(y_true, y_pred) ⇒ Object



68
69
70
71
72
73
74
75
76
# File 'lib/ml_ai/multiple_linear_regression.rb', line 68

# Coefficient of determination: 1 - SS_residual / SS_total.
#
# When y_true is constant, SS_total is zero and the ratio is undefined. In
# that case the score is 1.0 if the predictions match exactly and 0.0
# otherwise, rather than NaN or -Infinity.
#
# @param y_true [Array<Numeric>] observed values
# @param y_pred [Array<Numeric>] predicted values, aligned with y_true
# @return [Float] 1.0 for a perfect fit; may be negative for a model that
#   does worse than predicting the mean
# @raise [RuntimeError] if the two arrays differ in length
# @raise [ArgumentError] if the arrays are empty
def r_squared(y_true, y_pred)
  raise "Input arrays must have the same length" unless y_true.length == y_pred.length
  raise ArgumentError, "Input arrays must not be empty" if y_true.empty?

  mean_y = y_true.sum / y_true.length.to_f
  ss_total = y_true.sum { |y| (y - mean_y) ** 2 }
  ss_residual = y_true.zip(y_pred).sum { |y, predicted| (y - predicted) ** 2 }

  # Compare min and max as well as SS_total: rounding in the float mean can
  # leave a tiny non-zero SS_total for a target that is in fact constant.
  lowest, highest = y_true.minmax
  return(ss_residual.zero? ? 1.0 : 0.0) if lowest == highest || ss_total.zero?

  1 - (ss_residual / ss_total.to_f)
end