Class: OpenTox::Algorithm::Caret

Inherits:
Object
  • Object
show all
Defined in:
lib/caret.rb,
lib/caret-classification.rb

Overview

Ruby interface for the R caret package. Caret model list: topepo.github.io/caret/modelList.html

Class Method Summary collapse

Class Method Details

.create_model_and_predict(dependent_variables:, independent_variables:, weights:, method:, query_variables:) ⇒ Hash

Create a local R caret model and make a prediction

Parameters:

Returns:

  • (Hash)


15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/caret.rb', line 15

def self.create_model_and_predict dependent_variables:, independent_variables:, weights:, method:, query_variables:
  remove = []
  # remove independent_variables with single values
  independent_variables.each_with_index { |values,i| remove << i if values.uniq.size == 1}
  remove.sort.reverse.each do |i|
    independent_variables.delete_at i
    query_variables.delete_at i
  end
  if independent_variables.flatten.uniq == ["NA"] or independent_variables.flatten.uniq == [] 
    prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
    prediction[:warnings] = ["No variables for regression model, using weighted average of similar substances (no prediction interval available)."]
  elsif
    dependent_variables.size < 3
    prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
    prediction[:warnings] = ["Insufficient number of neighbors (#{dependent_variables.size}) for regression model, using weighted average of similar substances (no prediction interval available)."]
  else
    dependent_variables.each_with_index do |v,i| 
      dependent_variables[i] = to_r(v)
    end
    independent_variables.each_with_index do |c,i| 
      c.each_with_index do |v,j|
        independent_variables[i][j] = to_r(v)
      end
    end
    query_variables.each_with_index do |v,i| 
      query_variables[i] = to_r(v)
    end
    begin
      R.assign "weights", weights
      r_data_frame = "data.frame(#{([dependent_variables]+independent_variables).collect{|r| "c(#{r.join(',')})"}.join(', ')})"
      R.eval "data <- #{r_data_frame}"
      R.assign "features", (0..independent_variables.size-1).to_a
      R.eval "names(data) <- append(c('activities'),features)" #
      R.eval "model <- train(activities ~ ., data = data, method = '#{method}', na.action = na.pass, allowParallel=TRUE)"
    rescue => e
      $logger.debug "R caret model creation error for:"
      $logger.debug dependent_variables
      $logger.debug independent_variables
      prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
      prediction[:warnings] ||= []
      prediction[:warnings] << "R caret model creation error, using weighted average of similar substances (no prediction interval available)."
      return prediction
    end
    begin
      R.eval "query <- data.frame(rbind(c(#{query_variables.join ','})))"
      R.eval "names(query) <- features" 
      R.eval "prediction <- predict(model,query)"
      value = R.eval("prediction").to_f
      rmse = R.eval("getTrainPerf(model)$TrainRMSE").to_f
      r_squared = R.eval("getTrainPerf(model)$TrainRsquared").to_f
      prediction_interval = value-1.96*rmse, value+1.96*rmse
      prediction = {
        :value => value,
        :rmse => rmse,
        :r_squared => r_squared,
        :prediction_interval => prediction_interval
      }
    rescue => e
      $logger.debug "R caret prediction error for:"
      $logger.debug self.inspect
      prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
      prediction[:warnings] << "R caret prediction error, using weighted average of similar substances (no prediction interval available)."
      return prediction
    end
    if prediction.nil? or prediction[:value].nil?
      prediction = Algorithm::Regression::weighted_average dependent_variables:dependent_variables, weights:weights
      prediction[:warnings] << "Empty R caret prediction, using weighted average of similar substances (no prediction interval available)."
    end
  end
  prediction

end

.method_missing(sym, *args, &block) ⇒ Object

Call caret methods dynamically, e.g. Caret.pls



89
90
91
92
# File 'lib/caret.rb', line 89

def self.method_missing(sym, *args, &block)
  args.first[:method] = sym.to_s
  self.create_model_and_predict args.first
end

.to_r(v) ⇒ Object

Convert Ruby values to R values



95
96
97
98
99
100
# File 'lib/caret.rb', line 95

def self.to_r v
  return "F" if v == false
  return "T" if v == true
  return nil if v.is_a? Float and v.nan?
  v
end