Class: Eps::BaseEstimator

Inherits:
Object
  • Object
show all
Defined in:
lib/eps/base_estimator.rb

Direct Known Subclasses

LightGBM, LinearRegression, NaiveBayes

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data = nil, y = nil, **options) ⇒ BaseEstimator

Returns a new instance of BaseEstimator.



3
4
5
6
7
8
# File 'lib/eps/base_estimator.rb', line 3

# Builds an estimator and, when training data is supplied, trains it at once.
#
# A copy of every option given is kept in @options. The :intercept option is
# withheld from the call to train; all other options are forwarded to it.
#
# @param data training data; training is skipped when this is nil or false
# @param y second positional argument handed to train unchanged
# @param options keyword options stored in @options and (minus :intercept) passed to train
def initialize(data = nil, y = nil, **options)
  @options = options.dup
  return unless data

  # TODO: better pattern - don't pass most options to train
  training_options = options.reject { |name, _value| name == :intercept }
  train(data, y, **training_options)
end

Class Method Details

.load_pmml(pmml) ⇒ Object



43
44
45
46
47
48
# File 'lib/eps/base_estimator.rb', line 43

# Builds a model from existing PMML instead of training one.
#
# A bare instance is created with no arguments, its @evaluator is set to the
# result of PMML.load(pmml), and the PMML itself is cached in @pmml (as the
# result of #to_xml when the argument responds to it, otherwise as given).
#
# @param pmml the PMML source; passed to PMML.load as-is
# @return the newly created model
def self.load_pmml(pmml)
  new.tap do |model|
    model.instance_variable_set("@evaluator", PMML.load(pmml))

    # cache data
    cached =
      if pmml.respond_to?(:to_xml)
        pmml.to_xml
      else
        pmml
      end
    model.instance_variable_set("@pmml", cached)
  end
end

Instance Method Details

#evaluate(data, y = nil, target: nil, weight: nil) ⇒ Object



34
35
36
37
# File 'lib/eps/base_estimator.rb', line 34

# Scores this model's predictions against the labels of the given data.
#
# The inputs are run through prep_data (using the explicit target, or @target
# when none is given); the prepared data's labels, this model's predictions for
# it, and its weights are then handed to Eps.metrics.
#
# @param data input data, passed to prep_data
# @param y optional second argument for prep_data
# @param target optional target; falls back to @target when nil or false
# @param weight optional weight argument for prep_data
# @return whatever Eps.metrics returns
def evaluate(data, y = nil, target: nil, weight: nil)
  effective_target = target || @target
  # prep_data's second result is not needed here.
  prepared, _unused = prep_data(data, y, effective_target, weight)

  actual = prepared.label
  predicted = predict(prepared)
  Eps.metrics(actual, predicted, weight: prepared.weight)
end

#predict(data) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/eps/base_estimator.rb', line 10

# Validates the input against the evaluator's features and returns predictions.
#
# A single Hash is treated as one row: it is wrapped in an array for prediction
# and only the first prediction is returned. Any other input is passed straight
# to Eps::DataFrame.new and the full prediction result is returned.
#
# @param data a Hash (single row) or anything Eps::DataFrame.new accepts
# @return a single prediction for Hash input, otherwise the evaluator's predictions
# @raise [ArgumentError] when a feature column is missing, or when its detected
#   type does not match the feature's declared type
def predict(data)
  singular = data.is_a?(Hash)
  frame = Eps::DataFrame.new(singular ? [data] : data)

  @evaluator.features.each do |name, expected|
    values = frame.columns[name]
    raise ArgumentError, "Missing column: #{name}" unless values

    # A nil type means nothing could be inferred for this column, so the
    # type check is skipped.
    actual = Utils.column_type(values.compact, name)
    next if actual.nil?

    # Any declared type other than "numeric" must be backed by categorical data.
    required = expected == "numeric" ? "numeric" : "categorical"
    if actual != required
      raise ArgumentError, "Bad type for column #{name}: Expected #{expected} but got #{actual}"
    end
    # TODO: check for unknown values for categorical features
  end

  results = @evaluator.predict(frame)
  singular ? results.first : results
end

#summary(extended: false) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/eps/base_estimator.rb', line 50

# Builds a textual summary of the model.
#
# When @validation_set is present, the summary starts with a
# "Validation <metric>: <value>" line followed by a blank line. The metric is
# RMSE when @target_type is "numeric" and accuracy (as a percentage) otherwise.
# The model-specific text from _summary is appended after that.
#
# @param extended [Boolean] forwarded to _summary
# @return [String] the assembled summary
def summary(extended: false)
  report = String.new("")

  if @validation_set
    actual = @validation_set.label
    predicted = predict(@validation_set)

    if @target_type == "numeric"
      label = "RMSE"
      error = Metrics.rmse(actual, predicted, weight: @validation_set.weight)
      # Large errors are shown as whole numbers, small ones with 3 significant digits.
      rounded = error.round
      figure = rounded >= 1000 ? rounded.to_s : "%.3g" % error
    else
      label = "accuracy"
      share = Metrics.accuracy(actual, predicted, weight: @validation_set.weight)
      figure = "%.1f%%" % (100 * share).round(1)
    end

    report << "Validation %s: %s\n\n" % [label, figure]
  end

  report << _summary(extended: extended)
end

#to_pmml ⇒ Object



39
40
41
# File 'lib/eps/base_estimator.rb', line 39

# Returns the PMML for this model, generating it on first use.
#
# A truthy value already held in @pmml is returned as-is; otherwise the
# result of PMML.generate(self) is stored in @pmml and returned.
#
# @return the cached or freshly generated PMML
def to_pmml
  return @pmml if @pmml

  @pmml = PMML.generate(self)
end