Class: IsoTree::IsolationForest

Inherits:
Object
  • Object
show all
Defined in:
lib/isotree/isolation_forest.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(sample_size: nil, ntrees: 500, ndim: 3, ntry: 3, prob_pick_avg_gain: 0, prob_pick_pooled_gain: 0, prob_split_avg_gain: 0, prob_split_pooled_gain: 0, min_gain: 0, missing_action: "impute", new_categ_action: "smallest", categ_split_type: "subset", all_perm: false, coef_by_prop: false, sample_with_replacement: false, penalize_range: true, weigh_by_kurtosis: false, coefs: "normal", min_imp_obs: 3, depth_imp: "higher", weigh_imp_rows: "inverse", random_seed: 1, nthreads: -1) ⇒ IsolationForest

Returns a new instance of IsolationForest.



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/isotree/isolation_forest.rb', line 3

# Creates an unfitted model. Every keyword argument is stored in the
# instance variable of the same name; only +nthreads+ is adjusted first.
#
# @param nthreads [Integer] a negative value is replaced by
#   +Etc.nprocessors+
def initialize(
  sample_size: nil,
  ntrees: 500,
  ndim: 3,
  ntry: 3,
  prob_pick_avg_gain: 0,
  prob_pick_pooled_gain: 0,
  prob_split_avg_gain: 0,
  prob_split_pooled_gain: 0,
  min_gain: 0,
  missing_action: "impute",
  new_categ_action: "smallest",
  categ_split_type: "subset",
  all_perm: false,
  coef_by_prop: false,
  sample_with_replacement: false,
  penalize_range: true,
  weigh_by_kurtosis: false,
  coefs: "normal",
  min_imp_obs: 3,
  depth_imp: "higher",
  weigh_imp_rows: "inverse",
  random_seed: 1,
  nthreads: -1
)
  # Copy the options over in declaration order.
  @sample_size, @ntrees, @ndim, @ntry = sample_size, ntrees, ndim, ntry
  @prob_pick_avg_gain, @prob_pick_pooled_gain = prob_pick_avg_gain, prob_pick_pooled_gain
  @prob_split_avg_gain, @prob_split_pooled_gain = prob_split_avg_gain, prob_split_pooled_gain
  @min_gain, @missing_action, @new_categ_action = min_gain, missing_action, new_categ_action
  @categ_split_type, @all_perm, @coef_by_prop = categ_split_type, all_perm, coef_by_prop
  @sample_with_replacement, @penalize_range = sample_with_replacement, penalize_range
  @weigh_by_kurtosis, @coefs, @min_imp_obs = weigh_by_kurtosis, coefs, min_imp_obs
  @depth_imp, @weigh_imp_rows, @random_seed = depth_imp, weigh_imp_rows, random_seed

  # A negative thread count falls back to the processor count reported by
  # the Etc module (which returns virtual cores).
  @nthreads = nthreads < 0 ? Etc.nprocessors : nthreads
end

Class Method Details

.import_model(path) ⇒ Object



81
82
83
84
85
86
87
# File 'lib/isotree/isolation_forest.rb', line 81

# Builds a model from files written by #export_model.
#
# Reads JSON metadata from "<path>.metadata" and the serialized forest from
# +path+ itself.
#
# @param path [String] base path that was passed to #export_model
# @return [Object] a new instance with the metadata applied and the
#   deserialized forest stored in +@ext_iso_forest+
def self.import_model(path)
  model = new
  metadata = JSON.parse(File.read("#{path}.metadata"))
  # import_metadata is invoked through send — presumably because it is
  # private; confirm against the class definition.
  model.send(:import_metadata, metadata)
  model.instance_variable_set(:@ext_iso_forest, Ext.deserialize_ext_isoforest(path))
  model
end

Instance Method Details

#export_model(path) ⇒ Object

Uses the same format as Python so that exported models are compatible.



74
75
76
77
78
79
# File 'lib/isotree/isolation_forest.rb', line 74

# Writes the fitted model to disk.
#
# Produces two files: JSON metadata at "<path>.metadata" and the serialized
# forest at +path+ (written by Ext.serialize_ext_isoforest).
#
# @param path [String] base path for the exported files
def export_model(path)
  check_fit

  # NOTE(review): the argument to pretty_generate was missing in this
  # listing; export_metadata is assumed to be the counterpart of the
  # import_metadata helper used by .import_model — confirm.
  File.write("#{path}.metadata", JSON.pretty_generate(export_metadata))
  Ext.serialize_ext_isoforest(@ext_iso_forest, path)
end

#fit(x) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
# File 'lib/isotree/isolation_forest.rb', line 42

# Fits the forest on +x+ and stores the result in +@ext_iso_forest+.
#
# @param x input data, wrapped in a Dataset before use
# @return the value returned by Ext.fit_iforest
def fit(x)
  dataset = Dataset.new(x)
  prep_fit(dataset)

  options = data_options(dataset).merge(fit_options)
  nrows = options[:nrows]

  # Default to every row, and never ask for more rows than exist
  # (prevents a segfault).
  requested = options[:sample_size] || nrows
  options[:sample_size] = requested > nrows ? nrows : requested

  @ext_iso_forest = Ext.fit_iforest(options)
end

#predict(x, output: "score") ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/isotree/isolation_forest.rb', line 54

# Runs the fitted forest over +x+.
#
# @param x input data, wrapped in a Dataset before use
# @param output [String] "score" requests standardized output,
#   "avg_depth" requests non-standardized output
# @return the value returned by Ext.predict_iforest
# @raise [ArgumentError] if +output+ is neither "score" nor "avg_depth"
def predict(x, output: "score")
  check_fit

  dataset = Dataset.new(x)
  prep_predict(dataset)

  options = data_options(dataset).merge(nthreads: @nthreads)
  options[:standardize] =
    case output
    when "score" then true
    when "avg_depth" then false
    else raise ArgumentError, "Unknown output"
    end

  Ext.predict_iforest(@ext_iso_forest, options)
end