Class: Magnifier

Inherits:
Object
  • Object
show all
Defined in:
lib/magnifier/magnifier.rb

Defined Under Namespace

Classes: Exporter, Importer

Constant Summary collapse

LEARNING_STEPS =

make configurable or check for convergence

1000

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(examples, threshold = 0.01) ⇒ Magnifier

examples is expected to be a 2-D array of real values



14
15
16
17
18
19
20
21
22
# File 'lib/magnifier/magnifier.rb', line 14

# Builds an untrained detector around a set of training examples.
#
# @param examples [Array] training data; it is splatted into Numo::DFloat[],
#   so presumably an array of numeric rows (or a flat array of numbers)
# @param threshold [Numeric] probability cut-off that #anomaly? compares against
def initialize(examples, threshold = 0.01)
  @threshold = threshold
  @f1_score = 0

  @training_set = Numo::DFloat[*examples]
  rows, columns = training_set.shape
  @training_set_size = rows
  # A shape with no second dimension is counted as a single feature.
  @features_count = columns || 1

  # Model parameters start as zero vectors; #train replaces them.
  @mu_vector = Numo::DFloat.zeros(@features_count)
  @sigma_squared_vector = Numo::DFloat.zeros(@features_count)
end

Instance Attribute Details

#f1_score ⇒ Object (readonly)

Returns the value of attribute f1_score.



8
9
10
# File 'lib/magnifier/magnifier.rb', line 8

# Reader for @f1_score. The constructor sets it to 0 and #optimize_threshold
# raises it whenever a candidate threshold achieves a higher score.
def f1_score
  @f1_score
end

#features_count ⇒ Object (readonly)

Returns the value of attribute features_count.



8
9
10
# File 'lib/magnifier/magnifier.rb', line 8

# Reader for @features_count: the second dimension of the training set's
# shape, or 1 when the shape has no second dimension (see the constructor).
def features_count
  @features_count
end

#mu_vector ⇒ Object (readonly)

Returns the value of attribute mu_vector.



8
9
10
# File 'lib/magnifier/magnifier.rb', line 8

# Reader for @mu_vector. It is a zero vector of length features_count until
# #train replaces it with @training_set.mean(0).
def mu_vector
  @mu_vector
end

#sigma_squared_vector ⇒ Object (readonly)

Returns the value of attribute sigma_squared_vector.



8
9
10
# File 'lib/magnifier/magnifier.rb', line 8

# Reader for @sigma_squared_vector. It is a Numo::DFloat zero vector until
# #train replaces it with the result of calling to_a on the summed squared
# deviations from mu_vector divided by training_set_size.
def sigma_squared_vector
  @sigma_squared_vector
end

#threshold ⇒ Object

Returns the value of attribute threshold.



11
12
13
# File 'lib/magnifier/magnifier.rb', line 11

# Reader for @threshold: the cut-off passed to the constructor (default 0.01),
# overwritten by #optimize_threshold. #anomaly? flags an example when its
# probability is below this value.
def threshold
  @threshold
end

#training_set ⇒ Object (readonly)

Returns the value of attribute training_set.



8
9
10
# File 'lib/magnifier/magnifier.rb', line 8

# Reader for @training_set: the Numo::DFloat the constructor builds from the
# examples it was given.
def training_set
  @training_set
end

#training_set_size ⇒ Object (readonly)

Returns the value of attribute training_set_size.



8
9
10
# File 'lib/magnifier/magnifier.rb', line 8

# Reader for @training_set_size: the first dimension of the training set's
# shape, used by #train as the divisor when averaging squared deviations.
def training_set_size
  @training_set_size
end

Instance Method Details

#anomaly?(example) ⇒ Boolean

Returns:

  • (Boolean)


63
64
65
# File 'lib/magnifier/magnifier.rb', line 63

# Tells whether an example is unlikely enough to be flagged.
#
# @param example [Enumerable] feature values, passed straight to #probability
# @return [Boolean] true when the example's probability is strictly below
#   the current threshold
def anomaly?(example)
  likelihood = probability(example)
  likelihood < threshold
end

#export(path_or_file) ⇒ Object



71
72
73
# File 'lib/magnifier/magnifier.rb', line 71

# Hands this instance and the given target to Magnifier::Exporter.export and
# returns whatever that call returns.
#
# @param path_or_file [Object] forwarded unchanged; presumably a file path or
#   an open IO, judging by the name — confirm against Exporter
def export(path_or_file)
  Magnifier::Exporter.export(path_or_file, self)
end

#import(path_or_file) ⇒ Object



67
68
69
# File 'lib/magnifier/magnifier.rb', line 67

# Hands this instance and the given source to Magnifier::Importer and returns
# whatever that call returns.
#
# NOTE(review): this invokes Importer.export, not Importer.import. That looks
# like a copy-paste slip from #export — confirm which class method Importer
# actually defines before relying on this.
#
# @param path_or_file [Object] forwarded unchanged; presumably a file path or
#   an open IO, judging by the name — confirm against Importer
def import(path_or_file)
  Magnifier::Importer.export(path_or_file, self)
end

#optimize_threshold(examples, base_truths) ⇒ Object

Optimizes the threshold using the F1 score. Requires a cross-validation set (which should differ from the training set!). TODO: convert base truth to boolean.



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/magnifier/magnifier.rb', line 32

# Searches for the probability threshold that gives the best F1 score on a
# labelled validation set, stores the winner in @threshold / @f1_score and
# returns both.
#
# Candidate thresholds are spread evenly between the lowest and highest
# probability of the given examples, LEARNING_STEPS apart. An example is
# predicted anomalous when its probability is strictly below the candidate.
#
# @param examples [Enumerable] validation examples, each passed to #probability
# @param base_truths [Enumerable] one label per example; 1 or true marks an
#   anomaly, anything else is treated as normal
# @return [Array] two elements: the selected threshold and its F1 score.
#   Both are 0 when no candidate scores above 0.
def optimize_threshold(examples, base_truths)
  expected = base_truths.map { |value| value == 1 || value == true }
  probabilities = examples.map { |example| probability(example) }
  lowest, highest = probabilities.minmax

  # Every search starts from scratch. Resetting only the threshold would let
  # a score left over from an earlier call block all new candidates and pair
  # a threshold of 0 with a stale F1 score.
  @threshold = 0
  @f1_score = 0

  step = (highest - lowest) / LEARNING_STEPS
  # Range#step raises on a zero step, which happens when all probabilities
  # are equal; there is then only one candidate worth evaluating.
  candidates = step.zero? ? [lowest] : (lowest..highest).step(step)

  candidates.each do |candidate|
    predictions = probabilities.map { |value| value < candidate }
    # compute_f1_score is defined elsewhere in this class.
    score = compute_f1_score(predictions, expected)
    next unless score > @f1_score

    @f1_score = score
    @threshold = candidate
  end

  [@threshold, @f1_score]
end

#probability(example) ⇒ Object



52
53
54
55
56
57
58
59
60
61
# File 'lib/magnifier/magnifier.rb', line 52

# Computes the likelihood of an example as the product of per-feature
# Gaussian densities, using mu_vector[i] as the mean and
# sigma_squared_vector[i] as the variance of feature i.
#
# @param example [#each_with_index] feature values in the same order as the
#   model's parameter vectors
# @return [Numeric] product of the densities; 1 for an example with no features
def probability(example)
  likelihood = 1
  example.each_with_index do |value, index|
    mean = mu_vector[index]
    variance = sigma_squared_vector[index]

    exponent = -((value - mean)**2 / (2 * variance))
    normalizer = (2 * Math::PI * variance)**0.5
    likelihood *= Math.exp(exponent) / normalizer
  end
  likelihood
end

#train ⇒ Object



24
25
26
27
# File 'lib/magnifier/magnifier.rb', line 24

# Fits the model parameters to the training set: stores the mean along
# axis 0 in @mu_vector, and the summed squared deviations from that mean
# divided by training_set_size in @sigma_squared_vector.
#
# @return [Array] the new @sigma_squared_vector (the result of to_a)
def train
  @mu_vector = @training_set.mean(0)

  deviations = training_set - mu_vector
  summed_squares = (deviations**2).sum(0)
  @sigma_squared_vector = (summed_squares / training_set_size).to_a
end