Class: Anomaly::Detector
- Inherits:
-
Object
- Object
- Anomaly::Detector
- Defined in:
- lib/anomaly/detector.rb
Instance Attribute Summary collapse
-
#eps ⇒ Object
Returns the value of attribute eps.
-
#mean ⇒ Object
readonly
Returns the value of attribute mean.
-
#std ⇒ Object
readonly
Returns the value of attribute std.
Instance Method Summary collapse
- #anomaly?(x, eps = @eps) ⇒ Boolean
-
#initialize(examples = nil, **opts) ⇒ Detector
constructor
A new instance of Detector.
-
#probability(x) ⇒ Object
Limits the probability of each feature to [0, 1] to keep probabilities on the same scale.
- #train(examples, eps: 0) ⇒ Object
- #trained? ⇒ Boolean
Constructor Details
#initialize(examples = nil, **opts) ⇒ Detector
Returns a new instance of Detector.
6 7 8 9 |
# File 'lib/anomaly/detector.rb', line 6

# Builds a detector and, when examples are supplied, trains it immediately.
#
# @param examples [#to_a, nil] rows forwarded to #train; when nil or false
#   the detector is left untrained
# @param opts [Hash] keyword options passed through unchanged to #train
def initialize(examples = nil, **opts)
  # Number of training rows seen so far; #trained? checks this counter.
  @m = 0
  return unless examples

  train(examples, **opts)
end
Instance Attribute Details
#eps ⇒ Object
Returns the value of attribute eps.
4 5 6 |
# File 'lib/anomaly/detector.rb', line 4

# Reader for the probability threshold stored on the detector.
#
# @return [Object] the current value of @eps (nil until it has been assigned)
def eps
  @eps
end
#mean ⇒ Object (readonly)
Returns the value of attribute mean.
3 4 5 |
# File 'lib/anomaly/detector.rb', line 3

# Reader for the means stored on the detector.
#
# @return [Object] the current value of @mean (nil until it has been assigned)
def mean
  @mean
end
#std ⇒ Object (readonly)
Returns the value of attribute std.
3 4 5 |
# File 'lib/anomaly/detector.rb', line 3

# Reader for the standard deviations stored on the detector.
#
# @return [Object] the current value of @std (nil until it has been assigned)
def std
  @std
end
Instance Method Details
#anomaly?(x, eps = @eps) ⇒ Boolean
86 87 88 |
# File 'lib/anomaly/detector.rb', line 86

# Tells whether an example's probability falls below a threshold.
#
# @param x [Object] feature values, handed straight to #probability
# @param eps [Numeric] cut-off to compare against; defaults to the stored @eps
# @return [Boolean] true when probability(x) is strictly less than eps
def anomaly?(x, eps = @eps)
  likelihood = probability(x)
  likelihood < eps
end
#probability(x) ⇒ Object
Limits the probability of each feature to [0, 1] to keep probabilities on the same scale.
77 78 79 80 81 82 83 84 |
# File 'lib/anomaly/detector.rb', line 77

# Multiplies together the per-feature values returned by normal_pdf, after
# limiting each one to at most 1 so that every factor stays on the same scale.
#
# @param x [#size, #[]] one value per trained feature (must have @n elements)
# @return [Numeric] product of the capped per-feature values, starting from 1
# @raise [RuntimeError] "Train me first" when the detector is not trained
# @raise [ArgumentError] when x.size differs from @n
def probability(x)
  raise "Train me first" unless trained?
  raise ArgumentError, "First argument must have #{@n} elements" unless x.size == @n

  factors = (0...@n).map do |idx|
    density = normal_pdf(x[idx], @mean[idx], @std[idx])
    # NaN or anything above 1 is replaced by the neutral factor 1.
    if density.nan? || density > 1
      1
    else
      density
    end
  end

  factors.inject(1) { |product, factor| product * factor }
end
#train(examples, eps: 0) ⇒ Object
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/anomaly/detector.rb', line 11

# Fits per-feature mean and standard deviation from labelled examples and
# settles on the probability threshold (@eps).
#
# Every example is a row whose last element is its label: rows ending in 0 are
# non-anomalies, any other row counts as an anomaly. Only non-anomalies are
# used to estimate the mean and standard deviation.
#
# @param examples [#to_a] rows of feature values followed by a label column
# @param eps [Numeric] threshold to store. When greater than 0, all
#   non-anomalies are used for training. When 0, the non-anomalies are split
#   with partition! and the candidate threshold with the highest
#   compute_f1_score on the held-out rows plus the anomalies is kept.
# @raise [RuntimeError] when there are no examples, fewer than two columns,
#   no non-anomalies, or no rows left to train on
def train(examples, eps: 0)
  # for Numo::NArray
  # TODO make more efficient when possible
  examples = examples.to_a

  raise "No examples" if examples.empty?
  raise "Must have at least two columns" if examples.first.size < 2

  # Divide into groups since we only want to train with non-anomalies.
  non_anomalies, anomalies = examples.partition { |example| example.last == 0 }

  raise "Must have at least one non-anomaly" if non_anomalies.empty?

  @eps = eps
  if @eps > 0
    # Use all non-anomalies to train.
    training_examples = non_anomalies
  else
    training_examples, test_examples = partition!(non_anomalies)
    test_examples.concat(anomalies)
  end

  # Remove last column.
  training_examples = training_examples.map { |e| e[0..-2] }
  @m = training_examples.size
  # NOTE(review): partition! is defined elsewhere; if it can return an empty
  # training half (e.g. with very few non-anomalies), fail with a clear
  # message here instead of a NoMethodError on nil just below.
  raise "Must have at least one training example" if training_examples.empty?

  @n = training_examples.first.size

  if defined?(Numo::SFloat)
    training_examples = Numo::SFloat.cast(training_examples)
    # Convert these to an Array for Marshal.dump
    @mean = training_examples.mean(0).to_a
    @std = training_examples.stddev(0).to_a
  elsif defined?(NMatrix)
    training_examples = NMatrix.to_na(training_examples)
    # Convert these to an Array for Marshal.dump
    @mean = training_examples.mean(1).to_a
    @std = training_examples.stddev(1).to_a
  else
    # Default to Array, since built-in Matrix does not give us a big performance advantage.
    cols = @n.times.map { |i| training_examples.map { |r| r[i] } }
    @mean = cols.map { |c| alt_mean(c) }
    @std = cols.each_with_index.map { |c, i| alt_std(c, @mean[i]) }
  end
  # A zero or NaN deviation is replaced by the smallest positive Float.
  @std.map! { |std| (std == 0 || std.nan?) ? Float::MIN : std }

  if @eps == 0
    # Find the best eps.
    # Candidates are 1, 3, 5, 7 and 9 times 10**-1 through 10**-9.
    epss = (1..9).flat_map { |i| [1, 3, 5, 7, 9].map { |j| (j * 10**(-i)).to_f } }
    f1_scores = epss.map { |candidate| [candidate, compute_f1_score(test_examples, candidate)] }
    @eps, _ = f1_scores.max_by { |v| v[1] }
  end
end
#trained? ⇒ Boolean
71 72 73 |
# File 'lib/anomaly/detector.rb', line 71

# Reports whether the detector has been trained on at least one row.
#
# @return [Boolean] true when the training-row counter @m is greater than zero
def trained?
  @m.positive?
end