Class: Anomaly::Detector
- Inherits:
-
Object
- Object
- Anomaly::Detector
- Defined in:
- lib/anomaly/detector.rb
Instance Attribute Summary collapse
-
#eps ⇒ Object
Returns the value of attribute eps.
Instance Method Summary collapse
- #anomaly?(x, eps = @eps) ⇒ Boolean
-
#initialize(examples = nil, opts = {}) ⇒ Detector
constructor
A new instance of Detector.
-
#probability(x) ⇒ Object
Returns the probability of x as the product of its per-feature probabilities, each limited to [0,1] to keep the features at the same scale.
- #train(examples, opts = {}) ⇒ Object
- #trained? ⇒ Boolean
Constructor Details
#initialize(examples = nil, opts = {}) ⇒ Detector
Returns a new instance of Detector.
5 6 7 8 |
# File 'lib/anomaly/detector.rb', line 5

# Builds a detector and, when examples are supplied, trains it right away.
#
# @param examples [Array<Array>, nil] rows handed to #train; training is
#   skipped when this is nil (or false)
# @param opts [Hash] options forwarded unchanged to #train
def initialize(examples = nil, opts = {})
  # No training examples seen yet; #trained? reads this counter.
  @m = 0
  return unless examples

  train(examples, opts)
end
Instance Attribute Details
#eps ⇒ Object
Returns the value of attribute eps.
3 4 5 |
# File 'lib/anomaly/detector.rb', line 3

# Reader for the eps attribute.
#
# @return [Object] the current value of @eps (nil while it is unassigned)
def eps
  @eps
end
Instance Method Details
#anomaly?(x, eps = @eps) ⇒ Boolean
76 77 78 |
# File 'lib/anomaly/detector.rb', line 76

# Decides whether x is anomalous by comparing its probability to a threshold.
#
# @param x [#size, #[]] feature values, passed straight to #probability
# @param eps [Numeric] threshold; defaults to the detector's @eps
# @return [Boolean] true when probability(x) is strictly below eps
def anomaly?(x, eps = @eps)
  likelihood = probability(x)
  likelihood < eps
end
#probability(x) ⇒ Object
Returns the probability of x as the product of its per-feature probabilities, each limited to [0,1] to keep the features at the same scale.
67 68 69 70 71 72 73 74 |
# File 'lib/anomaly/detector.rb', line 67

# Computes the probability of x as the product of per-feature values
# returned by normal_pdf. Each value is limited to [0,1] to keep the
# features at the same scale: a NaN or anything above 1 counts as 1.
#
# @param x [#size, #[]] one value per feature, read at indexes 0...@n
# @return [Numeric] product of the limited per-feature values
# @raise [RuntimeError] "Train me first" when the detector is untrained
# @raise [ArgumentError] when x.size differs from @n
def probability(x)
  raise "Train me first" unless trained?
  raise ArgumentError, "First argument must have #{@n} elements" if x.size != @n

  # Fold left to right starting from the Integer 1, one feature at a time.
  (0...@n).inject(1) do |product, i|
    density = normal_pdf(x[i], @mean[i], @std[i])
    density = 1 if density.nan? || density > 1
    product * density
  end
end
#train(examples, opts = {}) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/anomaly/detector.rb', line 10

# Fits the per-feature mean and standard deviation from the non-anomalous
# examples and settles on the threshold @eps.
#
# Each example is a row whose last element is its label: 0 marks a
# non-anomaly, anything else an anomaly. The remaining elements are features.
#
# @param examples [Array<Array>] labelled rows, at least two columns wide
# @param opts [Hash] :eps - when greater than 0 it is used as the threshold
#   and every non-anomaly is used for training; otherwise the non-anomalies
#   are split by partition! and the candidate eps with the highest
#   compute_f1_score on the held-out rows (plus all anomalies) is kept
# @return [Array, nil] the winning [eps, score] pair when eps was searched,
#   nil otherwise
# @raise [RuntimeError] when there are no examples, fewer than two columns,
#   no non-anomalies, or no rows left to train on
def train(examples, opts = {})
  raise "No examples" if examples.empty?
  raise "Must have at least two columns" if examples.first.size < 2

  # Divide into groups since we only want to train with non-anomalies.
  non_anomalies, anomalies = examples.partition { |example| example.last == 0 }
  raise "Must have at least one non-anomaly" if non_anomalies.empty?

  @eps = (opts[:eps] || 0).to_f
  if @eps > 0
    # Use all non-anomalies to train.
    training_examples = non_anomalies
  else
    training_examples, test_examples = partition!(non_anomalies)
    test_examples.concat(anomalies)
  end

  # Without this guard an empty training split surfaces below as a
  # NoMethodError on nil (training_examples.first.size).
  raise "Must have at least one training example" if training_examples.empty?

  # Remove last column.
  training_examples = training_examples.map { |row| row[0..-2] }
  @m = training_examples.size
  @n = training_examples.first.size

  if defined?(NMatrix)
    # NOTE(review): assumes this NMatrix responds to to_na/mean/stddev —
    # confirm which gem supplies the constant.
    training_examples = NMatrix.to_na(training_examples)
    # Convert these to an Array for Marshal.dump
    @mean = training_examples.mean(1).to_a
    @std = training_examples.stddev(1).to_a
  else
    # Default to Array, since built-in Matrix does not give us a big performance advantage.
    cols = @n.times.map { |i| training_examples.map { |row| row[i] } }
    @mean = cols.map { |col| mean(col) }
    @std = cols.each_with_index.map { |col, i| std(col, @mean[i]) }
  end

  # A zero or NaN deviation is replaced by the smallest positive Float.
  @std.map! { |sigma| (sigma == 0 || sigma.nan?) ? Float::MIN : sigma }

  if @eps == 0
    # Find the best eps among 1, 3, 5, 7, 9 times 10^-1 .. 10^-9.
    epss = (1..9).flat_map { |i| [1, 3, 5, 7, 9].map { |j| (j * 10**(-i)).to_f } }
    f1_scores = epss.map { |candidate| [candidate, compute_f1_score(test_examples, candidate)] }
    @eps, _best_f1 = f1_scores.max_by { |_, f1| f1 }
  end
end
#trained? ⇒ Boolean
61 62 63 |
# File 'lib/anomaly/detector.rb', line 61

# Reports whether the detector has been trained.
#
# @return [Boolean] true once @m, the number of training examples, is above zero
def trained?
  example_count = @m
  example_count > 0
end