Class: Anomaly::Detector

Inherits:

Object

Object
Anomaly::Detector

show all

Defined in: lib/anomaly/detector.rb

Instance Attribute Summary collapse

#eps ⇒ Object

Returns the value of attribute eps.
#mean ⇒ Object readonly

Returns the value of attribute mean.
#std ⇒ Object readonly

Returns the value of attribute std.

Instance Method Summary collapse

#anomaly?(x, eps = @eps) ⇒ Boolean
#initialize(examples = nil, **opts) ⇒ Detector constructor

A new instance of Detector.
#probability(x) ⇒ Object

Returns the product of the per-feature probability densities of x, with each density capped at 1 (kept within [0, 1]) so that every feature contributes on the same scale.
#train(examples, eps: 0) ⇒ Object
#trained? ⇒ Boolean

Constructor Details

#initialize(examples = nil, **opts) ⇒ `Detector`

Returns a new instance of Detector.

# File 'lib/anomaly/detector.rb', line 6

# Builds a detector that starts out untrained (@m is zero).
#
# When +examples+ is given, training happens immediately and any keyword
# options are forwarded to #train unchanged.
def initialize(examples = nil, **opts)
  @m = 0
  return unless examples

  train(examples, **opts)
end

Instance Attribute Details

#eps ⇒ `Object`

Returns the value of attribute eps.



4
5
6

# File 'lib/anomaly/detector.rb', line 4

# Reader for @eps, the threshold #anomaly? uses by default.
# #train assigns it: either the eps passed in, or (when called with eps: 0)
# the candidate with the highest F1 score. It is nil until #train has run.
def eps
  @eps
end

#mean ⇒ `Object` (readonly)

Returns the value of attribute mean.



3
4
5

# File 'lib/anomaly/detector.rb', line 3

# Reader for @mean, the Array that #train fills in from the non-anomalous
# training rows after the trailing label column is removed.
# It is nil until #train has run.
def mean
  @mean
end

#std ⇒ `Object` (readonly)

Returns the value of attribute std.



3
4
5

# File 'lib/anomaly/detector.rb', line 3

# Reader for @std, the Array that #train fills in alongside @mean.
# #train replaces any entry that is 0 or NaN with Float::MIN.
# It is nil until #train has run.
def std
  @std
end

Instance Method Details

#anomaly?(x, eps = @eps) ⇒ `Boolean`

Returns:

(Boolean)



86
87
88

# File 'lib/anomaly/detector.rb', line 86

# Reports whether +x+ is anomalous: true when its probability falls strictly
# below the threshold +eps+ (the trained @eps unless overridden).
def anomaly?(x, eps = @eps)
  likelihood = probability(x)
  likelihood < eps
end

#probability(x) ⇒ `Object`

Returns the product of the per-feature probability densities of x, with each density capped at 1 (kept within [0, 1]) so that every feature contributes on the same scale.

Raises:

(ArgumentError)

# File 'lib/anomaly/detector.rb', line 77

# Multiplies together the per-feature densities of +x+ under the trained
# @mean / @std. A density that is NaN or greater than 1 is counted as 1,
# so no single feature can push the product above 1.
#
# Raises RuntimeError when the detector is untrained and ArgumentError when
# +x+ does not have exactly @n elements.
def probability(x)
  raise "Train me first" unless trained?
  unless x.size == @n
    raise ArgumentError, "First argument must have #{@n} elements"
  end

  (0...@n).inject(1) do |product, idx|
    density = normal_pdf(x[idx], @mean[idx], @std[idx])
    capped = density.nan? || density > 1 ? 1 : density
    product * capped
  end
end

#train(examples, eps: 0) ⇒ `Object`

# File 'lib/anomaly/detector.rb', line 11

# Fits the detector to labelled examples.
#
# Every example is a row whose last element is the label: 0 marks a
# non-anomaly, any other value marks an anomaly. Only non-anomalies are used
# to compute @mean and @std; the label column is dropped before fitting.
#
# @param examples [#to_a] rows of feature values followed by a label
# @param eps [Numeric] anomaly threshold. A positive value is stored as-is
#   and all non-anomalies are used for fitting. With 0 (the default) the
#   non-anomalies are split by partition!, and the candidate threshold with
#   the highest compute_f1_score on the held-out rows plus the anomalies
#   is stored instead.
# @raise [RuntimeError] if there are no examples, the first row has fewer
#   than two columns, or there is no non-anomaly
# @raise [ArgumentError] if eps is negative
def train(examples, eps: 0)
  # for Numo::NArray
  # TODO make more efficient when possible
  examples = examples.to_a

  raise "No examples" if examples.empty?
  raise "Must have at least two columns" if examples.first.size < 2

  # Divide into groups since we only want to train with non-anomalies.
  non_anomalies, anomalies = examples.partition { |example| example.last == 0 }

  raise "Must have at least one non-anomaly" if non_anomalies.empty?

  # A negative eps would take the partitioning branch below but skip the
  # eps search at the end, leaving a partially trained model whose threshold
  # can never flag anything. Reject it before touching any state.
  raise ArgumentError, "eps must not be negative" if eps < 0

  @eps = eps
  if @eps > 0
    # Use all non-anomalies to train.
    training_examples = non_anomalies
  else
    training_examples, test_examples = partition!(non_anomalies)
    test_examples.concat(anomalies)
  end
  # Remove last column.
  training_examples = training_examples.map { |e| e[0..-2] }
  @m = training_examples.size
  @n = training_examples.first.size

  if defined?(Numo::SFloat)
    training_examples = Numo::SFloat.cast(training_examples)
    # Convert these to an Array for Marshal.dump
    @mean = training_examples.mean(0).to_a
    @std = training_examples.stddev(0).to_a
  elsif defined?(NMatrix)
    training_examples = NMatrix.to_na(training_examples)
    # Convert these to an Array for Marshal.dump
    @mean = training_examples.mean(1).to_a
    @std = training_examples.stddev(1).to_a
  else
    # Default to Array, since built-in Matrix does not give us a big performance advantage.
    cols = @n.times.map { |i| training_examples.map { |r| r[i] } }
    @mean = cols.map { |c| alt_mean(c) }
    @std = cols.each_with_index.map { |c, i| alt_std(c, @mean[i]) }
  end
  # Replace 0 and NaN entries in @std with the smallest positive float.
  @std.map! { |std| (std == 0 || std.nan?) ? Float::MIN : std }

  if @eps == 0
    # Find the best eps among 1e-1..9e-9 (odd mantissas only).
    epss = (1..9).flat_map { |i| [1, 3, 5, 7, 9].map { |j| (j * 10**(-i)).to_f } }
    f1_scores = epss.map { |candidate| [candidate, compute_f1_score(test_examples, candidate)] }
    @eps, _ = f1_scores.max_by { |v| v[1] }
  end
end

#trained? ⇒ `Boolean`

Returns:

(Boolean)



71
72
73

# File 'lib/anomaly/detector.rb', line 71

# True once the detector holds at least one training row, i.e. @m (set to 0
# by the constructor and to the training-set size by #train) is above zero.
def trained?
  @m.positive?
end

Class: Anomaly::Detector

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(examples = nil, **opts) ⇒ Detector

Instance Attribute Details

#eps ⇒ Object

#mean ⇒ Object (readonly)

#std ⇒ Object (readonly)

Instance Method Details

#anomaly?(x, eps = @eps) ⇒ Boolean

#probability(x) ⇒ Object

#train(examples, eps: 0) ⇒ Object

#trained? ⇒ Boolean