Class: SigFil::StatisticalOutlierRemoval

Inherits:
Object
  • Object
show all
Includes:
Math
Defined in:
lib/sigfil/statistical_outlier_removal.rb

Constant Summary collapse

SEARCHER =
[:kdtree, :flann]

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dataset, mean_k = 2, std_mul = 0.0, searcher = :kdtree) ⇒ StatisticalOutlierRemoval

mean_k: Number of nearest points used for the mean distance estimation. std_mul: Standard deviation multiplier for the distance threshold.



16
17
18
19
20
21
22
23
24
25
# File 'lib/sigfil/statistical_outlier_removal.rb', line 16

# Builds a statistical outlier removal filter for +dataset+.
#
# @param dataset [Object] point set to filter; stored as given
# @param mean_k [Integer] number of points used for the mean distance
#   estimation
# @param std_mul [Float] standard deviation multiplier for the threshold
# @param searcher [Symbol] neighbour search backend; must be listed in
#   SEARCHER
# @raise [ArgumentError] if +searcher+ is not listed in SEARCHER
def initialize(dataset, mean_k = 2, std_mul = 0.0, searcher = :kdtree)
  @dataset = dataset
  @mean_k  = mean_k
  @std_mul = std_mul

  # Reject unsupported backends up front instead of failing later inside
  # apply_filter.
  raise ArgumentError, "Unknown searcher type: #{searcher}" unless SEARCHER.include?(searcher)

  @searcher = searcher
end

Instance Attribute Details

#dataset ⇒ Object

Returns the value of attribute dataset.



9
10
11
# File 'lib/sigfil/statistical_outlier_removal.rb', line 9

# @return [Object] the value currently held in @dataset (assigned by the
#   constructor)
def dataset; @dataset; end

#mean_k ⇒ Object

Returns the value of attribute mean_k.



9
10
11
# File 'lib/sigfil/statistical_outlier_removal.rb', line 9

# @return [Object] the value currently held in @mean_k (assigned by the
#   constructor)
def mean_k; @mean_k; end

#searcher ⇒ Object

Returns the value of attribute searcher.



9
10
11
# File 'lib/sigfil/statistical_outlier_removal.rb', line 9

# @return [Object] the value currently held in @searcher (assigned by the
#   constructor)
def searcher; @searcher; end

#std_mul ⇒ Object

Returns the value of attribute std_mul.



9
10
11
# File 'lib/sigfil/statistical_outlier_removal.rb', line 9

# @return [Object] the value currently held in @std_mul (assigned by the
#   constructor)
def std_mul; @std_mul; end

Instance Method Details

#apply_filter(scale_factors = nil) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/sigfil/statistical_outlier_removal.rb', line 27

# Filters the dataset by mean nearest-neighbour distance.
#
# For every row, @mean_k + 1 nearest neighbours are queried and the first
# result is discarded (presumably the query point itself); the remaining
# distances are averaged. Rows whose average exceeds
# <tt>mean + @std_mul * std</tt> of all averages are dropped.
#
# @param scale_factors [Array<Numeric>, nil] optional per-column multipliers
#   applied to a copy of the dataset before the neighbour search; the rows
#   returned are always taken from the unscaled @dataset
# @return [NMatrix] the rows of @dataset whose mean neighbour distance is at
#   or below the threshold
# @raise [ArgumentError] if scale_factors.size differs from @dataset.cols
def apply_filter(scale_factors = nil)
  Flann.set_distance_type!(:l2) if @searcher == :flann

  if scale_factors
    if scale_factors.size != @dataset.cols
      raise ArgumentError, "scale_factors.size != dataset.cols"
    end

    # Scale a copy so the rows returned to the caller stay untouched.
    dataset = @dataset.clone
    scale_factors.each_with_index do |s, i|
      dataset[0..-1, i] *= s
    end
  else
    dataset = @dataset
  end

  case @searcher
  when :flann
    # Index the same (possibly scaled) matrix that the queries below are
    # drawn from; indexing the unscaled @dataset would mix coordinate
    # systems whenever scale_factors is given.
    index = Flann::Index.new(dataset) do |params|
      params[:algorithm]    = :kdtree
      params[:trees]        = 4
      params[:centers_init] = :gonzales
    end
    index.build!
  when :kdtree
    # Containers::KDTree is fed a { row_index => point } hash.
    points = dataset.to_a.each_with_index.map { |pt, i| [i, pt] }.to_h
    index  = Containers::KDTree.new(points)
  end

  distances = Array.new(dataset.rows, 0.0)
  dataset.each_row(:clone).with_index do |row, row_idx|
    case @searcher
    when :flann
      # NOTE(review): the kdtree branch takes sqrt of its results while the
      # FLANN distances are used as returned -- confirm both backends yield
      # distances in the same units.
      _, dis = index.nearest_neighbors(row, @mean_k + 1)
    when :kdtree
      dis = index.find_nearest(row.to_a, @mean_k + 1).map { |r| sqrt(r[0]) }
    end

    # Skip the first neighbour, then average the remaining distances.
    distances[row_idx] = NMatrix[*dis[1..-1]].mean[0]
  end

  nm_d = NMatrix[*distances]
  d_th = nm_d.mean[0] + @std_mul * nm_d.std[0]

  filtered = []
  distances.each_with_index do |d, i|
    filtered << @dataset.row(i).to_a if d <= d_th
  end

  NMatrix[*filtered]
end