Class: SigFil::StatisticalOutlierRemoval
- Inherits:
-
Object
- Object
- SigFil::StatisticalOutlierRemoval
- Includes:
- Math
- Defined in:
- lib/sigfil/statistical_outlier_removal.rb
Constant Summary collapse
- SEARCHER =
[:kdtree, :flann]
Instance Attribute Summary collapse
-
#dataset ⇒ Object
Returns the value of attribute dataset.
-
#mean_k ⇒ Object
Returns the value of attribute mean_k.
-
#searcher ⇒ Object
Returns the value of attribute searcher.
-
#std_mul ⇒ Object
Returns the value of attribute std_mul.
Instance Method Summary collapse
- #apply_filter(scale_factors = nil) ⇒ Object
-
#initialize(dataset, mean_k = 2, std_mul = 0.0, searcher = :kdtree) ⇒ StatisticalOutlierRemoval
constructor
mean_k: Number of points to use for mean distance estimation. std_mul: Standard deviation multiplier for the distance threshold.
Constructor Details
#initialize(dataset, mean_k = 2, std_mul = 0.0, searcher = :kdtree) ⇒ StatisticalOutlierRemoval
mean_k: Number of points to use for mean distance estimation. std_mul: Standard deviation multiplier for the distance threshold.
16 17 18 19 20 21 22 23 24 25 |
# File 'lib/sigfil/statistical_outlier_removal.rb', line 16
# Builds an outlier-removal filter.
#
# dataset::  the data to filter; stored as-is.
# mean_k::   number of neighbours used for the mean distance estimation.
# std_mul::  multiplier applied to the standard deviation when the
#            distance threshold is computed.
# searcher:: nearest-neighbour backend; must be one of SEARCHER.
#
# Raises ArgumentError when +searcher+ is not listed in SEARCHER.
def initialize(dataset, mean_k = 2, std_mul = 0.0, searcher = :kdtree)
  # Validate first so no state is assigned for a rejected configuration.
  raise ArgumentError, "Unknown searcher type: #{searcher}" unless SEARCHER.include?(searcher)

  @dataset = dataset
  @mean_k = mean_k
  @std_mul = std_mul
  @searcher = searcher
end
Instance Attribute Details
#dataset ⇒ Object
Returns the value of attribute dataset.
9 10 11 |
# File 'lib/sigfil/statistical_outlier_removal.rb', line 9
# Reader for the dataset that was handed to the constructor.
def dataset
  @dataset
end
#mean_k ⇒ Object
Returns the value of attribute mean_k.
9 10 11 |
# File 'lib/sigfil/statistical_outlier_removal.rb', line 9
# Reader for the neighbour count used in the mean distance estimation.
def mean_k
  @mean_k
end
#searcher ⇒ Object
Returns the value of attribute searcher.
9 10 11 |
# File 'lib/sigfil/statistical_outlier_removal.rb', line 9
# Reader for the nearest-neighbour backend symbol chosen at construction.
def searcher
  @searcher
end
#std_mul ⇒ Object
Returns the value of attribute std_mul.
9 10 11 |
# File 'lib/sigfil/statistical_outlier_removal.rb', line 9
# Reader for the standard deviation multiplier used in the threshold.
def std_mul
  @std_mul
end
Instance Method Details
#apply_filter(scale_factors = nil) ⇒ Object
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/sigfil/statistical_outlier_removal.rb', line 27
# Removes statistical outliers from the dataset.
#
# For every row, the mean of the distances to its @mean_k + 1 nearest hits
# is computed with the first hit dropped (presumably the query row itself,
# since every query row is also part of the search structure — confirm).
# Rows whose mean distance exceeds
#   mean(distances) + @std_mul * std(distances)
# are discarded.
#
# scale_factors:: optional per-column multipliers. When given, they are
#                 applied to a clone of the dataset that is used only for
#                 the neighbour search; the returned rows are always taken
#                 from the original, unscaled @dataset.
#
# Returns an NMatrix built from the rows that pass the threshold.
# Raises ArgumentError when scale_factors.size differs from @dataset.cols.
def apply_filter(scale_factors = nil)
  Flann.set_distance_type!(:l2) if @searcher == :flann

  if scale_factors
    unless scale_factors.size == @dataset.cols
      raise ArgumentError, "scale_factors.size != dataset.cols"
    end

    # Scale a copy so the caller's data is left untouched.
    dataset = @dataset.clone
    scale_factors.each_with_index do |s, i|
      dataset[0..-1, i] *= s
    end
  else
    dataset = @dataset
  end

  case @searcher
  when :flann
    # Index the (possibly scaled) working copy: the query rows below come
    # from the same copy, so index and queries must share one space.
    searcher = Flann::Index.new(dataset) do |params|
      params[:algorithm] = :kdtree
      params[:trees] = 4
      params[:centers_init] = :gonzales
    end
    searcher.build!
  when :kdtree
    dataset_h = dataset.to_a.each_with_index.map { |pt, i| [i, pt] }.to_h
    searcher = Containers::KDTree.new(dataset_h)
  end

  distances = Array.new(dataset.rows, 0.0)
  dataset.each_row(:clone).with_index do |row, iii|
    # NOTE(review): the kdtree branch takes sqrt of the reported distance
    # while the flann branch uses the values as returned — confirm both
    # backends end up in the same units.
    case @searcher
    when :flann
      _, dis = searcher.nearest_neighbors(row, @mean_k + 1)
    when :kdtree
      dis = searcher.find_nearest(row.to_a, @mean_k + 1).map { |r| sqrt(r[0]) }
    end
    # Skip the first hit and average the remaining neighbour distances.
    distances[iii] = NMatrix[*dis[1..-1]].mean[0]
  end

  nm_d = NMatrix[*distances]
  d_th = nm_d.mean[0] + @std_mul * nm_d.std[0]

  kept = distances.each_index.select { |i| distances[i] <= d_th }
  filtered = kept.map { |i| @dataset.row(i).to_a }
  NMatrix[*filtered]
end