14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
# File 'lib/adaboost/features_analyzer.rb', line 14
# Computes per-feature summary statistics and the label distribution for a
# set of samples.
#
# Samples are indexed positionally: entries 0...@y_index are read as feature
# values and the entry at @y_index is read as the label. A label equal to -1
# is counted as negative; every other label is counted as positive.
#
# @param samples [Array<Array<Numeric>>] samples to analyze; only the first
#   sample's size is used to validate the layout
# @return [Analyze] object whose +statistics+ holds one FeatureStatistic per
#   feature (min, max, sum, avg, rng, vrn, std filled in) and whose
#   +distribution+ holds the negative/positive label counts
# @raise [ArgumentError] if +samples+ is empty, if @y_index is below 1, or if
#   the first sample is too short to contain a feature and the label
def analyze(samples)
  number_of_samples = samples.size
  raise ArgumentError, 'At least one sample is needed to analyze.' if number_of_samples < 1

  number_of_features = @y_index
  sample_size = samples[0].size
  if number_of_features < 1 || sample_size < 2 || sample_size <= @y_index
    raise ArgumentError, 'At least 1 feature is needed to analyze.'
  end

  distribution = Distribution.new(0, 0)
  # Seed min/max with the extreme floats so the first observed value replaces them.
  statistics = Array.new(number_of_features) do
    FeatureStatistic.new(Float::MAX, -Float::MAX, 0, 0, 0, 0)
  end

  # First pass: label counts plus min / max / sum per feature.
  samples.each do |sample|
    if sample[@y_index] == -1
      distribution.negative += 1
    else
      distribution.positive += 1
    end

    statistics.each_with_index do |statistic, i|
      feature_value = sample[i]
      statistic.min = feature_value if feature_value < statistic.min
      statistic.max = feature_value if feature_value > statistic.max
      statistic.sum += feature_value
    end
  end

  statistics.each do |statistic|
    statistic.avg = statistic.sum / number_of_samples.to_f
    statistic.rng = (statistic.max - statistic.min).abs
  end

  # Second pass: accumulate squared deviations from the mean.
  samples.each do |sample|
    statistics.each_with_index do |statistic, i|
      statistic.vrn += (statistic.avg - sample[i])**2
    end
  end

  # Sample variance divides by n - 1. With a single sample that would be
  # 0.0 / 0.0 (NaN), so the divisor is clamped to 1 and the variance and
  # standard deviation of a lone sample are reported as 0.0.
  divisor = [number_of_samples - 1, 1].max.to_f
  statistics.each do |statistic|
    statistic.vrn /= divisor
    statistic.std = Math.sqrt(statistic.vrn)
  end

  result = Analyze.new
  result.statistics = statistics
  result.distribution = distribution
  result
end
|