Module: OpenTox::Validation::ClassificationStatistics

Included in:
ClassificationCrossValidation, ClassificationLeaveOneOut, ClassificationTrainTest
Defined in:
lib/validation-statistics.rb

Overview

Statistical evaluation of classification validations

Instance Method Summary collapse

Instance Method Details

#probability_plot(format: "pdf") ⇒ Blob

Plot accuracy vs prediction probability

Parameters:

  • format (String, nil) (defaults to: "pdf")

Returns:

  • (Blob)


72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/validation-statistics.rb', line 72

# Plot cumulative accuracy vs prediction probability and store the image in GridFS.
# @param format [String] image format understood by ggsave (defaults to "pdf")
# @return [Blob] the stored plot data
def probability_plot format: "pdf"
  #unless probability_plot_id

    #tmpdir = File.join(ENV["HOME"], "tmp")
    tmpdir = "/tmp"
    #p tmpdir
    FileUtils.mkdir_p tmpdir
    tmpfile = File.join(tmpdir,"#{id.to_s}_probability.#{format}")
    accuracies = []
    probabilities = []
    correct_predictions = 0
    incorrect_predictions = 0
    # Collect [probability_of_predicted_class, prediction_correct?] pairs for
    # every measurement of every prediction that carries probabilities.
    pairs = []
    predictions.values.select{|p| p["probabilities"]}.compact.each do |p|
      p["measurements"].each do |m|
        pairs << [ p["probabilities"][p["value"]], p["value"] == m ]
      end
    end
    # Sort by descending probability, then accumulate a running accuracy.
    pairs.sort_by!{|p| 1-p.first}
    pairs.each do |p|
      p[1] ? correct_predictions += 1 : incorrect_predictions += 1
      accuracies << correct_predictions/(correct_predictions+incorrect_predictions).to_f
      probabilities << p[0]
    end
    R.assign "accuracy", accuracies
    R.assign "probability", probabilities
    R.eval "image = qplot(probability,accuracy)+ylab('Accumulated accuracy')+xlab('Prediction probability')+ylim(c(0,1))+scale_x_reverse()+geom_line()"
    R.eval "ggsave(file='#{tmpfile}', plot=image)"
    # Fix: the GridFS filename extension now matches the requested format
    # (it was hard-coded to ".svg" although the plot is saved as #{format}).
    file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_probability_plot.#{format}")
    plot_id = $gridfs.insert_one(file)
    update(:probability_plot_id => plot_id)
  #end
  $gridfs.find_one(_id: probability_plot_id).data
end

#statistics ⇒ Hash

Get statistics

Returns:

  • (Hash)


8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/validation-statistics.rb', line 8

# Compute classification statistics from the stored predictions, persist them
# on the validation object and return them as a hash.
#
# Generalized to any number of accept values: each unambiguous prediction with
# probabilities is tallied at confusion_matrix[predicted_index][measured_index]
# and accuracy is the diagonal sum over counted instances — identical to the
# previous hard-coded behavior for the binary case.
#
# @return [Hash] accept_values, (weighted) confusion matrix, (weighted)
#   accuracy, per-class true_rate and predictivity
def statistics 
  self.accept_values = model.prediction_feature.accept_values
  n = accept_values.size
  self.confusion_matrix = Array.new(n){Array.new(n,0)}
  self.weighted_confusion_matrix = Array.new(n){Array.new(n,0)}
  nr_instances = 0
  predictions.each do |cid,pred|
    # TODO
    # use predictions without probabilities (single neighbor)??
    # use measured majority class??
    # Only count predictions with probabilities whose measurements agree.
    next unless pred[:measurements].uniq.size == 1 and pred[:probabilities]
    measured = pred[:measurements].first
    i = accept_values.index(pred[:value])   # row: predicted class
    j = accept_values.index(measured)       # column: measured class
    next unless i and j                     # skip values outside accept_values
    confusion_matrix[i][j] += 1
    weighted_confusion_matrix[i][j] += pred[:probabilities][pred[:value]]
    nr_instances += 1
  end
  self.true_rate = {}
  self.predictivity = {}
  accept_values.each_with_index do |v,i|
    # true rate: correct predictions of class v / all predictions of class v
    self.true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f
    # predictivity: correct predictions of class v / all measurements of class v
    self.predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|row| row[i]}.reduce(:+).to_f
  end
  confidence_sum = weighted_confusion_matrix.flatten.reduce(:+)
  # accuracy = trace of the confusion matrix / number of counted instances
  correct = (0...n).collect{|i| confusion_matrix[i][i]}.reduce(:+)
  weighted_correct = (0...n).collect{|i| weighted_confusion_matrix[i][i]}.reduce(:+)
  self.accuracy = correct/nr_instances.to_f
  self.weighted_accuracy = weighted_correct/confidence_sum.to_f
  $logger.debug "Accuracy #{accuracy}"
  save
  {
    :accept_values => accept_values,
    :confusion_matrix => confusion_matrix,
    :weighted_confusion_matrix => weighted_confusion_matrix,
    :accuracy => accuracy,
    :weighted_accuracy => weighted_accuracy,
    :true_rate => self.true_rate,
    :predictivity => self.predictivity,
  }
end