Module: OpenTox::Validation::ClassificationStatistics

Included in:
ClassificationCrossValidation, ClassificationLeaveOneOut, ClassificationTrainTest
Defined in:
lib/validation-statistics.rb

Overview

Statistical evaluation of classification validations

Instance Method Summary

Instance Method Details

#probability_plot(format: "pdf") ⇒ Blob

Plot accuracy vs prediction probability



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/validation-statistics.rb', line 76

# Plot accumulated accuracy against prediction probability and store the
# rendered image in GridFS.
#
# Predictions are sorted by descending probability; the curve shows the
# accuracy accumulated over the most confident predictions first.
# NOTE(review): the caching guard on probability_plot_id is currently
# disabled, so the plot is regenerated on every call.
#
# @param format [String] image format passed to R's ggsave (e.g. "pdf", "png")
# @return [String] binary image data read back from GridFS
def probability_plot format: "pdf"
  #unless probability_plot_id
    tmpdir = "/tmp"
    FileUtils.mkdir_p tmpdir
    tmpfile = File.join(tmpdir,"#{id.to_s}_probability.#{format}")
    # Collect [probability, correct?] pairs for every measurement of every
    # prediction that carries probabilities.
    pairs = []
    predictions.values.select{|pred| pred["probabilities"]}.each do |pred|
      pred["measurements"].each do |m|
        pairs << [ pred["probabilities"][pred["value"]], pred["value"] == m ]
      end
    end
    # Sort by descending probability.
    pairs.sort_by!{|pair| 1-pair.first}
    accuracies = []
    probabilities = []
    correct_predictions = 0
    incorrect_predictions = 0
    pairs.each do |probability,correct|
      correct ? correct_predictions += 1 : incorrect_predictions += 1
      accuracies << correct_predictions/(correct_predictions+incorrect_predictions).to_f
      probabilities << probability
    end
    R.assign "accuracy", accuracies
    R.assign "probability", probabilities
    R.eval "image = qplot(probability,accuracy)+ylab('Accumulated accuracy')+xlab('Prediction probability')+ylim(c(0,1))+scale_x_reverse()+geom_line()"
    R.eval "ggsave(file='#{tmpfile}', plot=image)"
    file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_probability_plot.#{format}")
    plot_id = $gridfs.insert_one(file)
    update(:probability_plot_id => plot_id)
  #end
  $gridfs.find_one(_id: probability_plot_id).data
end

#statistics ⇒ Hash

Get statistics



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/validation-statistics.rb', line 8

# Compute classification statistics over all predictions: confusion
# matrices, accuracy, true-class rates and predictivity, each split into
# :all, :confidence_high and :confidence_low buckets.
#
# Only predictions with an unambiguous measurement (a single unique
# measured value) and with probabilities are counted.
# NOTE(review): the misclassification column index (i+1)%2 assumes exactly
# two accept values (binary classification) — confirm before using with
# multi-class endpoints.
#
# @return [Hash] statistics summary (accept values, confusion matrices,
#   accuracy, true rates, predictivity, prediction counts)
def statistics 
  self.accept_values = model.prediction_feature.accept_values
  self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :confidence_high => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :confidence_low => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
  self.nr_predictions = {:all => 0,:confidence_high => 0,:confidence_low => 0}
  predictions.each do |cid,pred|
    # TODO: use measured majority class or all measurements??
    # Skip ambiguous measurements and predictions without probabilities.
    next unless pred[:measurements].uniq.size == 1 and pred[:probabilities]
    m = pred[:measurements].first
    accept_values.each_with_index do |v,i|
      next unless pred[:value] == v
      # Correct predictions land on the diagonal; misclassifications go to
      # the other class's column (binary assumption, see note above).
      j = pred[:value] == m ? i : (i+1)%2
      confusion_matrix[:all][i][j] += 1
      self.nr_predictions[:all] += 1
      if pred[:confidence].match(/Similar/i)
        confusion_matrix[:confidence_high][i][j] += 1
        self.nr_predictions[:confidence_high] += 1
      elsif pred[:confidence].match(/Low/i)
        confusion_matrix[:confidence_low][i][j] += 1
        self.nr_predictions[:confidence_low] += 1
      end
    end
  end

  self.true_rate = {:all => {}, :confidence_high => {}, :confidence_low => {}}
  self.predictivity = {:all => {}, :confidence_high => {}, :confidence_low => {}}
  accept_values.each_with_index do |v,i|
    [:all,:confidence_high,:confidence_low].each do |a|
      # Diagonal entry over its row sum / column sum; yields NaN (0/0.0)
      # when the bucket is empty, matching previous behavior.
      self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f
      self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f
    end
  end
  self.accuracy = {}
  [:all,:confidence_high,:confidence_low].each do |a|
    # Sum of the diagonal (correct predictions) over all counted predictions.
    # Generalized from the previous hard-coded 2x2 form ([0][0]+[1][1]);
    # identical for binary classification.
    correct = (0...accept_values.size).collect{|i| confusion_matrix[a][i][i]}.reduce(:+)
    self.accuracy[a] = correct/nr_predictions[a].to_f
  end
  $logger.debug "Accuracy #{accuracy}"
  $logger.debug "Nr Predictions #{nr_predictions}"
  save
  {
    :accept_values => accept_values,
    :confusion_matrix => confusion_matrix,
    :accuracy => accuracy,
    :true_rate => self.true_rate,
    :predictivity => self.predictivity,
    :nr_predictions => nr_predictions,
  }
end