Module: OpenTox::Validation::ClassificationStatistics

Included in:
ClassificationCrossValidation, ClassificationLeaveOneOut, ClassificationTrainTest
Defined in:
lib/validation-statistics.rb

Overview

Statistical evaluation of classification validations

Instance Method Summary

  #probability_plot(format: "pdf") ⇒ Blob
    Plot accumulated accuracy vs. prediction probability.

  #statistics ⇒ Hash
    Compute classification statistics for the validation.

Instance Method Details

#probability_plot(format: "pdf") ⇒ Blob

Plot accumulated accuracy vs. prediction probability.

Parameters:

  • format (String, nil) (defaults to: "pdf")

Returns:

  • (Blob)
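
A minimal usage sketch, assuming a persisted validation object; the lookup via Mongoid's find and the variable names are illustrative, not part of this module:

validation = OpenTox::Validation::ClassificationCrossValidation.find(validation_id) # validation_id assumed to exist
File.open("probability_plot.pdf", "wb") do |f|
  f.write validation.probability_plot(format: "pdf") # returns the raw plot data from GridFS
end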


# File 'lib/validation-statistics.rb', line 76

def probability_plot format: "pdf"
  tmpdir = "/tmp"
  FileUtils.mkdir_p tmpdir
  tmpfile = File.join(tmpdir,"#{id.to_s}_probability.#{format}")
  accuracies = []
  probabilities = []
  correct_predictions = 0
  incorrect_predictions = 0
  # collect [probability, correct?] pairs for all predictions that have probabilities
  pp = []
  predictions.values.select{|p| p["probabilities"]}.each do |p|
    p["measurements"].each do |m|
      pp << [ p["probabilities"][p["value"]], p["value"] == m ]
    end
  end
  # sort by descending probability and accumulate the accuracy at each cutoff
  pp.sort_by!{|p| 1-p.first}
  pp.each do |p|
    p[1] ? correct_predictions += 1 : incorrect_predictions += 1
    accuracies << correct_predictions/(correct_predictions+incorrect_predictions).to_f
    probabilities << p[0]
  end
  # plot with ggplot2; the x axis is reversed so the most confident predictions come first
  R.assign "accuracy", accuracies
  R.assign "probability", probabilities
  R.eval "image = qplot(probability,accuracy)+ylab('Accumulated accuracy')+xlab('Prediction probability')+ylim(c(0,1))+scale_x_reverse()+geom_line()"
  R.eval "ggsave(file='#{tmpfile}', plot=image)"
  # store the plot in GridFS, remember its id and return the plot data
  file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{self.id.to_s}_probability_plot.#{format}")
  plot_id = $gridfs.insert_one(file)
  update(:probability_plot_id => plot_id)
  $gridfs.find_one(_id: probability_plot_id).data
end
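
To make the accumulated-accuracy curve concrete, here is a toy walk-through with invented [probability, correct?] pairs: after sorting by descending probability, the accuracy at position k is the fraction of correct predictions among the k most confident ones.

pairs = [[0.9, true], [0.8, true], [0.7, false], [0.6, true]] # already sorted by descending probability
correct = 0
pairs.each_with_index do |(prob, ok), k|
  correct += 1 if ok
  puts "probability >= #{prob}: accumulated accuracy #{(correct/(k+1).to_f).round(2)}"
end
# probability >= 0.9: accumulated accuracy 1.0
# probability >= 0.8: accumulated accuracy 1.0
# probability >= 0.7: accumulated accuracy 0.67
# probability >= 0.6: accumulated accuracy 0.75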

#statistics ⇒ Hash

Compute confusion matrices, accuracy, per-class true rates and predictivities for the validation, overall and split into high- and low-confidence predictions.

Returns:

  • (Hash)
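
A usage sketch, assuming a validation object as above; the hash keys follow the return value of the source below:

stats = validation.statistics
puts "Accuracy (all predictions): #{stats[:accuracy][:all]}"
stats[:accept_values].each do |v|
  puts "#{v}: true rate #{stats[:true_rate][:all][v]}, predictivity #{stats[:predictivity][:all][v]}"
end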


# File 'lib/validation-statistics.rb', line 8

def statistics
  self.accept_values = model.prediction_feature.accept_values
  # one confusion matrix and prediction counter per confidence bucket
  self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :confidence_high => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :confidence_low => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
  self.nr_predictions = {:all => 0,:confidence_high => 0,:confidence_low => 0}
  predictions.each do |cid,pred|
    # TODO: use measured majority class or all measurements??
    # consider only unambiguous measurements that come with probabilities
    if pred[:measurements].uniq.size == 1 and pred[:probabilities]
      m = pred[:measurements].first
      if pred[:value] == m
        # correct prediction: increment the diagonal
        accept_values.each_with_index do |v,i|
          if pred[:value] == v
            confusion_matrix[:all][i][i] += 1
            self.nr_predictions[:all] += 1
            if pred[:confidence].match(/Similar/i) # high confidence
              confusion_matrix[:confidence_high][i][i] += 1
              self.nr_predictions[:confidence_high] += 1
            elsif pred[:confidence].match(/Low/i) # low confidence
              confusion_matrix[:confidence_low][i][i] += 1
              self.nr_predictions[:confidence_low] += 1
            end
          end
        end
      elsif pred[:value] != m
        # incorrect prediction: increment the off-diagonal ((i+1)%2 assumes a binary endpoint)
        accept_values.each_with_index do |v,i|
          if pred[:value] == v
            confusion_matrix[:all][i][(i+1)%2] += 1
            self.nr_predictions[:all] += 1
            if pred[:confidence].match(/Similar/i)
              confusion_matrix[:confidence_high][i][(i+1)%2] += 1
              self.nr_predictions[:confidence_high] += 1
            elsif pred[:confidence].match(/Low/i)
              confusion_matrix[:confidence_low][i][(i+1)%2] += 1
              self.nr_predictions[:confidence_low] += 1
            end
          end
        end
      end
    end
  end

  # true rate: diagonal/row sum (recall); predictivity: diagonal/column sum (precision)
  self.true_rate = {:all => {}, :confidence_high => {}, :confidence_low => {}}
  self.predictivity = {:all => {}, :confidence_high => {}, :confidence_low => {}}
  accept_values.each_with_index do |v,i|
    [:all,:confidence_high,:confidence_low].each do |a|
      self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f
      self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f
    end
  end
  # accuracy from the first two diagonal entries (again assumes a binary endpoint)
  self.accuracy = {}
  [:all,:confidence_high,:confidence_low].each do |a|
    self.accuracy[a] = (confusion_matrix[a][0][0]+confusion_matrix[a][1][1])/nr_predictions[a].to_f
  end
  $logger.debug "Accuracy #{accuracy}"
  $logger.debug "Nr Predictions #{nr_predictions}"
  save
  {
    :accept_values => accept_values,
    :confusion_matrix => confusion_matrix,
    :accuracy => accuracy,
    :true_rate => self.true_rate,
    :predictivity => self.predictivity,
    :nr_predictions => nr_predictions,
  }
end
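
Since the accuracy above sums only the first two diagonal entries, the statistics assume a binary endpoint. A balanced accuracy can be derived from the returned per-class true rates; this helper is a sketch, not part of the module:

def balanced_accuracy stats
  # mean of the per-class true rates over all accept values
  rates = stats[:accept_values].collect{|v| stats[:true_rate][:all][v]}
  rates.reduce(:+)/rates.size.to_f
end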