Module: OpenTox::Validation::ClassificationStatistics
Defined in: lib/validation-statistics.rb
Overview
Statistical evaluation of classification validations: confusion matrices, accuracy, weighted accuracy, per-class true rates and predictivity, and probability plots.
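A hypothetical usage sketch: `validation` stands for any persisted classification validation object that includes this module; only #statistics and its documented return keys come from this page, the variable name is illustrative.

  stats = validation.statistics
  stats[:accuracy]         # fraction of correct predictions
  stats[:confusion_matrix] # counts per (predicted, measured) class pair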
Instance Method Summary
- #probability_plot(format: "pdf") ⇒ Blob
  Plot accumulated accuracy vs prediction probability.
- #statistics ⇒ Hash
  Compute and store classification statistics.
Instance Method Details
#probability_plot(format: "pdf") ⇒ Blob
Plot accumulated accuracy vs prediction probability and return the stored plot data.
# File 'lib/validation-statistics.rb', line 72

def probability_plot format: "pdf"
  tmpdir = "/tmp"
  FileUtils.mkdir_p tmpdir
  tmpfile = File.join(tmpdir, "#{id}_probability.#{format}")
  accuracies = []
  probabilities = []
  correct_predictions = 0
  incorrect_predictions = 0
  # collect [probability, correct?] pairs for all predictions that carry probabilities
  pp = []
  predictions.values.select{|p| p["probabilities"]}.compact.each do |p|
    p["measurements"].each do |m|
      pp << [ p["probabilities"][p["value"]], p["value"] == m ]
    end
  end
  # sort by descending probability and accumulate the running accuracy
  pp.sort_by!{|p| 1 - p.first}
  pp.each do |p|
    p[1] ? correct_predictions += 1 : incorrect_predictions += 1
    accuracies << correct_predictions / (correct_predictions + incorrect_predictions).to_f
    probabilities << p[0]
  end
  # render the plot with ggplot2 via the R bridge and save it to a temporary file
  R.assign "accuracy", accuracies
  R.assign "probability", probabilities
  R.eval "image = qplot(probability,accuracy)+ylab('Accumulated accuracy')+xlab('Prediction probability')+ylim(c(0,1))+scale_x_reverse()+geom_line()"
  R.eval "ggsave(file='#{tmpfile}', plot=image)"
  # store the plot in GridFS and return its data
  file = Mongo::Grid::File.new(File.read(tmpfile), :filename => "#{id}_probability_plot.#{format}")
  plot_id = $gridfs.insert_one(file)
  update(:probability_plot_id => plot_id)
  $gridfs.find_one(_id: probability_plot_id).data
end
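A hypothetical usage sketch: `validation` is again an illustrative variable for an object including this module; the format keyword and Blob return value are from the signature above.

  plot = validation.probability_plot format: "pdf"
  # the returned Blob data can be written straight to disk
  File.binwrite "probability_plot.pdf", plot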
#statistics ⇒ Hash
Compute the confusion matrix, accuracy, weighted accuracy, true rates, and predictivity, and return them as a Hash.
# File 'lib/validation-statistics.rb', line 8

def statistics
  self.accept_values = model.prediction_feature.accept_values
  self.confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
  self.weighted_confusion_matrix = Array.new(accept_values.size){Array.new(accept_values.size,0)}
  nr_instances = 0
  predictions.each do |cid,pred|
    # TODO
    # use predictions without probabilities (single neighbor)??
    # use measured majority class??
    # count only predictions with an unambiguous measurement and with probabilities
    if pred[:measurements].uniq.size == 1 and pred[:probabilities]
      m = pred[:measurements].first
      if pred[:value] == m
        # correct prediction: increment the diagonal, weighting by prediction probability
        if pred[:value] == accept_values[0]
          confusion_matrix[0][0] += 1
          weighted_confusion_matrix[0][0] += pred[:probabilities][pred[:value]]
          nr_instances += 1
        elsif pred[:value] == accept_values[1]
          confusion_matrix[1][1] += 1
          weighted_confusion_matrix[1][1] += pred[:probabilities][pred[:value]]
          nr_instances += 1
        end
      elsif pred[:value] != m
        # incorrect prediction: increment the off-diagonal cell (rows = predicted class)
        if pred[:value] == accept_values[0]
          confusion_matrix[0][1] += 1
          weighted_confusion_matrix[0][1] += pred[:probabilities][pred[:value]]
          nr_instances += 1
        elsif pred[:value] == accept_values[1]
          confusion_matrix[1][0] += 1
          weighted_confusion_matrix[1][0] += pred[:probabilities][pred[:value]]
          nr_instances += 1
        end
      end
    end
  end
  # per-class rates: diagonal over row sum (true_rate) and diagonal over column sum (predictivity)
  self.true_rate = {}
  self.predictivity = {}
  accept_values.each_with_index do |v,i|
    self.true_rate[v] = confusion_matrix[i][i]/confusion_matrix[i].reduce(:+).to_f
    self.predictivity[v] = confusion_matrix[i][i]/confusion_matrix.collect{|n| n[i]}.reduce(:+).to_f
  end
  # sum of all probabilities, used to normalize the weighted accuracy
  confidence_sum = 0
  weighted_confusion_matrix.each do |r|
    r.each do |c|
      confidence_sum += c
    end
  end
  self.accuracy = (confusion_matrix[0][0]+confusion_matrix[1][1])/nr_instances.to_f
  self.weighted_accuracy = (weighted_confusion_matrix[0][0]+weighted_confusion_matrix[1][1])/confidence_sum.to_f
  $logger.debug "Accuracy #{accuracy}"
  save
  {
    :accept_values => accept_values,
    :confusion_matrix => confusion_matrix,
    :weighted_confusion_matrix => weighted_confusion_matrix,
    :accuracy => accuracy,
    :weighted_accuracy => weighted_accuracy,
    :true_rate => self.true_rate,
    :predictivity => self.predictivity,
  }
end
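A self-contained sketch of the formulas above on a hand-made 2x2 confusion matrix (rows = predicted class, columns = measured class, mirroring the counting logic in #statistics); the numbers are illustrative, not from any real validation.

  accept_values = ["active", "inactive"]
  confusion_matrix = [[40, 10],  # predicted "active":   40 correct, 10 wrong
                      [5, 45]]   # predicted "inactive":  5 wrong,  45 correct
  true_rate = {}
  predictivity = {}
  accept_values.each_with_index do |v,i|
    true_rate[v]    = confusion_matrix[i][i] / confusion_matrix[i].reduce(:+).to_f
    predictivity[v] = confusion_matrix[i][i] / confusion_matrix.collect{|r| r[i]}.reduce(:+).to_f
  end
  accuracy = (confusion_matrix[0][0] + confusion_matrix[1][1]) / 100.0
  true_rate    # => {"active"=>0.8, "inactive"=>0.9}
  predictivity # => {"active"=>0.888..., "inactive"=>0.818...}
  accuracy     # => 0.85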