Module: OpenTox::Validation::ClassificationStatistics
- Defined in:
- lib/validation-statistics.rb
Overview
Statistical evaluation of classification validations
Instance Method Summary collapse
-
#probability_plot(format: "pdf") ⇒ Blob
Plot accuracy vs prediction probability.
-
#statistics ⇒ Hash
Compute, persist, and return classification statistics (confusion matrices, accuracy, true rates, predictivity).
Instance Method Details
#probability_plot(format: "pdf") ⇒ Blob
Plot accuracy vs prediction probability
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# File 'lib/validation-statistics.rb', line 76

# Plot accumulated accuracy vs prediction probability.
#
# The plot is rendered once and cached in GridFS: if +probability_plot_id+
# is already set, the stored file is returned directly. (Previously the
# guard was commented out, so every call re-rendered the plot and inserted
# a fresh GridFS file, orphaning the old ones.)
#
# @param format [String] image format understood by R's ggsave (e.g. "pdf", "png")
# @return [Blob] raw bytes of the rendered plot
def probability_plot format: "pdf"
  unless probability_plot_id
    tmpdir = "/tmp" # NOTE(review): Dir.mktmpdir would avoid filename collisions — confirm
    FileUtils.mkdir_p tmpdir
    tmpfile = File.join(tmpdir,"#{id.to_s}_probability.#{format}")
    accuracies = []
    probabilities = []
    correct_predictions = 0
    incorrect_predictions = 0
    # Collect [probability of the predicted class, prediction correct?] pairs
    # for every measurement of every prediction that carries probabilities.
    pp = []
    predictions.values.select{|p| p["probabilities"]}.compact.each do |p|
      p["measurements"].each do |m|
        pp << [ p["probabilities"][p["value"]], p["value"] == m ]
      end
    end
    # Sort by descending probability, then accumulate running accuracy.
    pp.sort_by!{|p| 1-p.first}
    pp.each do |p|
      p[1] ? correct_predictions += 1 : incorrect_predictions += 1
      accuracies << correct_predictions/(correct_predictions+incorrect_predictions).to_f
      probabilities << p[0]
    end
    R.assign "accuracy", accuracies
    R.assign "probability", probabilities
    R.eval "image = qplot(probability,accuracy)+ylab('Accumulated accuracy')+xlab('Prediction probability')+ylim(c(0,1))+scale_x_reverse()+geom_line()"
    R.eval "ggsave(file='#{tmpfile}', plot=image)"
    # binread: pdf/png are binary — File.read could corrupt them on some platforms.
    file = Mongo::Grid::File.new(File.binread(tmpfile), :filename => "#{self.id.to_s}_probability_plot.#{format}")
    plot_id = $gridfs.insert_one(file)
    update(:probability_plot_id => plot_id)
  end
  $gridfs.find_one(_id: probability_plot_id).data
end
#statistics ⇒ Hash
Compute and persist classification statistics; returns a summary hash
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/validation-statistics.rb', line 8

# Compute classification statistics for all predictions — confusion
# matrices, prediction counts, accuracy, true rate and predictivity —
# overall and split into high/low confidence buckets. Results are stored
# on the validation object (+save+) and returned as a summary hash.
#
# Only predictions with an unambiguous measurement (a single unique
# measured value) and with probabilities are counted. Generalized to any
# number of accept values: confusion-matrix rows index the predicted
# class, columns the measured class, and accuracy is the diagonal sum
# (the previous code hard-coded exactly two classes via (i+1)%2 and
# [0][0]+[1][1]; for the binary case the result is identical).
#
# @return [Hash] accept values, confusion matrices, accuracy, true rates,
#   predictivity and prediction counts, each keyed by confidence bucket
def statistics
  self.accept_values = model.prediction_feature.accept_values
  self.confusion_matrix = {:all => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :confidence_high => Array.new(accept_values.size){Array.new(accept_values.size,0)}, :confidence_low => Array.new(accept_values.size){Array.new(accept_values.size,0)}}
  self.nr_predictions = {:all => 0,:confidence_high => 0,:confidence_low => 0}
  predictions.each do |cid,pred|
    # TODO: use measured majority class or all measurements??
    if pred[:measurements].uniq.size == 1 and pred[:probabilities]
      m = pred[:measurements].first
      i = accept_values.index(pred[:value]) # row: predicted class
      j = accept_values.index(m)            # column: measured class
      # Skip predictions whose predicted or measured value is not an
      # accept value (previously an unknown measurement was miscounted
      # into the opposite class's cell).
      if i and j
        confusion_matrix[:all][i][j] += 1
        self.nr_predictions[:all] += 1
        if pred[:confidence].match(/Similar/i)
          confusion_matrix[:confidence_high][i][j] += 1
          self.nr_predictions[:confidence_high] += 1
        elsif pred[:confidence].match(/Low/i)
          confusion_matrix[:confidence_low][i][j] += 1
          self.nr_predictions[:confidence_low] += 1
        end
      end
    end
  end
  self.true_rate = {:all => {}, :confidence_high => {}, :confidence_low => {}}
  self.predictivity = {:all => {}, :confidence_high => {}, :confidence_low => {}}
  accept_values.each_with_index do |v,i|
    [:all,:confidence_high,:confidence_low].each do |a|
      # true_rate: correct predictions of v over all predictions of v (row sum)
      self.true_rate[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a][i].reduce(:+).to_f
      # predictivity: correct predictions of v over all measured v (column sum)
      self.predictivity[a][v] = confusion_matrix[a][i][i]/confusion_matrix[a].collect{|n| n[i]}.reduce(:+).to_f
    end
  end
  self.accuracy = {}
  [:all,:confidence_high,:confidence_low].each do |a|
    # Diagonal sum generalizes the old two-class [0][0]+[1][1].
    correct = (0...accept_values.size).map{|i| confusion_matrix[a][i][i]}.reduce(:+)
    self.accuracy[a] = correct/nr_predictions[a].to_f
  end
  $logger.debug "Accuracy #{accuracy}"
  $logger.debug "Nr Predictions #{nr_predictions}"
  save
  {
    :accept_values => accept_values,
    :confusion_matrix => confusion_matrix,
    :accuracy => accuracy,
    :true_rate => self.true_rate,
    :predictivity => self.predictivity,
    :nr_predictions => nr_predictions,
  }
end