Class: OpenTox::RegressionCrossValidation

Inherits:

CrossValidation

Object
CrossValidation
OpenTox::RegressionCrossValidation

show all

Defined in: lib/crossvalidation.rb

Instance Method Summary collapse

Methods inherited from CrossValidation

create, #model, #time, #validations

Instance Method Details

#confidence_plot ⇒ `Object`

# File 'lib/crossvalidation.rb', line 246

# Plots the absolute log10 prediction error against the prediction confidence
# with R and stores the resulting PNG in GridFS.
#
# The plot is rendered only while confidence_plot_id is unset (the same guard
# correlation_plot uses); later calls return the stored image instead of
# inserting another GridFS file on every call.
#
# @return [Object] the data of the stored plot file, as returned by GridFS
def confidence_plot
  unless confidence_plot_id
    tmpfile = "/tmp/#{id.to_s}_confidence.png"
    begin
      # [absolute log10 error, confidence] for predictions that have both a
      # measured (p[1]) and a predicted (p[2]) value
      error_confidence_pairs = predictions.collect{|p| [(Math.log10(p[1])-Math.log10(p[2])).abs,p[3]] if p[1] and p[2]}.compact
      R.assign "error", error_confidence_pairs.collect{|pair| pair[0]}
      R.assign "confidence", error_confidence_pairs.collect{|pair| pair[1]}
      # TODO fix axis names
      R.eval "image = qplot(confidence,error)"
      R.eval "image = image + stat_smooth(method='lm', se=FALSE)"
      R.eval "ggsave(file='#{tmpfile}', plot=image)"
      # PNG is binary data, so read it without any text-mode/encoding handling
      file = Mongo::Grid::File.new(File.binread(tmpfile), :filename => "#{self.id.to_s}_confidence_plot.png")
      plot_id = $gridfs.insert_one(file)
      update(:confidence_plot_id => plot_id)
    ensure
      # the image lives in GridFS now (or rendering failed): drop the temp file
      File.delete(tmpfile) if File.exist?(tmpfile)
    end
  end
  $gridfs.find_one(_id: confidence_plot_id).data
end

#correlation_plot ⇒ `Object`

# File 'lib/crossvalidation.rb', line 261

# Plots -log(prediction) against -log(measurement) with R, stores the PNG in
# GridFS and returns the stored image data.
#
# The plot is rendered only while correlation_plot_id is unset; later calls
# return the stored image.
#
# NOTE(review): the R code uses log() (natural logarithm in R), whereas the
# Ruby-side error computations in this class use Math.log10 - confirm this is
# intended.
# NOTE(review): the name is interpolated into the R plot title unescaped; a
# name containing a single quote would break the R expression.
#
# @return [Object] the data of the stored plot file, as returned by GridFS
def correlation_plot
  unless correlation_plot_id
    tmpfile = "/tmp/#{id.to_s}_correlation.png"
    begin
      # only rows with both a measured (p[1]) and a predicted (p[2]) value can
      # be plotted; confidence_plot applies the same filter
      complete = predictions.select{|p| p[1] and p[2]}
      R.assign "measurement", complete.collect{|p| p[1]}
      R.assign "prediction", complete.collect{|p| p[2]}
      R.eval "all = c(-log(measurement),-log(prediction))"
      # identical limits on both axes so the identity line runs corner to corner
      R.eval "range = c(min(all), max(all))"
      R.eval "image = qplot(-log(prediction),-log(measurement),main='#{self.name}',asp=1,xlim=range, ylim=range)"
      R.eval "image = image + geom_abline(intercept=0, slope=1)"
      R.eval "ggsave(file='#{tmpfile}', plot=image)"
      # PNG is binary data, so read it without any text-mode/encoding handling
      file = Mongo::Grid::File.new(File.binread(tmpfile), :filename => "#{self.id.to_s}_correlation_plot.png")
      plot_id = $gridfs.insert_one(file)
      update(:correlation_plot_id => plot_id)
    ensure
      # the image lives in GridFS now (or rendering failed): drop the temp file
      File.delete(tmpfile) if File.exist?(tmpfile)
    end
  end
  $gridfs.find_one(_id: correlation_plot_id).data
end

#misclassifications(n = nil) ⇒ `Object`

# File 'lib/crossvalidation.rb', line 216

# Returns the predictions with the largest relative error, worst first,
# together with the neighbors of each compound.
#
# Predictions containing any nil element are ignored. Predictions are ranked
# by relative error first and compound/neighbor lookups are performed only for
# the entries that are actually returned.
#
# @param n [Integer, nil] maximum number of entries to return; defaults to 10.
#   Zero or a negative value yields an empty array.
# @return [Array<Hash>] hashes with the keys :smiles, :measured, :predicted,
#   :log_error, :relative_error, :confidence and :neighbors (each neighbor is
#   a hash with :smiles, :similarity and :measurements)
def misclassifications n=nil
  n ||= 10
  # [0..n-1] would turn n = 0 into [0..-1], i.e. the whole array
  limit = [n, 0].max
  model = Model::Lazar.find(self.model_id)
  training_dataset = Dataset.find(model.training_dataset_id)
  prediction_feature = training_dataset.features.first
  # p = [compound id, measured, predicted, confidence]
  ranked = predictions.reject{|p| p.include? nil}.collect{|p| [(p[1]-p[2]).abs/p[1], p]}
  worst = ranked.sort{|a,b| b[0] <=> a[0]}.first(limit)
  worst.collect do |relative_error, p|
    compound = Compound.find(p[0])
    # each neighbor entry is indexed as [compound id, similarity]
    neighbors = compound.send(model.neighbor_algorithm,model.neighbor_algorithm_parameters).collect do |entry|
      neighbor = Compound.find(entry[0])
      values = training_dataset.values(neighbor,prediction_feature)
      { :smiles => neighbor.smiles, :similarity => entry[1], :measurements => values}
    end
    {
      :smiles => compound.smiles,
      :measured => p[1],
      :predicted => p[2],
      :log_error => (Math.log10(p[1])-Math.log10(p[2])).abs,
      :relative_error => relative_error,
      :confidence => p[3],
      :neighbors => neighbors
    }
  end
end

#statistics ⇒ `Object`

# File 'lib/crossvalidation.rb', line 171

# Computes MAE, RMSE (both on log10 scale) and R^2 of the cross-validation
# predictions, stores them together with finished_at via update_attributes
# and logs them.
#
# Each prediction is read as [compound_id, activity, prediction, ...]; the
# median of the activity values is used as the measured value. Predictions
# without an activity or a predicted value add a warning and are skipped;
# predictions whose activity is [nil] are skipped silently.
#
# MAE and RMSE are averaged over the predictions that were actually evaluated,
# not over all predictions, so skipped entries no longer deflate the errors.
# When nothing could be evaluated, mae, rmse and r_squared are stored as nil
# and R is not called.
#
# @return [Object] the result of the last $logger.debug call
def statistics
  squared_error_sum = 0.0
  absolute_error_sum = 0.0
  x = []
  y = []
  predictions.each do |pred|
    compound_id, activity, prediction = pred
    if activity and prediction
      unless activity == [nil]
        measured = Math.log10(activity.median)
        predicted = Math.log10(prediction)
        x << -measured
        y << -predicted
        error = predicted - measured
        squared_error_sum += error**2
        absolute_error_sum += error.abs
      end
    else
      message = "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
      warnings << message
      $logger.debug message
    end
  end

  evaluated = x.size
  if evaluated > 0
    R.assign "measurement", x
    R.assign "prediction", y
    R.eval "r <- cor(measurement,prediction,use='complete')"
    r = R.eval("r").to_ruby
    # NOTE(review): presumably an undefined correlation (e.g. a single pair)
    # comes back as nil from R - confirm; nil is passed through instead of
    # raising on nil**2
    r_squared = r ? r**2 : nil
    mae = absolute_error_sum/evaluated
    rmse = Math.sqrt(squared_error_sum/evaluated)
  else
    mae = rmse = r_squared = nil
  end

  update_attributes(
    mae: mae,
    rmse: rmse,
    r_squared: r_squared,
    finished_at: Time.now
  )
  $logger.debug "R^2 #{r_squared}"
  $logger.debug "RMSE #{rmse}"
  $logger.debug "MAE #{mae}"
end

Class: OpenTox::RegressionCrossValidation

Instance Method Summary collapse

Methods inherited from CrossValidation

Instance Method Details

#confidence_plot ⇒ Object

#correlation_plot ⇒ Object

#misclassifications(n = nil) ⇒ Object

#statistics ⇒ Object

#confidence_plot ⇒ `Object`

#correlation_plot ⇒ `Object`

#misclassifications(n = nil) ⇒ `Object`

#statistics ⇒ `Object`