Class: OpenTox::Compound

Inherits:
Object show all
Includes:
OpenTox
Defined in:
lib/compound.rb

Constant Summary collapse

DEFAULT_FINGERPRINT =
"MP2D"

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.find_or_create_by(params) ⇒ Object

Overwrites standard Mongoid method to create fingerprints before database insertion



29
30
31
32
33
34
# File 'lib/compound.rb', line 29

# Finds or initializes a compound and stores the size of its default
# fingerprint before saving it.
#
# @param params [Hash] attributes passed on to +find_or_initialize_by+
# @return [Object] the found or initialized compound, after +save+ was called
def self.find_or_create_by params
  find_or_initialize_by(params).tap do |record|
    # fingerprint() computes and caches the fingerprint when it is missing
    record.default_fingerprint_size = record.fingerprint(DEFAULT_FINGERPRINT).size
    record.save
  end
end

.from_inchi(inchi) ⇒ OpenTox::Compound

Create a compound from inchi string

Parameters:

  • inchi (String)

    InChI string

Returns:



144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/compound.rb', line 144

# Create a compound from an InChI string.
#
# When the conversion to canonical SMILES yields an empty string, a compound
# carrying only a warning message is created instead.
#
# @param inchi [String] InChI string
# @return [Object] result of Compound.find_or_create_by
def self.from_inchi inchi
  # Temporary workaround for OpenBabels Inchi bug
  # http://sourceforge.net/p/openbabel/bugs/957/
  # bug has not been fixed in latest git/development version
  #smiles = `echo "#{inchi}" | "#{File.join(File.dirname(__FILE__),"..","openbabel","bin","babel")}" -iinchi - -ocan`.chomp.strip
  canonical_smiles = obconversion(inchi,"inchi","can")
  unless canonical_smiles.empty?
    return Compound.find_or_create_by(:smiles => canonical_smiles, :inchi => inchi)
  end
  failure_note = "InChi parsing failed for #{inchi}, this may be caused by an incorrect InChi string or a bug in OpenBabel libraries."
  Compound.find_or_create_by(:warnings => [failure_note])
end

.from_name(name) ⇒ OpenTox::Compound

Create a compound from name. Relies on an external service for name lookups.

Examples:

compound = OpenTox::Compound.from_name("Benzene")

Parameters:

  • name (String)

    can also be an InChI/InChIKey, CAS number, etc.

Returns:



170
171
172
# File 'lib/compound.rb', line 170

# Create a compound from a name by asking the service at CACTUS_URI for the
# corresponding SMILES.
#
# @param name [String] identifier to look up; it is URI-escaped before being
#   placed into the request path
# @return [Object] result of Compound.from_smiles for the service response
def self.from_name name
  # URI.escape was removed in Ruby 3.0. It was a thin wrapper around the
  # RFC 2396 parser's #escape, which is still available, so call that directly.
  parser = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::DEFAULT_PARSER
  lookup_uri = File.join(CACTUS_URI, parser.escape(name), "smiles")
  Compound.from_smiles RestClientWrapper.get(lookup_uri)
end

.from_sdf(sdf) ⇒ OpenTox::Compound

Create a compound from sdf string

Parameters:

  • sdf (String)

    SDF string

Returns:



160
161
162
163
# File 'lib/compound.rb', line 160

# Create a compound from an SDF string by converting it to canonical SMILES.
#
# @param sdf [String] SDF string
# @return [Object] result of Compound.from_smiles
def self.from_sdf sdf
  # only the derived SMILES is passed on; the sdf is not stored because it might be 2D
  canonical_smiles = obconversion(sdf,"sdf","can")
  Compound.from_smiles(canonical_smiles)
end

.from_smiles(smiles) ⇒ OpenTox::Compound

Create a compound from smiles string

Examples:

compound = OpenTox::Compound.from_smiles("c1ccccc1")

Parameters:

  • smiles (String)

    Smiles string

Returns:



127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/compound.rb', line 127

# Create a compound from a SMILES string.
#
# The input is converted to canonical SMILES (used for compound comparisons).
# A warning is logged and nil is returned when the input contains whitespace
# or when the conversion yields an empty string.
#
# @param smiles [String] SMILES string
# @return [Object, nil] result of Compound.find_or_create_by, or nil on failure
def self.from_smiles smiles
  if smiles.match(/\s/) # spaces seem to confuse obconversion and may lead to invalid smiles
    $logger.warn "SMILES parsing failed for '#{smiles}', SMILES string contains whitespaces."
    return nil
  end
  # Keep the caller's input in `smiles` so that the warning below can report
  # it; the conversion result is an empty string exactly when parsing failed.
  canonical = obconversion(smiles,"smi","can")
  if canonical.empty?
    $logger.warn "SMILES parsing failed for '#{smiles}', this may be caused by an incorrect SMILES string."
    return nil
  end
  Compound.find_or_create_by :smiles => canonical
end

Instance Method Details

#chemblid ⇒ String

Returns ChEMBL database compound id, derived via REST call to ChEMBL.

Returns:

  • (String)

    ChEMBL database compound id, derived via REST call to ChEMBL



255
256
257
258
259
260
# File 'lib/compound.rb', line 255

# ChEMBL compound id, fetched from the ChEMBL web service by SMILES on first
# use and stored in the "chemblid" attribute afterwards.
#
# @return [String] value of the "chemblid" attribute
def chemblid
  # https://www.ebi.ac.uk/chembldb/ws#individualCompoundByInChiKey
  lookup_uri = "http://www.ebi.ac.uk/chemblws/compounds/smiles/#{smiles}.json"
  if !self["chemblid"]
    response = JSON.parse(RestClientWrapper.get(lookup_uri))
    # the id of the first compound in the response is used
    update(:chemblid => response["compounds"].first["chemblId"])
  end
  self["chemblid"]
end

#cid ⇒ String

Returns PubChem Compound Identifier (CID), derived via REST call to PubChem.

Returns:

  • (String)

    PubChem Compound Identifier (CID), derived via REST call to PubChem



248
249
250
251
252
# File 'lib/compound.rb', line 248

# PubChem Compound Identifier (CID), requested from the PubChem PUG REST
# service by InChI on first use and stored in the "cid" attribute afterwards.
#
# @return [String] value of the "cid" attribute
def cid
  pug_base = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/"
  unless self["cid"]
    endpoint = File.join(pug_base, "compound", "inchi", "cids", "TXT")
    answer = RestClientWrapper.post(endpoint, {:inchi => inchi})
    update(:cid => answer.strip)
  end
  self["cid"]
end

#db_neighbors(params) ⇒ Object



320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
# File 'lib/compound.rb', line 320

# Similarity search on the default fingerprint, evaluated inside MongoDB.
#
# The aggregation pipeline computes, for every document of the "compounds"
# collection, common / (own size + stored size - common) as "tanimoto",
# keeps documents with tanimoto >= params[:min_sim] and sorts them in
# descending order. The result is then filtered in Ruby to documents whose
# "dataset_ids" include params[:training_dataset_id].
#
# @param params [Hash] reads :min_sim and :training_dataset_id
# @return [Array] aggregation results with "_id", "features", "dataset_ids"
#   and "tanimoto"
def db_neighbors params
  # from http://blog.matt-swain.com/post/87093745652/chemical-similarity-search-in-mongodb
  #
  # The pre-filter stages described there (fingerprint size window, required
  # bits taken from feature counts) are not applied here:
  #qn = default_fingerprint_size
  #qmin = qn * threshold
  #qmax = qn / threshold
  #not sure if it is worth the effort of keeping feature counts up to date (compound deletions, additions, ...)
  # There is also no stage that removes the query compound itself.
  stored_bits = "$fingerprints.#{DEFAULT_FINGERPRINT}"
  query_bits = fingerprints[DEFAULT_FINGERPRINT]
  common_size = {'$size' => {'$setIntersection' => [stored_bits, query_bits]}}
  size_sum = {'$add' => [default_fingerprint_size, '$default_fingerprint_size']}
  tanimoto = {'$let' => {
    'vars' => {'common' => common_size},
    'in' => {'$divide' => ['$$common', {'$subtract' => [size_sum, '$$common']}]}
  }}
  projection = {'tanimoto' => tanimoto, '_id' => 1, 'features' => 1, 'dataset_ids' => 1}
  pipeline = [
    {'$project' => projection},
    {'$match' =>  {'tanimoto' => {'$gte' => params[:min_sim]}}},
    {'$sort' => {'tanimoto' => -1}}
  ]

  candidates = $mongo["compounds"].aggregate(pipeline)
  candidates.select{|doc| doc["dataset_ids"].include? params[:training_dataset_id]}
end

#fingerprint(type = DEFAULT_FINGERPRINT) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/compound.rb', line 36

# Fingerprint of the given type, computed on first use and cached in
# +fingerprints[type]+ (the compound is saved after a new fingerprint was
# computed).
#
# "MP2D" and "MNA" are derived from text output of +obconversion+; every
# other type is treated as an OpenBabel fingerprint name and returned as the
# list of positions of the bits that are set.
#
# @param type [String] fingerprint type, defaults to DEFAULT_FINGERPRINT
# @return [Array] the fingerprint; [] (nothing cached) when the compound has
#   no smiles
def fingerprint type=DEFAULT_FINGERPRINT
  unless fingerprints[type]
    return [] unless self.smiles
    case type
    when "MP2D"
      #http://openbabel.org/docs/dev/FileFormats/MolPrint2D_format.html#molprint2d-format
      columns = obconversion(smiles,"smi","mpd").strip.split("\t")
      columns.shift # remove Title
      fingerprints[type] = columns.uniq # no fingerprint counts
    when "MNA"
      #http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html
      level = 2 # TODO: level as parameter, evaluate level 1, see paper
      rows = obconversion(smiles,"smi","mna","xL\"#{level}\"").split("\n")
      rows.shift # remove Title
      fingerprints[type] = rows
    else # standard fingerprints
      calculator = OpenBabel::OBFingerprint.find_fingerprint(type)
      molecule = OpenBabel::OBMol.new
      converter = OpenBabel::OBConversion.new
      converter.set_in_format "smi"
      converter.read_string molecule, self.smiles
      raw = OpenBabel::VectorUnsignedInt.new
      calculator.get_fingerprint(molecule,raw)
      # TODO: %ignore *::DescribeBits @ line 163 openbabel/scripts/openbabel-ruby.i
      #p OpenBabel::OBFingerprint.describe_bits(result)
      # convert result to a list of the bits that are set
      # from openbabel/scripts/python/pybel.py line 830
      # see also http://openbabel.org/docs/dev/UseTheLibrary/Python_Pybel.html#fingerprints
      words = raw.to_a
      bits_per_word = OpenBabel::OBFingerprint.getbitsperint()
      bits_set = []
      words.each_with_index do |word, word_index|
        # positions are 1-based: the lowest bit of the first word is bit 1
        position = 1 + word_index * bits_per_word
        while word > 0 do
          bits_set << position if word.odd?
          word >>= 1
          position += 1
        end
      end
      fingerprints[type] = bits_set
    end
    save
  end
  fingerprints[type]
end

#fingerprint_count_neighbors(params) ⇒ Object



262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# File 'lib/compound.rb', line 262

# Neighbors in a training dataset, ranked by a count-based similarity of
# fingerprints: for every feature the smaller and the larger occurrence count
# of the two fingerprints are summed up, and the similarity is
# sum(min) / sum(max) (0 when both fingerprints are empty).
#
# The compound itself is skipped.
#
# @param params [Hash] reads :type, :training_dataset_id and :min_sim
# @return [Array<Array>] [compound id, similarity] pairs with
#   similarity >= params[:min_sim], sorted by descending similarity
def fingerprint_count_neighbors params
  # TODO fix
  # Occurrence table of a fingerprint; unknown features count as 0. Built
  # once per fingerprint instead of calling Array#count for every feature.
  occurrences = lambda do |fp|
    fp.each_with_object(Hash.new(0)) { |feature, table| table[feature] += 1 }
  end
  query_counts = occurrences.call(self.fingerprint(params[:type]))
  neighbors = []
  Dataset.find(params[:training_dataset_id]).compounds.each do |compound|
    next if self == compound
    candidate_counts = occurrences.call(compound.fingerprint(params[:type]))
    min_sum = 0
    max_sum = 0
    (query_counts.keys | candidate_counts.keys).each do |feature|
      min, max = [query_counts[feature], candidate_counts[feature]].minmax
      min_sum += min
      max_sum += max
    end
    sim = max_sum == 0 ? 0 : min_sum/max_sum.to_f
    neighbors << [compound.id, sim] if sim >= params[:min_sim]
  end
  neighbors.sort{|a,b| b.last <=> a.last}
end

#fingerprint_neighbors(params) ⇒ Object



284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
# File 'lib/compound.rb', line 284

# Neighbors in a training dataset by Tanimoto similarity of fingerprints.
#
# For the default fingerprint type the search is delegated to db_neighbors.
# For any other type every compound of the training dataset is compared in
# Ruby (size of intersection / size of union) and returned together with its
# values for the first feature of the dataset.
#
# @param params [Hash] requires :type, :training_dataset_id and :min_sim;
#   bad_request_error is called when one of them is missing
# @return [Array<Hash>] entries with "_id", "features" and "tanimoto",
#   sorted by descending "tanimoto" in the non-default case
def fingerprint_neighbors params
  unless params[:type] and params[:training_dataset_id] and params[:min_sim]
    bad_request_error "Incorrect parameters '#{params}' for Compound#fingerprint_neighbors. Please provide :type, :training_dataset_id, :min_sim."
  end
  return db_neighbors(params) if params[:type] == DEFAULT_FINGERPRINT

  own_fingerprint = self.fingerprint params[:type]
  dataset = Dataset.find(params[:training_dataset_id])
  target_feature = dataset.features.first
  hits = []
  dataset.compounds.each do |candidate|
    other_fingerprint = candidate.fingerprint params[:type]
    shared = (own_fingerprint & other_fingerprint).size
    total = (own_fingerprint | other_fingerprint).size
    similarity = shared/total.to_f
    measured = dataset.values(candidate,target_feature)
    next unless similarity >= params[:min_sim]
    hits << {"_id" => candidate.id, "features" => {target_feature.id.to_s => measured}, "tanimoto" => similarity}
  end
  hits.sort!{|a,b| b["tanimoto"] <=> a["tanimoto"]}
  hits
end

#inchi ⇒ String

Get InChI

Returns:



176
177
178
179
180
181
182
183
184
# File 'lib/compound.rb', line 176

# InChI of the compound, derived from its smiles on first use and stored in
# the "inchi" attribute. Nothing is stored when the conversion result is
# missing or empty.
#
# @return [String, nil] value of the "inchi" attribute
def inchi
  return self["inchi"] if self["inchi"]

  converted = obconversion(smiles,"smi","inchi")
  #result = `echo "#{self.smiles}" | "#{File.join(File.dirname(__FILE__),"..","openbabel","bin","babel")}" -ismi - -oinchi`.chomp
  update(:inchi => converted.chomp) unless !converted || converted.empty?
  self["inchi"]
end

#inchikey ⇒ String

Get InChIKey

Returns:

  • (String)

    InChIKey string



188
189
190
191
# File 'lib/compound.rb', line 188

# InChIKey of the compound, derived from its smiles on first use and stored
# in the "inchikey" attribute.
#
# @return [String] InChIKey string
def inchikey
  if !self["inchikey"]
    key = obconversion(smiles,"smi","inchikey")
    update(:inchikey => key)
  end
  self["inchikey"]
end

#mg_to_mmol(mg) ⇒ Float

Convert mg to mmol

Returns:

  • (Float)

    value in mmol



357
358
359
# File 'lib/compound.rb', line 357

# Convert mg to mmol by dividing by the molecular weight.
#
# @param mg [#to_f] amount in mg
# @return [Float] amount in mmol
def mg_to_mmol mg
  mass = mg.to_f
  mass / molecular_weight
end

#mmol_to_mg(mmol) ⇒ Float

Convert mmol to mg

Returns:

  • (Float)

    value in mg



351
352
353
# File 'lib/compound.rb', line 351

# Convert mmol to mg by multiplying with the molecular weight.
#
# @param mmol [#to_f] amount in mmol
# @return [Float] amount in mg
def mmol_to_mg mmol
  amount = mmol.to_f
  amount * molecular_weight
end

#molecular_weight ⇒ Float

Calculate molecular weight of Compound with OB and store it in object

Returns:

  • (Float)

    molecular weight



363
364
365
366
# File 'lib/compound.rb', line 363

# Molecular weight, obtained through physchem for the PhysChem descriptor
# named "Openbabel.MW".
#
# @return [Float] molecular weight
def molecular_weight
  weight_descriptor = PhysChem.find_or_create_by(:name => "Openbabel.MW")
  # physchem returns a hash keyed by descriptor id strings
  calculated = physchem([weight_descriptor])
  calculated[weight_descriptor.id.to_s]
end

#names ⇒ String

Get all known compound names. Relies on an external service for name lookups.

Examples:

names = compound.names

Returns:



242
243
244
245
# File 'lib/compound.rb', line 242

# All names known for the compound. They are requested from the service at
# CACTUS_URI by InChI on first use (one name per response line) and stored in
# the "names" attribute afterwards.
#
# @return [Array<String>] value of the "names" attribute
def names
  unless self["names"]
    listing = RestClientWrapper.get("#{CACTUS_URI}#{inchi}/names")
    update(:names => listing.split("\n"))
  end
  self["names"]
end

#physchem(descriptors = PhysChem.openbabel_descriptors) ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/compound.rb', line 83

# Values of the requested physchem descriptors for this compound.
#
# Descriptors that are not yet present in +physchem_descriptors+ are
# calculated, stored there under their id (as string) and the compound is
# saved. Calculation is dispatched to the method named after the downcased
# descriptor library, called on the descriptor object with the descriptor
# name and this compound; it is expected to yield name/value pairs.
#
# @param descriptors [Array] descriptor objects, defaults to
#   PhysChem.openbabel_descriptors
# @return [Hash] descriptor id (String) => value for the requested descriptors
def physchem descriptors=PhysChem.openbabel_descriptors
  # TODO: speedup java descriptors
  # BSON::ObjectId instances are not allowed as keys in a BSON document.
  # Computed once; it is needed for the lookup of new ids and for the final filter.
  requested_ids = descriptors.collect{|d| d.id.to_s}
  new_ids = requested_ids - physchem_descriptors.keys
  by_call = {}
  by_name = {}
  new_ids.each do |id|
    descriptor = PhysChem.find id
    # descriptors sharing library and descriptor name collapse into one call
    # (avoid recalculating Cdk features with multiple values)
    by_call[[descriptor.library, descriptor.descriptor]]  = descriptor
    by_name[descriptor.name] = descriptor
  end
  by_call.each do |(library, descriptor_name), descriptor|
    descriptor.send(library.downcase,descriptor_name,self).each do |name,value|
      physchem_descriptors[by_name[name].id.to_s] = value # BSON::ObjectId instances are not allowed as keys in a BSON document.
    end
  end
  save
  physchem_descriptors.select{|id,_| requested_ids.include? id}
end

#physchem_neighbors(params) ⇒ Object



304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
# File 'lib/compound.rb', line 304

# Neighbors in a feature dataset by similarity of descriptor vectors.
#
# The query vector comes from Algorithm.run; every data entry of the feature
# dataset is compared to it in R with x %*% y / sqrt(x%*%x * y%*%y).
#
# @param params [Hash] reads :feature_dataset_id,
#   :feature_calculation_algorithm, :descriptors and :min_sim
# @return [Array<Array>] [compound id, similarity] pairs with
#   similarity >= params[:min_sim], in dataset order
def physchem_neighbors params
  feature_dataset = Dataset.find params[:feature_dataset_id]
  query_vector = Algorithm.run params[:feature_calculation_algorithm], self, params[:descriptors]
  matches = []
  feature_dataset.data_entries.each_with_index do |candidate_vector, row|
    # TODO implement pearson and cosine similarity separately
    R.assign "x", query_vector
    R.assign "y", candidate_vector
    similarity = R.eval("x %*% y / sqrt(x%*%x * y%*%y)").to_ruby.first
    next unless similarity >= params[:min_sim]
    # use compound_ids, instantiation of Compounds is too time consuming
    matches << [feature_dataset.compound_ids[row],similarity]
  end
  matches
end

#png ⇒ image/png

Get png image

Examples:

image = compound.png

Returns:

  • (image/png)

    Image data



228
229
230
231
232
233
234
235
236
# File 'lib/compound.rb', line 228

# PNG image of the compound.
#
# On first use the image is generated from the smiles, stored
# Base64-encoded in $gridfs and the id of the stored file is saved as png_id.
#
# @return [String] decoded image data
def png
  if self.png_id.nil?
    image = obconversion(smiles,"smi","_png2")
    encoded = Base64.encode64(image)
    file = Mongo::Grid::File.new(encoded, :filename => "#{id}.png", :content_type => "image/png")
    update(:png_id => $gridfs.insert_one(file))
  end
  stored = $gridfs.find_one(_id: self.png_id)
  Base64.decode64(stored.data)
end

#sdf ⇒ String

Get sdf

Returns:



202
203
204
205
206
207
208
209
210
# File 'lib/compound.rb', line 202

# SDF representation of the compound.
#
# On first use the SDF is generated from the smiles, stored in $gridfs and
# the id of the stored file is saved as sdf_id.
#
# @return [String] data of the stored file
def sdf
  if self.sdf_id.nil?
    molfile = obconversion(smiles,"smi","sdf")
    file = Mongo::Grid::File.new(molfile, :filename => "#{id}.sdf",:content_type => "chemical/x-mdl-sdfile")
    update :sdf_id => $gridfs.insert_one(file)
  end
  stored = $gridfs.find_one(_id: self.sdf_id)
  stored.data
end

#smarts_match(smarts, count = false) ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/compound.rb', line 105

# Match the compound against a list of SMARTS patterns with OpenBabel.
#
# @param smarts [Array] objects responding to +smarts+ with the pattern string
# @param count [Boolean] when true, return the size of the match list
#   instead of 1 for matching patterns
# @return [Array<Integer>] one entry per pattern: 0 for no match, otherwise
#   1 or (with count) the number of entries in the match list
def smarts_match smarts, count=false
  converter = OpenBabel::OBConversion.new
  molecule = OpenBabel::OBMol.new
  converter.set_in_format('smi')
  converter.read_string(molecule,self.smiles)
  # a single pattern object is re-initialized for every SMARTS
  pattern = OpenBabel::OBSmartsPattern.new
  smarts.map do |sma|
    pattern.init(sma.smarts)
    if !pattern.match(molecule)
      0
    elsif count
      pattern.get_map_list.to_a.size
    else
      1
    end
  end
end

#smiles ⇒ String

Get (canonical) smiles

Returns:



195
196
197
198
# File 'lib/compound.rb', line 195

# (Canonical) smiles stored in the "smiles" attribute.
#
# @return [String] value of the "smiles" attribute
def smiles
  if !self["smiles"]
    # NOTE(review): this branch runs only when the attribute is missing, so
    # the value handed to obconversion is the missing value itself - confirm
    # what obconversion returns for it and whether another source was intended.
    canonical = obconversion(self["smiles"],"smi","can")
    update(:smiles => canonical)
  end
  self["smiles"]
end

#svg ⇒ image/svg

Get SVG image

Returns:

  • (image/svg)

    Image data



214
215
216
217
218
219
220
221
222
# File 'lib/compound.rb', line 214

# SVG image of the compound.
#
# On first use the image is generated from the smiles, stored in $gridfs and
# the id of the stored file is saved as svg_id.
#
# @return [String] data of the stored file
def svg
  if self.svg_id.nil?
    drawing = obconversion(smiles,"smi","svg")
    file = Mongo::Grid::File.new(drawing, :filename => "#{id}.svg", :content_type => "image/svg")
    update(:svg_id => $gridfs.insert_one(file))
  end
  stored = $gridfs.find_one(_id: self.svg_id)
  stored.data
end