Class: OpenTox::Compound

Inherits:
Substance show all
Defined in:
lib/compound.rb

Overview

Small molecules with defined chemical structures

Constant Summary collapse

DEFAULT_FINGERPRINT =
"MP2D"

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.find_or_create_by(params) ⇒ Object

Overwrites standard Mongoid method to create fingerprints before database insertion



25
26
27
28
29
30
# File 'lib/compound.rb', line 25

# Overrides the standard Mongoid finder so that the default fingerprint is
# calculated, and its size recorded, before the record is saved.
#
# @param params [Hash] attributes handed to +find_or_initialize_by+
# @return [Object] the found or newly initialized record, after +save+ was called
def self.find_or_create_by params
  find_or_initialize_by(params).tap do |record|
    # the size is stored alongside the record; db_neighbors reads it back as '$default_fingerprint_size'
    record.default_fingerprint_size = record.fingerprint(DEFAULT_FINGERPRINT).size
    record.save
  end
end

.from_inchi(inchi) ⇒ OpenTox::Compound

Create a compound from InChI string

Parameters:

Returns:



149
150
151
152
153
154
155
156
157
# File 'lib/compound.rb', line 149

# Create a compound from an InChI string.
#
# The InChI is first converted to canonical SMILES. When the conversion yields
# an empty string, a compound that only carries a warning is created instead.
#
# @param inchi [String] InChI string
# @return [OpenTox::Compound] whatever Compound.find_or_create_by returns
def self.from_inchi inchi
  canonical = obconversion(inchi,"inchi","can")
  unless canonical.empty?
    return Compound.find_or_create_by(:smiles => canonical, :inchi => inchi)
  end
  # conversion failed: record the problem on the compound itself
  Compound.find_or_create_by(:warnings => ["InChi parsing failed for #{inchi}, this may be caused by an incorrect InChi string or a bug in OpenBabel libraries."])
end

.from_name(name) ⇒ OpenTox::Compound

Create a compound from name. Relies on an external service for name lookups.

Examples:

compound = OpenTox::Compound.from_name("Benzene")

Parameters:

  • name, (String)

    can be also an InChI/InChiKey, CAS number, etc

Returns:



172
173
174
# File 'lib/compound.rb', line 172

# Create a compound from a name. Relies on an external service for name lookups.
#
# @example
#   compound = OpenTox::Compound.from_name("Benzene")
# @param name [String] compound name (per the API docs this can also be an
#   InChI/InChIKey, CAS number, etc.)
# @return [OpenTox::Compound] whatever Compound.from_smiles returns for the
#   SMILES delivered by the lookup service
def self.from_name name
  # URI.escape was removed in Ruby 3.0; the RFC 2396 parser offers the same
  # escaping rules and exists in old and new Ruby versions alike.
  escaped_name = URI::RFC2396_Parser.new.escape(name)
  Compound.from_smiles RestClientWrapper.get(File.join(CACTUS_URI,escaped_name,"smiles"))
end

.from_sdf(sdf) ⇒ OpenTox::Compound

Create a compound from SDF

Parameters:

Returns:



162
163
164
165
# File 'lib/compound.rb', line 162

# Create a compound from SDF.
#
# @param sdf [String] SDF content
# @return [OpenTox::Compound] whatever Compound.from_smiles returns
def self.from_sdf sdf
  # only the canonical SMILES is passed on; the SDF is not stored because it might be 2D
  canonical_smiles = obconversion(sdf,"sdf","can")
  Compound.from_smiles(canonical_smiles)
end

.from_smiles(smiles) ⇒ OpenTox::Compound

Create a compound from smiles string

Examples:

compound = OpenTox::Compound.from_smiles("c1ccccc1")

Parameters:

Returns:



132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/compound.rb', line 132

# Create a compound from a SMILES string.
#
# @example
#   compound = OpenTox::Compound.from_smiles("c1ccccc1")
# @param smiles [String] SMILES string
# @return [OpenTox::Compound, nil] the compound for the canonical SMILES, or
#   nil (after logging a warning) when the input contains whitespace or the
#   conversion returns an empty string
def self.from_smiles smiles
  if smiles.match(/\s/) # spaces seem to confuse obconversion and may lead to invalid smiles
    $logger.warn "SMILES parsing failed for '#{smiles}', SMILES string contains whitespaces."
    return nil
  end
  # test if SMILES is correct and obtain canonical smiles (for compound comparisons);
  # kept in its own variable so the warning below can still quote the original input
  canonical = obconversion(smiles,"smi","can")
  if canonical.empty?
    $logger.warn "SMILES parsing failed for '#{smiles}', this may be caused by an incorrect SMILES string."
    return nil
  end
  Compound.find_or_create_by :smiles => canonical
end

Instance Method Details

#calculate_properties(descriptors = PhysChem::OPENBABEL) ⇒ Array<Float>

Calculate physchem properties

Parameters:

  • list (Array<Hash>)

    of descriptors

Returns:



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/compound.rb', line 85

# Calculate physchem properties.
#
# Values already present in +properties+ are reused; only descriptors whose id
# is missing there are calculated. Results are stored under the stringified
# descriptor id and the record is saved.
#
# @param descriptors [Array] descriptor objects responding to +id+
#   (defaults to PhysChem::OPENBABEL)
# @return [Array] the stored property value for every entry of +descriptors+,
#   in the same order (nil where no value is available)
def calculate_properties descriptors=PhysChem::OPENBABEL
  calculated_ids = properties.keys
  # BSON::ObjectId instances are not allowed as keys in a BSON document.
  new_ids = descriptors.collect{|d| d.id.to_s} - calculated_ids
  calculators = {} # one entry per [library, descriptor] pair, so each calculation runs once
  requested = {}   # descriptor name => descriptor
  new_ids.each do |id|
    descriptor = PhysChem.find id
    calculators[[descriptor.library, descriptor.descriptor]] = descriptor
    requested[descriptor.name] = descriptor
  end
  # avoid recalculating Cdk features with multiple values
  calculators.each do |(library, descriptor_name), descriptor|
    descriptor.send(library.downcase, descriptor_name, self).each do |name, value|
      feature = requested[name]
      # a calculation may deliver more values than were asked for; skip those
      next unless feature
      properties[feature.id.to_s] = value # BSON::ObjectId instances are not allowed as keys in a BSON document.
    end
  end
  save
  descriptors.collect{|d| properties[d.id.to_s]}
end

#chemblid ⇒ String

Get ChEMBL database compound id, obtained via REST call to ChEMBL

Returns:



255
256
257
258
259
260
# File 'lib/compound.rb', line 255

# Get the ChEMBL database compound id, obtained via REST call to ChEMBL.
#
# The id is looked up only while no "chemblid" is stored on the record.
#
# @return [String, nil] the stored ChEMBL id; nil when the service reports no
#   matching compound
def chemblid
  unless self["chemblid"]
    # https://www.ebi.ac.uk/chembldb/ws#individualCompoundByInChiKey
    # SMILES may contain characters such as '#' (triple bond) or '\' that must
    # not appear verbatim in a URL; escape them with the RFC 2396 rules.
    escaped_smiles = URI::RFC2396_Parser.new.escape(smiles)
    uri = "https://www.ebi.ac.uk/chemblws/compounds/smiles/#{escaped_smiles}.json"
    hit = (JSON.parse(RestClientWrapper.get(uri))["compounds"] || []).first
    # an empty result list must not raise; simply leave the id unset
    update(:chemblid => hit["chemblId"]) if hit
  end
  self["chemblid"]
end

#cid ⇒ String

Get PubChem Compound Identifier (CID), obtained via REST call to PubChem

Returns:



247
248
249
250
251
# File 'lib/compound.rb', line 247

# Get the PubChem Compound Identifier (CID), obtained via REST call to PubChem.
#
# The service is queried only while no "cid" is stored on the record; the
# stripped response text is then stored via +update+.
#
# @return [String] the stored CID
def cid
  unless self["cid"]
    endpoint = File.join("https://pubchem.ncbi.nlm.nih.gov/rest/pug/", "compound", "inchi", "cids", "TXT")
    response = RestClientWrapper.post(endpoint,{:inchi => inchi})
    update(:cid => response.strip)
  end
  self["cid"]
end

#db_neighbors(min_sim: 0.1, dataset_id:) ⇒ Object



262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# File 'lib/compound.rb', line 262

# Find similar substances with a MongoDB aggregation over the "substances"
# collection.
#
# The similarity of each document is computed inside the pipeline as
#   common / (own_size + document_size - common)
# where +common+ is the size of the intersection between this compound's
# default fingerprint and the document's. Approach taken from
# http://blog.matt-swain.com/post/87093745652/chemical-similarity-search-in-mongodb
#
# @param min_sim [Numeric] minimum similarity a document must reach
# @param dataset_id [Object] only rows whose "dataset_ids" include this id are returned
# @return [Array] aggregation rows ("similarity", "_id", "dataset_ids"),
#   sorted by descending similarity
def db_neighbors(min_sim: 0.1, dataset_id:)
  stored_fingerprint = "$fingerprints.#{DEFAULT_FINGERPRINT}"
  common_bits = {'$size' => {'$setIntersection' => [stored_fingerprint, fingerprints[DEFAULT_FINGERPRINT]]}}
  union_size = {'$subtract' => [{'$add' => [default_fingerprint_size, '$default_fingerprint_size']}, '$$common']}
  similarity = {'$let' => {
    'vars' => {'common' => common_bits},
    'in' => {'$divide' => ['$$common', union_size]}
  }}

  pipeline = [
    {'$project' => {'similarity' => similarity, '_id' => 1, 'dataset_ids' => 1}},
    {'$match' => {'similarity' => {'$gte' => min_sim}}},
    {'$sort' => {'similarity' => -1}}
  ]

  # TODO move into aggregate pipeline, see http://stackoverflow.com/questions/30537317/mongodb-aggregation-match-if-value-in-array
  candidates = $mongo["substances"].aggregate(pipeline)
  candidates.select { |row| row["dataset_ids"].include? dataset_id }
end

#fingerprint(type = DEFAULT_FINGERPRINT) ⇒ Array<String>

Create chemical fingerprint

Parameters:

  • fingerprint (String)

    type

Returns:



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/compound.rb', line 35

# Create a chemical fingerprint.
#
# A fingerprint already present in +fingerprints+ is returned as is. Otherwise
# it is calculated from the SMILES, stored under +type+ and the record is saved.
#
# @param type [String] fingerprint type: "MP2D", "MNA", or any name that
#   OpenBabel::OBFingerprint.find_fingerprint accepts
# @return [Array] the fingerprint; an empty array when the compound has no SMILES
def fingerprint type=DEFAULT_FINGERPRINT
  return fingerprints[type] if fingerprints[type]
  return [] unless self.smiles

  case type
  when "MP2D"
    # http://openbabel.org/docs/dev/FileFormats/MolPrint2D_format.html#molprint2d-format
    tokens = obconversion(smiles,"smi","mpd").strip.split("\t")
    tokens.shift # the first token is the title
    fingerprints[type] = tokens.uniq # presence only, no fingerprint counts
  when "MNA"
    # http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html
    level = 2 # TODO: level as parameter, evaluate level 1, see paper
    lines = obconversion(smiles,"smi","mna","xL\"#{level}\"").split("\n")
    lines.shift # the first line is the title
    fingerprints[type] = lines
  else
    # standard OpenBabel fingerprints
    engine = OpenBabel::OBFingerprint.find_fingerprint(type)
    molecule = OpenBabel::OBMol.new
    reader = OpenBabel::OBConversion.new
    reader.set_in_format "smi"
    reader.read_string molecule, self.smiles
    raw = OpenBabel::VectorUnsignedInt.new
    engine.get_fingerprint(molecule,raw)
    # TODO: %ignore *::DescribeBits @ line 163 openbabel/scripts/openbabel-ruby.i
    # Convert the integer words into the list of set bit positions (1-based),
    # following openbabel/scripts/python/pybel.py line 830,
    # see also http://openbabel.org/docs/dev/UseTheLibrary/Python_Pybel.html#fingerprints
    words = raw.to_a
    bitsperint = OpenBabel::OBFingerprint.getbitsperint()
    bits_set = []
    words.each_with_index do |word, index|
      position = index * bitsperint + 1
      while word > 0
        bits_set << position if word.odd?
        word >>= 1
        position += 1
      end
    end
    fingerprints[type] = bits_set
  end

  save
  fingerprints[type]
end

#inchi ⇒ String

Get InChI

Returns:



178
179
180
181
182
183
184
# File 'lib/compound.rb', line 178

# Get the InChI.
#
# When no "inchi" is stored yet, the SMILES is converted and a non-empty
# result is stored (without trailing newline) via +update+.
#
# @return [String, nil] the stored InChI; nil when none is stored and the
#   conversion gave nothing usable
def inchi
  return self["inchi"] if self["inchi"]

  converted = obconversion(smiles,"smi","inchi")
  update(:inchi => converted.chomp) if converted && !converted.empty?
  self["inchi"]
end

#inchikey ⇒ String

Get InChIKey

Returns:



188
189
190
191
# File 'lib/compound.rb', line 188

# Get the InChIKey.
#
# When no "inchikey" is stored yet, the SMILES is converted and the result is
# stored via +update+.
#
# @return [String] the stored InChIKey
def inchikey
  unless self["inchikey"]
    key = obconversion(smiles,"smi","inchikey")
    update(:inchikey => key)
  end
  self["inchikey"]
end

#mg_to_mmol(mg) ⇒ Float

Convert mg to mmol

Returns:

  • (Float)

    value in mmol



300
301
302
# File 'lib/compound.rb', line 300

# Convert mg to mmol.
#
# @param mg [#to_f] amount in mg
# @return [Float] value in mmol (the amount divided by +molecular_weight+)
def mg_to_mmol mg
  milligrams = mg.to_f
  milligrams / molecular_weight
end

#mmol_to_mg(mmol) ⇒ Float

Convert mmol to mg

Returns:

  • (Float)

    value in mg



294
295
296
# File 'lib/compound.rb', line 294

# Convert mmol to mg.
#
# @param mmol [#to_f] amount in mmol
# @return [Float] value in mg (the amount multiplied by +molecular_weight+)
def mmol_to_mg mmol
  millimoles = mmol.to_f
  millimoles * molecular_weight
end

#molecular_weight ⇒ Float

Calculate molecular weight of Compound with OB and store it in compound object

Returns:

  • (Float)

    molecular weight



306
307
308
309
# File 'lib/compound.rb', line 306

# Calculate the molecular weight of the compound with OB and store it in the
# compound object (storage happens inside +calculate_properties+).
#
# @return [Float] molecular weight
def molecular_weight
  weights = calculate_properties([PhysChem.find_or_create_by(:name => "Openbabel.MW")])
  weights.first
end

#names ⇒ Array<String>

Get all known compound names. Relies on an external service for name lookups.

Examples:

names = compound.names

Returns:



240
241
242
243
# File 'lib/compound.rb', line 240

# Get all known compound names. Relies on an external service for name lookups.
#
# The service is queried only while no "names" are stored; its response is
# split into lines and stored via +update+.
#
# @example
#   names = compound.names
# @return [Array<String>] the stored names
def names
  unless self["names"]
    listing = RestClientWrapper.get("#{CACTUS_URI}#{inchi}/names")
    update(:names => listing.split("\n"))
  end
  self["names"]
end

#png ⇒ image/png

Get png image

Examples:

image = compound.png

Returns:

  • (image/png)

    Image data



227
228
229
230
231
232
233
234
# File 'lib/compound.rb', line 227

# Get a png image.
#
# On first use the image is generated from the SMILES, Base64-encoded and
# inserted into GridFS; the resulting id is stored as +png_id+. Every call
# reads the file back from GridFS and decodes it.
#
# @example
#   image = compound.png
# @return [String] image data (image/png)
def png
  if self.png_id.nil?
    encoded = Base64.encode64(obconversion(smiles,"smi","_png2"))
    grid_file = Mongo::Grid::File.new(encoded, :filename => "#{id}.png", :content_type => "image/png")
    update(:png_id => $gridfs.insert_one(grid_file))
  end
  stored = $gridfs.find_one(_id: self.png_id)
  Base64.decode64(stored.data)
end

#sdf ⇒ String

Get SDF

Returns:



202
203
204
205
206
207
208
209
210
# File 'lib/compound.rb', line 202

# Get the SDF.
#
# On first use the SDF is generated from the SMILES and inserted into GridFS;
# the resulting id is stored as +sdf_id+. Every call reads the file back from
# GridFS.
#
# @return [String] SDF data
def sdf
  if self.sdf_id.nil?
    molfile = obconversion(smiles,"smi","sdf")
    grid_file = Mongo::Grid::File.new(molfile, :filename => "#{id}.sdf",:content_type => "chemical/x-mdl-sdfile")
    update :sdf_id => $gridfs.insert_one(grid_file)
  end
  stored = $gridfs.find_one(_id: self.sdf_id)
  stored.data
end

#smarts_match(smarts, count = false) ⇒ TrueClass, ...

Match a SMARTS substructure

Parameters:

  • smarts (String)
  • count (TrueClass, FalseClass) (defaults to: false)

    matches or return true/false

Returns:

  • (TrueClass, FalseClass, Fixnum)


110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/compound.rb', line 110

# Match SMARTS substructures against this compound.
#
# @param smarts [Array] objects responding to +smarts+ (the pattern string)
# @param count [TrueClass, FalseClass] when true, report the number of
#   matches instead of 1 for a matching pattern
# @return [Array<Integer>] one entry per pattern: 0 when it does not match,
#   otherwise 1 or, with +count+, the size of the match list
def smarts_match smarts, count=false
  reader = OpenBabel::OBConversion.new
  molecule = OpenBabel::OBMol.new
  reader.set_in_format('smi')
  reader.read_string(molecule,self.smiles)
  pattern = OpenBabel::OBSmartsPattern.new
  smarts.collect do |sma|
    pattern.init(sma.smarts)
    if !pattern.match(molecule)
      0
    elsif count
      pattern.get_map_list.to_a.size
    else
      1
    end
  end
end

#smiles ⇒ String

Get (canonical) smiles

Returns:



195
196
197
198
# File 'lib/compound.rb', line 195

# Get the (canonical) SMILES.
#
# When no SMILES is stored but an InChI is, the SMILES is derived from the
# stored InChI and saved via +update+. The raw "inchi" attribute is read
# (not the +inchi+ method) because that method itself depends on +smiles+.
# Previously the missing SMILES value itself was passed to the converter.
#
# @return [String, nil] the stored SMILES; nil when none is stored and none
#   can be derived
def smiles
  if !self["smiles"] && self["inchi"]
    canonical = obconversion(self["inchi"],"inchi","can")
    # keep the attribute unset when the conversion yields nothing
    update(:smiles => canonical) unless canonical.to_s.empty?
  end
  self["smiles"]
end

#svg ⇒ image/svg

Get SVG image

Returns:

  • (image/svg)

    Image data



214
215
216
217
218
219
220
221
# File 'lib/compound.rb', line 214

# Get an SVG image.
#
# On first use the image is generated from the SMILES and inserted into
# GridFS; the resulting id is stored as +svg_id+. Every call reads the file
# back from GridFS.
#
# @return [String] image data (image/svg)
def svg
  if self.svg_id.nil?
    markup = obconversion(smiles,"smi","svg")
    grid_file = Mongo::Grid::File.new(markup, :filename => "#{id}.svg", :content_type => "image/svg")
    update(:svg_id => $gridfs.insert_one(grid_file))
  end
  stored = $gridfs.find_one(_id: self.svg_id)
  stored.data
end