Module: Chem::Molecule

Includes:
Graph
Included in:
CDK::CDKMolecule, CDX::CDX, CMLMolecule, GSpanMolecule, KCF::RPairMolecule, KEGG::KCF, KEGG::KCFMolecule, KEGG::KeggCompound, Chem::MDL::MdlMolecule, Chem::MDL::MdlReaction, OpenBabel::OBMolecule, OpsinMolecule, PDB::PDBMolecule, SmilesMol, Sybyl::SybylMolecule, XYZ::XyzMolecule
Defined in:
lib/chem/db/cansmi.rb,
lib/chem.rb,
lib/chem/model.rb,
lib/chem/utils.rb,
lib/chem/db/eps.rb,
lib/chem/db/mdl.rb,
lib/chem/db/sybyl.rb,
lib/chem/db/vector.rb,
lib/chem/utils/cdk.rb,
lib/chem/utils/sub.rb,
lib/chem/db/pubchem.rb,
lib/chem/utils/prop.rb,
lib/chem/utils/sssr.rb,
lib/chem/utils/ullmann.rb,
lib/chem/utils/geometry.rb,
lib/chem/utils/traverse.rb,
lib/chem/utils/openbabel.rb,
lib/chem/utils/fingerprint.rb,
lib/chem/db/types/type_cansmi.rb

Overview

A module for assigning canonical smiles

Defined Under Namespace

Classes: EpsParameter

Constant Summary collapse

EpsHeader =
"%%!PS-Adobe-3.0 EPSF-3.0\n" +
"%%Creator: ChemRuby n.tanaka\n" + 
"%%For: Scientists\n" +
"%%Title: Molecular compound\n" +
"%%CreationDate: %d/%d/%d %d:%d \n"
MDLCountLineFormat =
"%3d%3d%3d%3d%3d%3d%3d%3d%3d  0999 V2000"
MDLHeaderLine2Format =
"%2s%8s%02d%02d%02d%02d%02d"
DESCRIPTORNAME =
'org.openscience.cdk.qsar.descriptors.molecular.'
ELEMNUM =

ELEMNUM =
{
  :C => 0,
  :N => 1,
  :O => 2,
  :P => 4
}
ELEMNUM.default = 32
Element2Number.inject({}) do |ret, (elem, num)|
  ret[elem] = 1 << num
  ret
end

Instance Attribute Summary collapse

Attributes included from Graph

#adjacencies, #edges, #nodes

Instance Method Summary collapse

Methods included from Graph

#adjacent_to, #clustering_coefficient, #each, #morgan, #terminal_nodes

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(m, *args) ⇒ Object

Redirect methods to OpenBabel



127
128
129
130
131
132
# File 'lib/chem.rb', line 127

# Redirect methods this object does not implement to the wrapped OpenBabel
# molecule held in @ob_mol.
#
# m     - Symbol name of the missing method.
# args  - positional arguments, forwarded unchanged.
# block - optional block, forwarded unchanged.
#
# Returns whatever @ob_mol returns for the call.
# Raises NoMethodError (through super) when @ob_mol is unset or does not
# respond to m.
def method_missing(m, *args, &block)
  # Without an OpenBabel molecule there is nothing to delegate to; forwarding
  # to nil would silently answer nil's own methods.
  return super unless !@ob_mol.nil? && @ob_mol.respond_to?(m)

  @ob_mol.__send__(m, *args, &block)
end

# Keeps respond_to? in agreement with method_missing: true for every method
# the wrapped OpenBabel molecule can answer.
def respond_to_missing?(m, include_private = false)
  (!@ob_mol.nil? && @ob_mol.respond_to?(m, include_private)) || super
end

Instance Attribute Details

#cdk2atomObject (readonly)

Returns the value of attribute cdk2atom.



109
110
111
# File 'lib/chem/utils/cdk.rb', line 109

# Reader for @cdk2atom. #cdk_setup fills this hash with
# CDK atom hashCode => node entries; it is nil until then.
def cdk2atom
  @cdk2atom
end

#cdk_molObject (readonly)

Returns the value of attribute cdk_mol.



109
110
111
# File 'lib/chem/utils/cdk.rb', line 109

# Reader for @cdk_mol, the CDK molecule object created by #cdk_setup.
# Returns nil while no CDK molecule has been built.
def cdk_mol
  @cdk_mol
end

#nameObject

Returns name of molecule. default value is self.source



113
114
115
# File 'lib/chem/model.rb', line 113

# Name of the molecule. When no name has been assigned (@name is nil or
# false) the value of #source is returned instead.
def name
  @name || source
end

#ob_molObject (readonly)

Returns the value of attribute ob_mol.



14
15
16
# File 'lib/chem/utils/openbabel.rb', line 14

# Reader for @ob_mol, the OpenBabel molecule assigned by #use_open_babel.
# Returns nil until #use_open_babel has run.
def ob_mol
  @ob_mol
end

#sourceObject

Returns source of molecule. default value is “”



107
108
109
# File 'lib/chem/model.rb', line 107

# Source of the molecule; an empty string when @source has not been set.
def source
  @source || ""
end

Instance Method Details

#-(other) ⇒ Object



75
76
77
78
79
80
81
# File 'lib/chem/utils/sub.rb', line 75

# Molecule difference: the sub-structure induced by the nodes of this
# molecule that are not listed in +other+.
#
# other - either an Array of nodes or an object answering #nodes.
#
# Returns the result of #induced_sub for the remaining nodes.
def -(other)
  removed = other.instance_of?(Array) ? other : other.nodes
  induced_sub(@nodes - removed)
end

#adjacent_indexObject



51
52
53
54
55
56
57
58
59
# File 'lib/chem/utils/ullmann.rb', line 51

# Adjacency list expressed with node positions instead of node objects.
#
# Returns an Array in which the entry at position i lists the positions
# (within #nodes) of every node adjacent to nodes[i].
def adjacent_index
  table = []
  nodes.each do |node|
    table[nodes.index(node)] = adjacent_to(node).map { |_bond, neighbour| nodes.index(neighbour) }
  end
  table
end

#assign_2d_geometryObject

Automatically assigns 2-dimensional geometry. This method may be called implicitly by ChemRuby if nil is assigned to Atom#x.



17
18
19
# File 'lib/chem/utils/geometry.rb', line 17

# Starts the 2D geometry assignment by passing the first node of the
# molecule to #geometrical_type and returns that call's result.
def assign_2d_geometry
  geometrical_type(nodes.first)
end

#bit_matObject



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/chem/utils/ullmann.rb', line 61

# Builds the adjacency matrix of the molecule as a BitMatrix.
#
# The matrix is nodes.length x nodes.length. Cell (i, j) is set when
# nodes[j] is adjacent to nodes[i]. For a molecule without edges no cell is
# set and +has_matrix+ is switched off on the returned matrix.
def bit_mat
  matrix = BitMatrix.new(nodes.length, nodes.length)
  if edges.length == 0
    matrix.has_matrix = false
    return matrix
  end

  # node => list of its neighbouring nodes
  neighbours = {}
  nodes.each do |node|
    neighbours[node] = adjacent_to(node).map { |_bond, other| other }
  end

  nodes.each_with_index do |row_atom, row|
    nodes.each_with_index do |col_atom, col|
      matrix.set(row, col) if neighbours[row_atom].include?(col_atom)
    end
  end
  matrix
end

#box_sizeObject

Return size of molecule with Array [x, y, z]



7
8
9
10
11
12
# File 'lib/chem/utils/geometry.rb', line 7

# Size of the axis-aligned box spanned by the atom coordinates.
#
# Returns an Array [size_x, size_y, size_z] where each entry is the largest
# minus the smallest coordinate of any node along that axis.
def box_size
  [:x, :y, :z].map do |axis|
    coords = nodes().map { |atom| atom.send(axis) }
    coords.max - coords.min
  end
end

#breadth_first_search(root = @nodes[0]) ⇒ Object Also known as: bfs

Breadth-first search computes the steps and the path to each node, forming a tree that contains all vertices reachable from the root node.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/chem/utils/traverse.rb', line 7

# Breadth-first walk over the bonds reachable from +root+.
#
# Every bond is visited at most once. For each newly seen bond the block is
# called with (from, to); the walk continues through +to+ only when the block
# returns a true value.
#
# root - node to start from (defaults to the first node).
#
# Returns nil.
def breadth_first_search(root = @nodes[0])
  pending = [root]
  seen_bonds = []

  while (current = pending.shift)
    adjacent_to(current).each do |bond, neighbour|
      unless seen_bonds.include?(bond)
        seen_bonds << bond
        pending << neighbour if yield(current, neighbour)
      end
    end
  end
end

#canonical_ring(ring) ⇒ Object

Fix me! This is not sufficient



35
36
37
# File 'lib/chem/utils/sssr.rb', line 35

# Puts the atoms of +ring+ into the order in which they occur in #nodes,
# so that the same ring found twice compares equal.
# (The page header notes this canonicalisation is "not sufficient".)
def canonical_ring(ring)
  ring.sort_by { |atom| nodes.index(atom) }
end

#cdk_BCUT(params) ⇒ Object

BCUT Descriptors .… Fix me!



303
304
# File 'lib/chem/utils/cdk.rb', line 303

# BCUT descriptors - not implemented yet: the body is empty and the method
# always returns nil.
# NOTE(review): the generated signature above shows a +params+ argument that
# this definition does not accept - confirm which one is intended.
def cdk_BCUT
end

#cdk_calc_descriptor(name, args = []) ⇒ Object



273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
# File 'lib/chem/utils/cdk.rb', line 273

# Runs one CDK molecular descriptor on this molecule.
#
# name - class name of the descriptor, relative to DESCRIPTORNAME.
# args - parameters handed to the descriptor's setParameters.
#
# Returns an Integer or Float for scalar results, an Array for array
# results, and nil for any other result class.
def cdk_calc_descriptor(name, args = [])
  self.cdk_setup
  descriptor = Rjb::import(DESCRIPTORNAME + name).new
  descriptor.setParameters(args)
  value = descriptor.calculate(self.cdk_mol).getValue
  case value._classname
  when "org.openscience.cdk.qsar.result.IntegerResult"
    value.intValue
  when "org.openscience.cdk.qsar.result.DoubleResult"
    value.doubleValue
  when "org.openscience.cdk.qsar.result.IntegerArrayResult",
       "org.openscience.cdk.qsar.result.DoubleArrayResult"
    (0...value.size).map { |n| value.get(n) }
  end
end

#cdk_calc_descriptorsObject

fixme this method does not work very well



342
343
344
345
346
347
348
349
350
# File 'lib/chem/utils/cdk.rb', line 342

# Feeds the molecule to a CDK DescriptorEngine and returns what the engine's
# process call returns. (The page header notes this "does not work very well".)
#
# Engine type codes as recorded by the original author:
#   1: atom, 2: bond?, 3: molecule?
def cdk_calc_descriptors
  self.cdk_setup
  Rjb::import('org.openscience.cdk.qsar.DescriptorEngine').new(2).process(self.cdk_mol)
end

#cdk_CPSAObject

CPSA



297
298
299
# File 'lib/chem/utils/cdk.rb', line 297

# Calculates the CDK 'CPSADescriptor' for this molecule through
# #cdk_calc_descriptor (no descriptor parameters are passed).
def cdk_CPSA
  cdk_calc_descriptor('CPSADescriptor')
end

#cdk_find_all_ringsObject



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/chem/utils/cdk.rb', line 121

# Finds all rings with CDK's AllRingsFinder and groups them with
# RingPartitioner.
#
# Returns an Array with one entry per partition; each entry is an Array of
# the molecule's own nodes, looked up through #cdk2atom by CDK atom hashCode.
def cdk_find_all_rings
  cdk_setup

  # The finder instance is created once and shared via a class variable.
  @@ring_finder ||= Rjb::import('org.openscience.cdk.ringsearch.AllRingsFinder').new
  partitioner = Rjb::import('org.openscience.cdk.ringsearch.RingPartitioner')
  partitions = partitioner.partitionRings(@@ring_finder.findAllRings(self.cdk_mol)).elements

  rings = []
  while partitions.hasMoreElements
    atom_enum = partitioner.convertToAtomContainer(partitions.nextElement).atoms
    ring = []
    ring << cdk2atom[atom_enum.nextElement.hashCode] while atom_enum.hasMoreElements
    rings << ring
  end
  rings
end

#cdk_fingerprintObject



212
213
214
# File 'lib/chem/utils/cdk.rb', line 212

# Currently returns only the class name of CDK's Fingerprinter as a String;
# no fingerprint is calculated here.
def cdk_fingerprint
  'org.openscience.cdk.fingerprint.Fingerprinter'
end

#cdk_gasteiger_marsili_partial_charges(params = {}) ⇒ Object

Fix me !

Fail: unknown method name `assignGasteigerMarsiliFactors


191
192
193
194
195
196
197
198
199
200
# File 'lib/chem/utils/cdk.rb', line 191

# Experimental wrapper around CDK's GasteigerMarsiliPartialCharges.
# Known broken: the page header records the failure
# "unknown method name `assignGasteigerMarsiliFactors'".
#
# params - Hash; only :deoc_hydrogen is read.
#
# Prints the step size and the result of assignGasteigerMarsiliFactors with
# +p+ and returns what the last +p+ returns.
def cdk_gasteiger_marsili_partial_charges(params = {})
  cdk_setup
  gm = Rjb::import('org.openscience.cdk.charges.GasteigerMarsiliPartialCharges').new
  # NOTE(review): this is an attribute-style assignment on the Java proxy,
  # not a call to setChiCatHydrogen(value) - confirm it does what is intended.
  gm.setChiCatHydrogen = params[:deoc_hydrogen] if params[:deoc_hydrogen]
  p gm.getStepSize
  p gm.assignGasteigerMarsiliFactors(self.cdk_mol)
#      gm.assignGasteigerMarsiliFactors(self.cdk_mol)
#      gm.assignGasteigerMarsiliPartialCharges(self.cdk_mol, false)
#      gm.assignGasteigerMarsiliPartialCharges(self.cdk_mol, true)
end

#cdk_generate_2DObject Also known as: cdk_calc_2d



111
112
113
114
115
116
117
118
# File 'lib/chem/utils/cdk.rb', line 111

# Lets CDK's StructureDiagramGenerator lay the molecule out in 2D.
#
# Returns a new Chem::CDK::CDKMolecule wrapping the generator's molecule.
def cdk_generate_2D
  cdk_setup
  # The generator class is imported once and shared via a class variable.
  @@gen_cls ||= Rjb::import('org.openscience.cdk.layout.StructureDiagramGenerator')
  layout = @@gen_cls.new
  layout.setMolecule(self.cdk_mol)
  layout.generateCoordinates
  Chem::CDK::CDKMolecule.new(layout.getMolecule)
end

#cdk_generate_randomlyObject



164
165
166
167
168
# File 'lib/chem/utils/cdk.rb', line 164

# Asks CDK's RandomGenerator to propose a structure based on this molecule.
#
# Returns the proposal wrapped in a CDK::CDKMolecule.
def cdk_generate_randomly
  cdk_setup
  proposer = Rjb::import('org.openscience.cdk.structgen.RandomGenerator').new(self.cdk_mol)
  proposal = proposer.proposeStructure
  CDK::CDKMolecule.new(proposal)
end

#cdk_generate_vicinityObject



170
171
172
173
174
175
176
177
178
179
180
# File 'lib/chem/utils/cdk.rb', line 170

# Samples structures with CDK's VicinitySampler.
#
# Returns an Array of CDK::CDKMolecule, one per sampled structure.
def cdk_generate_vicinity
  cdk_setup
  sampler = Rjb::import('org.openscience.cdk.structgen.VicinitySampler').new(self.cdk_mol)
  candidates = sampler.sample(self.cdk_mol).elements
  neighbours = []
  neighbours << CDK::CDKMolecule.new(candidates.nextElement) while candidates.hasMoreElements
  neighbours
end

#cdk_hose_code(atom, depth = 3) ⇒ Object

Return HOSE code Anal. Chim. Acta. (1978) 103:355-365



204
205
206
207
# File 'lib/chem/utils/cdk.rb', line 204

# Returns the HOSE code of +atom+ (Anal. Chim. Acta. (1978) 103:355-365),
# generated by CDK's HOSECodeGenerator.
#
# atom  - a node of this molecule; its CDK counterpart (+cdk_atom+, assigned
#         by #cdk_setup) is the atom handed to CDK.
# depth - number of spheres passed to getHOSECode (default 3).
def cdk_hose_code(atom, depth = 3)
  # The CDK molecule and the per-node cdk_atom must exist before the lookup.
  cdk_setup
  hose_gen = Rjb::import('org.openscience.cdk.tools.HOSECodeGenerator').new
  hose_gen.getHOSECode(cdk_mol, atom.cdk_atom, depth)
end

#cdk_hueckelObject

HueckelAromaticityDetector



183
184
185
186
187
# File 'lib/chem/utils/cdk.rb', line 183

# Runs CDK's HueckelAromaticityDetector on the CDK molecule and returns the
# value of its detectAromaticity call.
def cdk_hueckel
  cdk_setup
  Rjb::import('org.openscience.cdk.aromaticity.HueckelAromaticityDetector').detectAromaticity(self.cdk_mol)
end

#cdk_mcs(other) ⇒ Object



258
259
260
261
262
263
264
265
266
267
268
269
270
# File 'lib/chem/utils/cdk.rb', line 258

# Overlaps between this molecule and +other+ as reported by CDK's
# UniversalIsomorphismTester.getOverlaps.
#
# other - another molecule that answers #cdk_setup and #cdk_mol.
#
# Returns an Array of CDK::CDKMolecule, one per overlap.
def cdk_mcs(other)
  self.cdk_setup
  other.cdk_setup

  tester = Rjb::import('org.openscience.cdk.isomorphism.UniversalIsomorphismTester')
  overlaps = tester.getOverlaps(self.cdk_mol, other.cdk_mol).iterator
  found = []
  found << CDK::CDKMolecule.new(overlaps.next) while overlaps.hasNext
  found
end

#cdk_propertiesObject

dump CDK properties… useless…



321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# File 'lib/chem/utils/cdk.rb', line 321

# Debug helper: prints every property of the CDK molecule with +p+.
#
# For each key its string form and its value's string form are printed; keys
# whose string form matches the DescriptorSpecification pattern also get
# their identifier, title, vendor and specification reference printed.
def cdk_properties
  self.cdk_setup
  properties = self.cdk_mol.getProperties
  key_enum = properties.keys
  while key_enum.hasMoreElements
    key = key_enum.nextElement
    label = key.toString
    p label
    if /org.openscience.cdk.qsar.DescriptorSpecification/.match(key.toString)
      details = [
        key.getImplementationIdentifier,
        key.getImplementationTitle,
        key.getImplementationVendor,
        key.getSpecificationReference
      ]
      p details
    end
    p properties.get(key).toString
  end
end

#cdk_RotatableBondsCount(rot = [true]) ⇒ Object

args : terminal atoms must be included in the count



315
316
317
# File 'lib/chem/utils/cdk.rb', line 315

# Calculates the CDK 'RotatableBondsCountDescriptor' via #cdk_calc_descriptor.
#
# rot - parameter list passed through unchanged (default [true]; per the page
#       header this flag says whether terminal atoms are included in the count).
def cdk_RotatableBondsCount(rot = [true])
  cdk_calc_descriptor('RotatableBondsCountDescriptor', rot)
end

#cdk_rule_of_file(params = true) ⇒ Object

Lipinski’s Rule of Five



308
309
310
# File 'lib/chem/utils/cdk.rb', line 308

# Calculates the CDK 'RuleOfFiveDescriptor' (Lipinski's Rule of Five) via
# #cdk_calc_descriptor. The method name keeps its historical spelling.
#
# params - single descriptor parameter, wrapped in an Array before it is
#          passed on (default true).
def cdk_rule_of_file(params = true)
  cdk_calc_descriptor('RuleOfFiveDescriptor', [params])
end

#cdk_save_as(path, params = {}) ⇒ Object



352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
# File 'lib/chem/utils/cdk.rb', line 352

# Renders the molecule to an image file with the CDK ImageKit helper.
#
# path   - destination file name.
# params - Hash of options; it is only read, never modified:
#          :type   - :png (default), :svg or :jpg
#          :width  - image width, default 100
#          :height - image height, default 100
#
# Returns the value of the ImageKit writer call, or nil when :type is not
# one of the three supported symbols (nothing is written in that case).
def cdk_save_as(path, params = {})
  self.cdk_setup

  # Defaults are resolved into locals so the caller's hash stays untouched
  # (and may even be frozen).
  type   = params[:type]   || :png
  width  = params[:width]  || 100
  height = params[:height] || 100

  image_kit = Rjb::import('net.sf.structure.cdk.util.ImageKit')
  case type
  when :png
    image_kit.writePNG(self.cdk_mol, width, height, path)
  when :svg
    image_kit.writeSVG(self.cdk_mol, width, height, path)
  when :jpg
    image_kit.writeJPG(self.cdk_mol, width, height, path)
  end
end

#cdk_setupObject



216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/chem/utils/cdk.rb', line 216

# Builds the CDK counterpart of this molecule once and caches it.
#
# Side effects:
#   * requires 'rcdk' on first use (deliberate lazy load),
#   * assigns each node's +cdk_atom+,
#   * fills @cdk2atom (CDK atom hashCode => node),
#   * stores the CDK molecule in @cdk_mol.
#
# Returns nil when a CDK molecule already exists, self after building one.
def cdk_setup
  return unless self.cdk_mol.nil?
  require 'rcdk'
  atom_class = Rjb::import('org.openscience.cdk.Atom')
  bond_class = Rjb::import('org.openscience.cdk.Bond')
  ac         = Rjb::import('org.openscience.cdk.AtomContainer').new
  @cdk2atom = {}
  atoms = nodes.collect { |node|
    atom = atom_class.new(node.element.to_s)

    # Coordinates are not transferred yet. Re-enabling the lines below also
    # needs javax.vecmath.Point3d / Point2d imported as point3d / point2d.
#        atom.setPoint3d(point3d.new(node.x.to_f, node.y.to_f, node.z.to_f))
#        atom.setPoint2d(point2d.new(node.x.to_f, node.y.to_f, node.z.to_f))

    atom.setSymbol(node.element.to_s)
    node.cdk_atom = atom
    @cdk2atom[atom.hashCode] = node
    atom
  }
  ac.setAtoms(atoms)
  edges.each do |edge, node1, node2|
    atom1 = ac.getAtomAt(nodes.index(node1))
    atom2 = ac.getAtomAt(nodes.index(node2))
    ac.addBond(bond_class.new(atom1, atom2, edge.v.to_f))
  end
  # Created once, after all bonds are in place. Doing this inside the loop
  # left @cdk_mol nil for molecules without bonds.
  @cdk_mol = Rjb::import('org.openscience.cdk.Molecule').new(ac)
  self
end

#cdk_sssrObject



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/chem/utils/cdk.rb', line 142

# Smallest set of smallest rings as computed by CDK's SSSRFinder, grouped
# with RingPartitioner.
#
# Returns an Array with one entry per partition; each entry is an Array of
# the molecule's own nodes, looked up through #cdk2atom by CDK atom hashCode.
def cdk_sssr
  cdk_setup

  # The finder class is imported once and shared via a class variable.
  @@sssr_finder ||= Rjb::import('org.openscience.cdk.ringsearch.SSSRFinder')
  partitioner = Rjb::import('org.openscience.cdk.ringsearch.RingPartitioner')
  sssr = @@sssr_finder.new(self.cdk_mol)
  partitions = partitioner.partitionRings(sssr.findSSSR).elements

  rings = []
  while partitions.hasMoreElements
    atom_enum = partitioner.convertToAtomContainer(partitions.nextElement).atoms
    ring = []
    ring << cdk2atom[atom_enum.nextElement.hashCode] while atom_enum.hasMoreElements
    rings << ring
  end
  rings
end

#cdk_wiener_numbersObject

Wiener path number Wiener polarity number



292
293
294
# File 'lib/chem/utils/cdk.rb', line 292

# Calculates the CDK 'WienerNumbersDescriptor' (Wiener path number and Wiener
# polarity number, per the page header) via #cdk_calc_descriptor.
def cdk_wiener_numbers
  cdk_calc_descriptor('WienerNumbersDescriptor')
end

#cdk_xlogpObject



249
250
251
252
253
254
255
256
# File 'lib/chem/utils/cdk.rb', line 249

# XLogP of the molecule as calculated by CDK's XLogPDescriptor.
#
# Explicit hydrogens are first added to the cached CDK molecule with
# HydrogenAdder; the descriptor is then run with parameters [true, true].
#
# Returns the descriptor's doubleValue.
def cdk_xlogp
  self.cdk_setup
  hydrogen_adder = Rjb::import('org.openscience.cdk.tools.HydrogenAdder').new
  hydrogen_adder.addExplicitHydrogensToSatisfyValency(self.cdk_mol)

  descriptor = Rjb::import('org.openscience.cdk.qsar.descriptors.molecular.XLogPDescriptor').new
  descriptor.setParameters([true, true])
  descriptor.calculate(self.cdk_mol).getValue().doubleValue
end

#compositionObject

Returns the composition as a Hash of element => count, e.g. Chem.open_mol("benzene").composition # => {:C => 6, :H => 6}



82
83
84
85
86
87
88
89
90
# File 'lib/chem/utils/prop.rb', line 82

# Counts the atoms of the molecule per element.
#
# Returns a plain Hash mapping each element found in @nodes to the number of
# nodes carrying it; elements that do not occur have no entry.
def composition
  @nodes.each_with_object({}) do |atom, counts|
    counts[atom.element] = (counts[atom.element] || 0) + 1
  end
end

#connected?Boolean

Returns:

  • (Boolean)


19
20
21
22
23
24
25
26
27
# File 'lib/chem/utils/sub.rb', line 19

# True when every node can be reached from the first node.
#
# The depth-first walk reports one (from, to) pair per bond, so a node that
# closes a ring is reported more than once; each node is therefore recorded
# only the first time it is seen before the count is compared.
#
# NOTE(review): behaviour for a molecule without nodes is unchanged and
# depends on how adjacent_to treats nil - confirm if that case matters.
def connected?
  start = @nodes[0]
  reached = [start]
  dfs(start) do |_from, to|
    reached << to unless reached.include?(to)
  end
  reached.length == @nodes.length
end

#deep_dupObject



67
68
69
70
71
72
73
# File 'lib/chem/utils/sub.rb', line 67

# Copy of the molecule whose node list and edge list are fresh Arrays, so
# adding or removing entries on the copy leaves the original lists intact.
# The node and edge objects themselves are shared, and adjacencies are not
# duplicated.
def deep_dup
  dup.tap do |copy|
    copy.nodes = @nodes.dup
    #copy.adjacencies = @adjacencies.dup if @adjacencies
    copy.edges = @edges.dup
  end
end

#delete(atom) ⇒ Object



60
61
62
63
64
65
# File 'lib/chem/utils/sub.rb', line 60

# Removes +atom+ from @nodes and drops every edge whose bond is reported by
# adjacent_to(atom).
#
# Returns the collection that adjacent_to(atom) returned.
def delete(atom)
  @nodes.delete(atom)
  adjacent_to(atom).each do |doomed_bond, _neighbour|
    @edges.reject! { |bond, _atom_a, _atom_b| bond == doomed_bond }
  end
end

#delete_bond(bond) ⇒ Object



53
54
55
56
57
58
# File 'lib/chem/utils/sub.rb', line 53

# Removes +bond+ from @edges (via Array#delete-style equality) and from every
# adjacency list in @adjacencies whose first entry equals it.
#
# Returns @adjacencies.
def delete_bond(bond)
  @edges.delete(bond)
  @adjacencies.each do |_node, links|
    links.reject! { |candidate, _atom_a, _atom_b| bond == candidate }
  end
end

#depth_first_search(from = @nodes[0], traversed = [], &block) ⇒ Object Also known as: dfs



24
25
26
27
28
29
30
31
# File 'lib/chem/utils/traverse.rb', line 24

# Depth-first walk over the bonds reachable from +from+.
#
# from      - node to start at (defaults to the first node).
# traversed - Array of bonds already visited; it is extended in place and
#             shared by the recursive calls.
#
# Yields (from, to, bond) once for every bond not yet in +traversed+, then
# continues the walk from +to+.
def depth_first_search(from = @nodes[0], traversed = [], &block)
  adjacent_to(from).each do |bond, to|
    unless traversed.include?(bond)
      traversed << bond
      yield(from, to, bond)
      depth_first_search(to, traversed, &block)
    end
  end
end

#divideObject

divide compounds by connectivity e.g. washing salts.



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/chem/utils/sub.rb', line 31

# Splits the molecule into its connected components (e.g. to wash salts).
#
# Starting from the first node, each component is collected with a
# depth-first walk; the next component starts at the first node not yet
# visited.
#
# Returns an Array with one #induced_sub result per component.
def divide
  visited = []
  seed = @nodes[0]
  fragments = []

  until visited.length == @nodes.length
    component = [seed]
    visited << seed
    dfs(seed) do |_from, to, _bond|
      next if component.include?(to)
      visited << to
      component << to
    end

    seed = @nodes.find { |node| !visited.include?(node) }
    fragments << induced_sub(component)
  end
  fragments
end

#f_dfs(node, path, max, &block) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/chem/utils/fingerprint.rb', line 30

# Enumerates simple paths for the fingerprint.
#
# node - node at the end of +path+.
# path - Array of nodes walked so far; it is extended and restored in place,
#        so the block must copy it if it wants to keep it.
# max  - the walk stops once path.length exceeds this value.
#
# Yields +path+ for every path whose length does not exceed +max+. Hydrogen
# neighbours and nodes already on the path are not followed.
def f_dfs(node, path, max, &block)
  return if path.length > max

  yield path
  self.adjacent_to(node).each do |_bond, neighbour|
    next if neighbour.element == :H
    next if path.include?(neighbour)

    path.push(neighbour)
    f_dfs(neighbour, path, max, &block)
    path.pop
  end
end

#find_smallest_ring(root) ⇒ Object

J. Chem. Inf. Comput. Sci. 1994, 34, 822-831

Renzo Balducci and Robert S. Pearlman Efficient Exact Solution of the Ring Perception Problem



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/chem/utils/sssr.rb', line 17

def find_smallest_ring root
  path = {}
  path[root] = [root]

  bfs(root) do |from, to|
	if visit = !path.keys.include?(to)
	  path[to] = path[from].clone
	  path[to].push(to)
	elsif path[from][-2] != to
	  if 1 == (path[from] & path[to]).length
 return path[from] + path[to][1..-1].reverse
	  end
	end
	visit
  end
end

#find_sssrObject

Returns Smallest Set of Smallest Ring



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/chem/utils/sssr.rb', line 40

# Returns the Smallest Set of Smallest Rings as an Array of rings, each ring
# being an Array of nodes in #canonical_ring order.
#
# The method works on a private adjacency table (node => neighbour nodes) and
# repeatedly looks at the node with the smallest non-zero degree:
#   degree 1 - the node is handed to #trim,
#   degree 2 - a ring is searched through every degree-2 node, new rings are
#              recorded, then those nodes are handed to #trim,
#   degree 3 - a ring is searched through the node, then it is handed to #trim.
# The loop ends when the table is empty.
#
# NOTE(review): #trim is defined outside this block; it is assumed to remove
# the node from the table - confirm.
def find_sssr

  # NOTE(review): fullSet is never read in this method.
  fullSet = nodes.dup
  # trimSet collects nodes found with no neighbours left; it is only written.
  trimSet = []
  rings = []
  # mol: node => Array of neighbouring nodes
  mol = {}

  nodes.each do |node|
    mol[node] = []
    adjacent_to(node).each do |bond, atom|
      mol[node] << atom
    end
  end

  loop do
	nodesN2 = []
	# 10 acts as "no node with neighbours seen yet"
	smallest_degree = 10
	smallest = nil

	mol.each do |k, a|
	  case a.length
	  when 0
	    # entries without neighbours are removed while the table is iterated
 mol.delete(k)# Is this OK?
 trimSet.push(k)
	  when 2
 nodesN2.push(k)
	  end
	  if a.length > 0 && a.length < smallest_degree
 smallest = k
 smallest_degree = a.length
	  end
	end

	# NOTE(review): when smallest_degree is none of 1, 2 or 3 nothing is
	# trimmed in this pass - confirm the loop still terminates in that case.
	case smallest_degree
	when 1
	  trim(mol, smallest)
	when 2
	  nodesN2.each do |k|
 ring = find_smallest_ring(k)
        if ring && !rings.include?(canonical_ring(ring))
          rings.push(canonical_ring(ring))
        end
	  end
	  nodesN2.each do |k|
 trim(mol, k)
	  end
	when 3
	  # NOTE(review): the ring found here is not added to +rings+.
	  ring = find_smallest_ring(smallest)
	  trim(mol, smallest)
	end

	break if mol.length  == 0
  end
  rings
end

#fingerprint(max = 3, n_bits = 32) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/chem/utils/fingerprint.rb', line 58

# Path-based hashed fingerprint.
#
# max    - path length limit handed to #f_dfs (default 3).
# n_bits - number of bit positions available (default 32).
#
# First every atom on a ring from #find_sssr gets that ring's size appended
# to its +rings+ list. Then, for every distinct element path (a path and its
# reverse count as one), a seed is computed from position, ELEMNUM value and
# ring membership of the atoms; the seed re-seeds the global random number
# generator (srand) and one pseudo-random bit below n_bits is set.
#
# Returns the fingerprint as an Integer.
def fingerprint(max = 3, n_bits = 32)

  find_sssr.each do |ring|
    size = ring.length
    ring.each { |atom| (atom.rings ||= []) << size }
  end

  bits = 0
  seen = Set.new

  nodes.each do |start|
    f_dfs(start, [start], max) do |path|
      # Skip element sequences that were already hashed
      forward = path.collect { |atom| atom.element.to_s }.join(".")
      next if seen.include?(forward)

      seen.add(forward)
      seen.add(path.reverse.collect { |atom| atom.element.to_s }.join("."))

      # seed calculation
      seed = 0
      path.each_with_index do |atom, idx|
        ring_factor = atom.rings.nil? ? 1 : (1 << atom.rings.length)
        seed += (1 << (5 * idx)) * ELEMNUM[atom.element] * ring_factor
      end
      srand(seed)
      bits |= 1 << rand(n_bits)
    end
  end
  bits
end

#generate_pubchem_subskeyObject



1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
# File 'lib/chem/db/pubchem.rb', line 1028

# Builds a PubChem-style substructure key for this molecule as an Integer
# bit field (work in progress: sections 2 and 4 do not set any bit yet).
#
# Section 1 - sets the bit of every HierarchicElementCounts threshold that
#             the element count reaches.
# Section 3 - sets the Section3 bit for every distinct bonded element pair.
# Sections 6 and 7 - set the bit of every SMARTS pattern that matches,
#             using Chem::OpenBabel::parse_smarts.
#
# NOTE(review): HierarchicElementCounts, Section3, Section4, Section6 and
# Section7 are defined outside this block; their shapes are inferred from how
# they are iterated here - confirm.
def generate_pubchem_subskey
  fp = 0
  # Section 1
  self.composition.each do |elem, num|
    HierarchicElementCounts[elem].each do |n_atoms, bit|
      fp |= (1 << bit) if num >= n_atoms
    end
  end

  # Section 2
  # NOTE(review): the array literal below has no effect, s2bit is never used,
  # and the loop body is commented out - ring bits are not implemented.
  [143, 150, 157, 164, 171]
  s2bit = []
  sssrs = self.find_sssr.inject({}){|hash, ring| (hash[ring.size] ||= []) << ring ; hash}
  sssrs.each do |ring_size, rings|
#        base_num = case ring_size
                 
#        p [ring_size, rings.size]
#        p rings.any?{|ring| ring.any?{|atom| atom.element == :N}}
#        p rings.any?{|ring| ring.any?{|atom| atom.element != :C}}
  end

  # Section 3
  self.edges.collect{ |bond, atom1, atom2|
	[atom1.element.to_s, atom2.element.to_s].sort.join("-")
  }.uniq.each do |pair|
    fp |= (1 << Section3[pair]) if Section3[pair]
  end

  # Section 4
  # NOTE(review): this loop only prints Section4[element] for every node
  # (debug output); +adj+ is an array of nils and is never used.
  self.nodes.each do |node|
    adj = self.adjacent_to(node).collect{|bond, atom| }
    p Section4[node.element]
  end
#      exit
  

  Section6.each do |smarts, bit|
    pat = Chem::OpenBabel::parse_smarts(smarts)
    fp |= (1 << bit ) if pat.match(self)
  end

  Section7.each do |smarts, bit|
    pat = Chem::OpenBabel::parse_smarts(smarts)
    fp |= (1 << bit ) if pat.match(self)
  end
  fp
end

#hilight(atoms, color = [1, 0, 0]) ⇒ Object



16
17
18
19
20
21
# File 'lib/chem/db/vector.rb', line 16

# Colours the given atoms and the bonds between them.
#
# atoms - collection of nodes to highlight.
# color - colour assigned to each highlighted atom and bond; defaults to
#         [1, 0, 0]. Every atom/bond receives its own copy.
#
# A bond is highlighted only when both of its atoms are in +atoms+.
# Returns #nodes.
def hilight(atoms, color = [1, 0, 0])
  edges.each do |bond, atom1, atom2|
    bond.color = color.dup if atoms.include?(atom1) && atoms.include?(atom2)
  end
  nodes.each { |atom| atom.color = color.dup if atoms.include?(atom) }
end

#induced_sub(ary) ⇒ Object



11
12
13
14
15
16
17
# File 'lib/chem/utils/sub.rb', line 11

# Sub-structure induced by the nodes in +ary+.
#
# A #deep_dup of the molecule is taken and every node that is not listed in
# +ary+ is removed from it with #delete.
#
# Returns the trimmed copy; the receiver is left as it was.
def induced_sub(ary)
  deep_dup.tap do |sub|
    (sub.nodes - ary).each { |node| sub.delete(node) }
  end
end

#match(target, &block) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/chem/utils/ullmann.rb', line 24

# Sub-structure search of this molecule in +target+ (Ullmann algorithm).
#
# target - molecule to search in.
# block  - optional predicate; it receives (own_node, target_node) for each
#          index pair that #match_by_ullmann passes to its block.
#
# Returns an Array of Hashes, one per match, mapping each of this molecule's
# nodes to the matching node of +target+.
def match(target, &block)
  mappings =
    if block_given?
      match_by_ullmann(target) { |i, j| yield(self.nodes[i], target.nodes[j]) }
    else
      match_by_ullmann(target)
    end

  mappings.map do |mapping|
    pairs = {}
    # position in +mapping+ = index of our node, value = index in target
    mapping.each_with_index do |target_idx, own_idx|
      pairs[nodes[own_idx]] = target.nodes[target_idx]
    end
    pairs
  end
end

#match_by_ullmann(target, &block) ⇒ Object



19
20
21
22
# File 'lib/chem/utils/ullmann.rb', line 19

# Low-level entry point of the Ullmann sub-structure search.
#
# Loads the 'subcomp' library on demand and delegates to
# Chem.match_by_ullmann(self, target), forwarding the optional block.
def match_by_ullmann(target, &block)
  require 'subcomp'
  Chem.match_by_ullmann(self, target, &block)
end

#molecular_weight(prop = {}) ⇒ Object Also known as: mw

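Returns the molecular weight, or nil if the molecule contains an atom whose weight is unknown. To skip such atoms instead: mol.molecular_weight :neglect_unknown_atom => true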



44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/chem/utils/prop.rb', line 44

# Molecular weight computed from #composition and the AtomicWeight table.
#
# prop - Hash of options:
#        :neglect_unknown_atom - when true, elements without an AtomicWeight
#                                entry are skipped.
#
# Returns the weight as a Float, or nil as soon as an element without an
# AtomicWeight entry is met and :neglect_unknown_atom is not set.
def molecular_weight prop = {}
  total = 0.0
  self.composition().each do |el, n|
    if AtomicWeight[el]
      total += AtomicWeight[el] * n
    elsif !prop[:neglect_unknown_atom]
      return nil
    end
  end
  total
end

#n_hydrogen(node) ⇒ Object

Returns the number of hydrogens on the node. This method may be overridden.



34
35
36
37
38
39
40
# File 'lib/chem/utils/prop.rb', line 34

# Number of hydrogens implied for +node+: its natural bond order minus the
# bond values (+v+) of all bonds reported by adjacent_to(node).
def n_hydrogen(node)
  free_valence = node.natural_bond_order
  adjacent_to(node).inject(free_valence) { |rest, (bond, _atom)| rest - bond.v }
end

#ob_export_as(filetype) ⇒ Object



22
23
24
25
26
27
# File 'lib/chem/utils/openbabel.rb', line 22

# Exports the molecule as a String in the given OpenBabel output format.
#
# filetype - format name; converted with to_s before it is handed to
#            OBConversion#set_out_format.
#
# The OpenBabel molecule is created with #use_open_babel when @ob_mol is nil.
def ob_export_as(filetype)
  use_open_babel if @ob_mol.nil?
  converter = ::OpenBabel::OBConversion.new.tap { |c| c.set_out_format(filetype.to_s) }
  converter.write_string(@ob_mol)
end

#ob_save_as(path, filetype) ⇒ Object



15
16
17
18
19
20
# File 'lib/chem/utils/openbabel.rb', line 15

# Writes the molecule to +path+ in the given OpenBabel output format.
#
# path     - destination file name.
# filetype - format name; converted with to_s before it is handed to
#            OBConversion#set_out_format.
#
# The OpenBabel molecule is created with #use_open_babel when @ob_mol is nil.
def ob_save_as(path, filetype)
  use_open_babel if @ob_mol.nil?
  converter = ::OpenBabel::OBConversion.new.tap { |c| c.set_out_format(filetype.to_s) }
  converter.write_file(@ob_mol, path)
end

#oxidation_number(node) ⇒ Object

Returns oxidation number of node this method can be moved to Atom module



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/chem/utils/prop.rb', line 61

# Oxidation number of +node+.
#
# Each bond to a more electronegative neighbour adds its bond value, each
# bond to a less electronegative neighbour subtracts it; equal
# electronegativities contribute nothing. Implicit hydrogens (#n_hydrogen)
# are subtracted when hydrogen is less electronegative than the node and
# added otherwise.
def oxidation_number(node)
  from_bonds = adjacent_to(node).inject(0) do |sum, (bond, atom)|
    order = node.electro_negativity <=> atom.electro_negativity
    if order == -1
      sum + bond.v
    elsif order == 1
      sum - bond.v
    else
      sum
    end
  end

  # implicit hydrogen
  hydrogens = n_hydrogen(node)
  ElectroNegativity[:H] < node.electro_negativity ? from_bonds - hydrogens : from_bonds + hydrogens
end

#pubchem_subskeysObject



1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
# File 'lib/chem/db/pubchem.rb', line 1078

# Decodes the PUBCHEM_CACTVS_SUBSKEYS entry of #sdf_data into an Integer.
#
# The Base64 payload is read as big-endian 32-bit words; the first word is
# skipped. The bits of every remaining word are mirrored (bit 31 becomes
# bit 0) and the word with index idx is placed at bit offset idx * 32.
def pubchem_subskeys
  require 'base64'
  encoded = self.sdf_data["PUBCHEM_CACTVS_SUBSKEYS"]
  words = Base64.decode64(encoded).unpack("N*")[1..-1]
  fp = 0
  words.each_with_index do |word, idx|
    # mirror the 32 bits by reversing the zero-padded binary text
    mirrored = ("%032b" % word).reverse.to_i(2)
    fp += (mirrored << (idx * 32))
  end
  fp
end

#remove_hydrogens!Object



21
22
23
24
25
# File 'lib/chem/utils.rb', line 21

# Strips all hydrogen atoms from the molecule in place.
#
# Every edge touching a node whose element is :H is dropped from @edges and
# the hydrogen nodes are removed from @nodes.
#
# Returns the new node list.
def remove_hydrogens!
  hydrogens = nodes.select { |atom| atom.element == :H }
  @edges = @edges.reject { |_bond, from, to| hydrogens.include?(from) || hydrogens.include?(to) }
  @nodes -= hydrogens
end

#save(filename, params = {}, &block) ⇒ Object

Saves files for arbitrary format. file type is automatically detected by file extensions.

You can optionally pass parameters as second argument.

Options

:type

> :png # Explicit file type



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/chem.rb', line 109

# Saves the molecule in an arbitrary format.
#
# filename - destination file name; when no :type is given the format is
#            detected from it (detect_file).
# params   - Hash of options, passed on to the format's save:
#            :type - explicit file type, e.g. :png (detect_type).
# block    - accepted for interface compatibility; not used here.
#
# Returns whatever the selected format's save returns.
# Raises NotImplementedError when no registered format accepts the request.
def save(filename, params = {}, &block)
  format_type = params[:type]

  # Exactly one registry lookup: by explicit type when given, otherwise by
  # file name.
  format =
    if format_type
      ChemTypeRegistry.find { |candidate| candidate.detect_type format_type }
    else
      ChemTypeRegistry.find { |candidate| candidate.detect_file filename }
    end

  raise NotImplementedError, "no registered format can save #{filename.inspect}" unless format

  format.save(self, filename, params)
end

#save_as_mdl(filename) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/chem/db/mdl.rb', line 14

# Writes the molecule to +filename+ as an MDL molfile.
#
# Layout: an empty first line, a header line built from
# MDLHeaderLine2Format (program name plus the current date and time), the
# file name, the counts line, one line per node (node.to_mdl), one line per
# edge (edge.to_mdl with the 1-based positions of its two atoms) and the
# closing "M  END".
def save_as_mdl(filename)
  File.open(filename, "w") do |out|
    now = DateTime.now
    stamp = ["  ", "ChemRuby", now.month, now.mday, now.year % 2000, now.hour, now.min]
    counts = [nodes.length, edges.length, 0, 0, 0, 0, 0, 0, 0]

    out.puts
    out.puts(MDLHeaderLine2Format % stamp)
    out.puts(filename)
    out.puts(MDLCountLineFormat % counts)
    nodes.each { |node| out.puts(node.to_mdl) }
    edges.each do |edge, atom1, atom2|
      out.puts(edge.to_mdl(nodes.index(atom1) + 1, nodes.index(atom2) + 1))
    end
    out.puts("M  END")
  end
end

#save_as_pdf(out, params = {}) ⇒ Object

Explicitly save molecule as PDF

Example:

mol = Chem.open_mol("benzene.mol")
mol.save_as_pdf("benzene.pdf")
mol.save("benzene.pdf", :type => :pdf)
mol.save("benzene.pdf") # File type will be detected automatically from the file extension


11
12
13
14
# File 'lib/chem/db/vector.rb', line 11

# Explicitly saves the molecule as PDF.
#
# out    - destination handed to PDFWriter#save.
# params - options handed to PDFWriter.new together with the molecule.
#
# Returns the value of PDFWriter#save.
def save_as_pdf(out, params = {})
  PDFWriter.new(self, params).save(out)
end

#subset_in_composition?(to) ⇒ Boolean

Compares the composition of this molecule with that of `to`. Returns 1 if self.composition > to.composition, 0 if self.composition == to.composition, -1 if self.composition < to.composition, and false if the two compositions cannot be ordered (neither is a subset of the other).

Returns:

  • (Boolean)


96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/chem/utils/prop.rb', line 96

# Compares the element composition of this molecule with that of +to+.
#
# Returns
#    0     when both compositions are identical,
#    1     when every element count of +to+ is covered by this molecule,
#   -1     when every element count of this molecule is covered by +to+,
#   false  when neither composition contains the other, or both are empty.
def subset_in_composition?(to)
  # Both compositions are computed once; each call rebuilds the whole hash.
  mine   = composition
  theirs = to.composition

  all = (theirs.keys + mine.keys).uniq
  return false if all.length == 0

  lacking_here  = (all - mine.keys).length > 0    # +to+ has elements we lack
  lacking_there = (all - theirs.keys).length > 0  # we have elements +to+ lacks

  if lacking_here && lacking_there
    false
  elsif lacking_here
    mine.all? { |k, v| v <= theirs[k] } ? -1 : false
  elsif lacking_there
    theirs.all? { |k, v| v <= mine[k] } ? 1 : false
  elsif all.all? { |el| mine[el] == theirs[el] }
    # same element set from here on: compare the counts
    0
  elsif all.all? { |el| mine[el] >= theirs[el] }
    1
  elsif all.all? { |el| mine[el] <= theirs[el] }
    -1
  else
    false
  end
end

#to_cansmiObject

Returns Canonical SMILES



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/chem/db/cansmi.rb', line 11

# Returns Canonical SMILES (work in progress).
#
# Atom priorities are started from the invariants and refined by repeatedly
# applying calc_prime_product and update_priority until the count returned
# by update_priority stops changing. The priorities are then printed (debug
# output) and the result of get_tree for the node with the lowest priority
# is returned.
def to_cansmi
  ranks, n_classes = update_priority(canonical_smiles_priority_from_invariant)
#      show ranks
  previous = 0
  until previous == n_classes
    previous = n_classes
    ranks = calc_prime_product(ranks)
#        show ranks
    ranks, n_classes = update_priority(ranks)
#        show ranks
  end

  puts
  @nodes.each do |node|
    p ranks[node]
  end
  show ranks
  start = ranks.min { |a, b| a[1] <=> b[1] }[0]
  get_tree(start, ranks)
#      get_canonical_smiles start, ranks
end

#to_eps(para = EpsParameter.new) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/chem/db/eps.rb', line 13

# Renders the molecule as an EPS (Encapsulated PostScript) String.
#
# para - EpsParameter with the drawing settings; when a block is given it is
#        yielded +para+ first so the caller can adjust it.
#
# The output starts with header(para), followed by one text label per atom
# and one or more "newpath ... lineto stroke" lines per bond.
#
# NOTE(review): char_height, char_size, inbond and outbond are used below
# without a receiver and are not defined in this block - presumably helpers
# or EpsParameter values; confirm they resolve.
def to_eps(para = EpsParameter.new)
  # What should I do to ensure 2D features?

  str = ''
  if block_given?
    yield para
  end

  # ratio and min come from the parameter object's bounding-box calculation
  ratio, min = para.calc_bounding_box_size(@nodes)

  # replaces the empty string assigned above
  str = header(para)

  # atom => Vector with the atom's position on the page
  pos = {}

  @nodes.each do |atom|
    pos[atom] = Vector[atom.x, atom.y]
    pos[atom] -= min

    #diff = diff == 0 ? 1 : diff
    pos[atom] *= para.diff * 100
    pos[atom] += para.orig_pt + Vector[para.margin, para.margin] + ratio * 0.5

#         if para.has_atom_yield
#           str += eps.atom_yield.call(atom)
#         end
#        str += atom.eps_header if atom.eps_header
#        if(atom.visible)
      # element symbol, horizontally centred on the atom position
      str += "%5f %5f moveto\n" % [pos[atom][0], pos[atom][1]]
      str += "(" + atom.element.to_s + ") dup stringwidth pop 2 div neg -1.5 rmoveto show\n"

#        end
#        str += atom.eps_footer if atom.eps_footer
  end
#      @nodes.each do ||

  @edges.each do |bond, atom1, atom2|
    #str += bond.eps_header if bond.eps_header
    beginX = pos[atom1][0]
    beginY = pos[atom1][1]
    endX   = pos[atom2][0]
    endY   = pos[atom2][1]
    # (dx, dy): half of the unit vector along the bond; 0 when both atoms
    # share a position and the division yields NaN
    dx = (endX - beginX) / ((endX - beginX)**2 + (endY - beginY)**2)**0.5
    dx = dx.nan? ? 0 : dx / 2.0
    dy = (endY - beginY) / ((endX - beginX)**2 + (endY - beginY)**2)**0.5
    dy = dy.nan? ? 0 : dy / 2.0
    # pull the line ends back from visible atom labels
    if(atom2.visible)
      endX = endX - char_height * dx
      endY = endY - char_height * dy
    end
    if(atom1.visible)
      beginX = beginX + char_size * dx
      beginY = beginY + char_size * dy
    end
    transition = bond.respond_to?('i') ? bond.i : 0
    multi_bond_ratio = 1.0
    # shift the first line sideways so that all lines of a multiple bond end
    # up centred on the bond axis
    beginX = beginX - dy * (bond.v - 1 + transition.abs) * multi_bond_ratio
    beginY = beginY + dx * (bond.v - 1 + transition.abs) * multi_bond_ratio
    endX   = endX   - dy * (bond.v - 1 + transition.abs) * multi_bond_ratio
    endY   = endY   + dx * (bond.v - 1 + transition.abs) * multi_bond_ratio
    # NOTE(review): valence is decremented below but never read.
    valence = bond.v
#        1.upto(bond.v + transition.abs) do |n|
    # one stroke per bond order (plus one per unit of |transition|)
    (bond.v + transition.abs).times do |n|
#           if(color)
#             if(transition < 0)
#               str += "1 0 0 setrgbcolor\n"
#             elsif(transition > 0)
#               str += "0 0 1 setrgbcolor\n"
#             else
#               str += "0 0 0 setrgbcolor\n"
#             end
#           end
      str += "newpath %f %f moveto %f %f lineto stroke\n" % [beginX, beginY, endX, endY]
      centerX = (endX + beginX) /2
      centerY = (endY + beginY) /2
            # positive transition: circle at the bond centre;
            # negative transition: two crossing strokes at the bond centre
            if(transition >0)
              str += centerX.to_s + " " + centerY.to_s + " " + inbond.to_s + " 0 360 arc stroke\n"
    elsif(transition <0)
      str += "newpath %f %f moveto %f %f lineto stroke\n" %
             [centerX + dy - dx*outbond, centerY - dx - outbond * dy,
              centerX - dy - outbond * dx, dx - outbond * dy + centerY]
      str += "newpath %f %f moveto %f %f lineto stroke\n" %
             [centerX + dy + dx*outbond, centerY - dx + outbond * dy, 
              centerX - dy + outbond * dx, dy * outbond + dx + centerY]
    end
    # the lines below still belong to the +times+ block despite their indent
    transition = transition + 1 if(transition < 0)
    transition = transition - 1 if(transition > 0)
    valence = valence - 1
    # move sideways to the position of the next parallel line
    beginX = beginX + dy  * multi_bond_ratio * 2
    beginY = beginY - dx  * multi_bond_ratio * 2
    endX   = endX   + dy  * multi_bond_ratio * 2
    endY   = endY   - dx  * multi_bond_ratio * 2
    end
  end
#      str += "0 0 0 setrgbcolor\n"
  #      str += " #{@size / 2.0} #{@size / 2.0} #{@size / 2.0 + @margin} 0 360 arc stroke\n"

  #open("test.eps", "w").puts str
  str
end

#to_inchiObject



29
30
31
32
# File 'lib/chem/utils/openbabel.rb', line 29

# Returns the molecule in OpenBabel's "inchi" output format.
#
# The OpenBabel molecule is rebuilt with #use_open_babel on every call; the
# last character of the exported text is removed with String#chop.
def to_inchi
  use_open_babel
  ob_export_as("inchi").chop
end

#to_sybylObject

Return sybyl formatted molecule



7
8
# File 'lib/chem/db/sybyl.rb', line 7

# Intended to return the molecule in Sybyl format - not implemented yet: the
# body is empty and the method always returns nil.
def to_sybyl
end

#typ_strObject



47
48
49
# File 'lib/chem/utils/ullmann.rb', line 47

# Atomic numbers of all nodes, in node order, packed into a binary String
# with the "l*" directive of Array#pack.
def typ_str
  nodes.map(&:atomic_number).pack("l*")
end

#use_open_babelObject

set OpenBabel OBMol object to instance variable @ob_mol



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/chem/utils/openbabel.rb', line 35

# Builds an OpenBabel OBMol from this molecule and stores it in @ob_mol.
#
# The binding is loaded on demand; 'openbabel' is tried first and
# 'OpenBabel' is the fallback when that file cannot be loaded.
#
# Every node gets an OpenBabel atom (atomic number from Element2Number,
# coordinates from x/y/z converted with to_f), stored in node.ob_atom; every
# edge becomes a bond between the two atoms' OpenBabel indices with the
# integer bond order bond.v.
#
# Returns #edges.
def use_open_babel
  begin
    require 'openbabel'
  rescue LoadError
    # Only a failed load triggers the fallback; interrupts and other errors
    # are no longer swallowed.
    require 'OpenBabel'
  end
  @ob_mol = ::OpenBabel::OBMol.new
  nodes.each do |node|
    atom = @ob_mol.new_atom
    atom.set_atomic_num(Element2Number[node.element])
    atom.set_vector(node.x.to_f, node.y.to_f, node.z.to_f)
    node.ob_atom = atom
  end
  edges.each do |bond, atom1, atom2|
    @ob_mol.add_bond(atom1.ob_atom.get_idx, atom2.ob_atom.get_idx, bond.v.to_i)
  end
end