Module: Chem::Molecule

Includes:: Graph

Included in:: CDK::CDKMolecule, CDX::CDX, CMLMolecule, GSpanMolecule, KCF::RPairMolecule, KEGG::KCF, KEGG::KCFMolecule, KEGG::KeggCompound, Chem::MDL::MdlMolecule, Chem::MDL::MdlReaction, OpenBabel::OBMolecule, OpsinMolecule, PDB::PDBMolecule, SmilesMol, Sybyl::SybylMolecule, XYZ::XyzMolecule

Defined in:: lib/chem/db/cansmi.rb,
lib/chem.rb,
lib/chem/model.rb,
lib/chem/utils.rb,
lib/chem/db/eps.rb,
lib/chem/db/mdl.rb,
lib/chem/db/sybyl.rb,
lib/chem/db/vector.rb,
lib/chem/utils/cdk.rb,
lib/chem/utils/sub.rb,
lib/chem/db/pubchem.rb,
lib/chem/utils/prop.rb,
lib/chem/utils/sssr.rb,
lib/chem/utils/ullmann.rb,
lib/chem/utils/geometry.rb,
lib/chem/utils/traverse.rb,
lib/chem/utils/openbabel.rb,
lib/chem/utils/fingerprint.rb,
lib/chem/db/types/type_cansmi.rb

Overview

A module for assigning canonical smiles

Defined Under Namespace

Classes: EpsParameter

Constant Summary collapse

EpsHeader =

"%%!PS-Adobe-3.0 EPSF-3.0\n" +
"%%Creator: ChemRuby n.tanaka\n" + 
"%%For: Scientists\n" +
"%%Title: Molecular compound\n" +
"%%CreationDate: %d/%d/%d %d:%d \n"

MDLCountLineFormat =

"%3d%3d%3d%3d%3d%3d%3d%3d%3d  0999 V2000"

MDLHeaderLine2Format =

"%2s%8s%02d%02d%02d%02d%02d"

DESCRIPTORNAME =

'org.openscience.cdk.qsar.descriptors.molecular.'

ELEMNUM = ELEMNUM = :C => 0, :N => 1, :O => 2, :P => 4 ELEMNUM.default = 32

Element2Number.inject({}) do |ret, (elem, num)|
  ret[elem] = 1 << num
  ret
end

Instance Attribute Summary collapse

#cdk2atom ⇒ Object readonly

Returns the value of attribute cdk2atom.
#cdk_mol ⇒ Object readonly

Returns the value of attribute cdk_mol.
#name ⇒ Object

Returns name of molecule.
#ob_mol ⇒ Object readonly

Returns the value of attribute ob_mol.
#source ⇒ Object

Returns source of molecule.

Attributes included from Graph

#adjacencies, #edges, #nodes

Instance Method Summary collapse

#-(other) ⇒ Object
#adjacent_index ⇒ Object
#assign_2d_geometry ⇒ Object

Automatically assigns 2-dimensional geometry. This method may be implicitly called from ChemRuby if nil is assigned to Atom#x.
#bit_mat ⇒ Object
#box_size ⇒ Object

Return size of molecule with Array [x, y, z].
#breadth_first_search(root = @nodes[0]) ⇒ Object (also: #bfs)

Breadth-first search determines the steps and the path to each node and forms a tree that contains all vertices reachable from the root node.
#canonical_ring(ring) ⇒ Object

Fix me! This is not sufficient.
#cdk_BCUT(params) ⇒ Object

BCUT Descriptors .…
#cdk_calc_descriptor(name, args = []) ⇒ Object
#cdk_calc_descriptors ⇒ Object

fixme this method does not work very well.
#cdk_CPSA ⇒ Object

CPSA.
#cdk_find_all_rings ⇒ Object
#cdk_fingerprint ⇒ Object
#cdk_gasteiger_marsili_partial_charges(params = {}) ⇒ Object

Fix me ! Fail: unknown method name ‘assignGasteigerMarsiliFactors.
#cdk_generate_2D ⇒ Object (also: #cdk_calc_2d)
#cdk_generate_randomly ⇒ Object
#cdk_generate_vicinity ⇒ Object
#cdk_hose_code(atom, depth = 3) ⇒ Object

Return HOSE code Anal.
#cdk_hueckel ⇒ Object

HueckelAromaticityDetector.
#cdk_mcs(other) ⇒ Object
#cdk_properties ⇒ Object

dump CDK properties…
#cdk_RotatableBondsCount(rot = [true]) ⇒ Object

args : terminal atoms must be included in the count.
#cdk_rule_of_file(params = true) ⇒ Object

Lipinski’s Rule of Five.
#cdk_save_as(path, params = {}) ⇒ Object
#cdk_setup ⇒ Object
#cdk_sssr ⇒ Object
#cdk_wiener_numbers ⇒ Object

Wiener path number Wiener polarity number.
#cdk_xlogp ⇒ Object
#composition ⇒ Object

Returns composition, e.g. Chem.open_mol(“benzene”).composition # => {:C => 6, :H => 6}.
#connected? ⇒ Boolean
#deep_dup ⇒ Object
#delete(atom) ⇒ Object
#delete_bond(bond) ⇒ Object
#depth_first_search(from = @nodes[0], traversed = [], &block) ⇒ Object (also: #dfs)
#divide ⇒ Object

divide compounds by connectivity e.g.
#f_dfs(node, path, max, &block) ⇒ Object
#find_smallest_ring(root) ⇒ Object

J.
#find_sssr ⇒ Object

Returns Smallest Set of Smallest Ring.
#fingerprint(max = 3, n_bits = 32) ⇒ Object
#generate_pubchem_subskey ⇒ Object
#hilight(atoms, color = [1, 0, 0]) ⇒ Object
#induced_sub(ary) ⇒ Object
#match(target, &block) ⇒ Object
#match_by_ullmann(target, &block) ⇒ Object
#method_missing(m, *args) ⇒ Object

Redirect methods to OpenBabel.
#molecular_weight(prop = {}) ⇒ Object (also: #mw)

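Returns molecular weight, or nil if an element has no known atomic weight. Pass :neglect_unknown_atom => true to skip such elements instead, e.g. mol.molecular_weight :neglect_unknown_atom => true.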
#n_hydrogen(node) ⇒ Object

Returns the number of hydrogens. This method may be overridden.
#ob_export_as(filetype) ⇒ Object
#ob_save_as(path, filetype) ⇒ Object
#oxidation_number(node) ⇒ Object

Returns oxidation number of node this method can be moved to Atom module.
#pubchem_subskeys ⇒ Object

Extract PubChem substructural keys see ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt.
#remove_hydrogens! ⇒ Object
#save(filename, params = {}, &block) ⇒ Object

Saves files for arbitrary format.
#save_as_mdl(filename) ⇒ Object
#save_as_pdf(out, params = {}) ⇒ Object

Explicitly save molecule as PDF. Example: mol = Chem.open_mol(“benzene.mol”) mol.save_as_pdf(“benzene.pdf”) mol.save(“benzene.pdf”, :type => :pdf) mol.save(“benzene.pdf”) # File type will be automatically detected from the file extension.
#subset_in_composition?(to) ⇒ Boolean

return 1 if self.composition > to.composition return 0 if self.composition == to.composition return -1 if self.composition < to.composition return false if self.composition <> to.composition.
#to_cansmi ⇒ Object

Returns Canonical SMILES.
#to_eps(para = EpsParameter.new) ⇒ Object
#to_inchi ⇒ Object
#to_sybyl ⇒ Object

Return sybyl formatted molecule.
#typ_str ⇒ Object
#use_open_babel ⇒ Object

set OpenBabel OBMol object to instance variable @ob_mol.

Methods included from Graph

#adjacent_to, #clustering_coefficient, #each, #morgan, #terminal_nodes

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(m, *args) ⇒ `Object`

Redirect methods to OpenBabel

# File 'lib/chem.rb', line 127

# Redirects unknown methods to the OpenBabel molecule held in @ob_mol.
#
# m     - name of the missing method
# args  - arguments of the original call
# block - block of the original call; it is forwarded as well
#
# Falls back to the default behaviour (NoMethodError) when @ob_mol is not set
# or does not respond to +m+, so a missing OpenBabel object never answers
# calls with NilClass methods.
def method_missing(m, *args, &block)
  return super unless !@ob_mol.nil? && @ob_mol.respond_to?(m)
  @ob_mol.__send__(m, *args, &block)
end

# Keeps respond_to? consistent with the delegation done in method_missing.
def respond_to_missing?(m, include_private = false)
  (!@ob_mol.nil? && @ob_mol.respond_to?(m)) || super
end

Instance Attribute Details

#cdk2atom ⇒ `Object` (readonly)

Returns the value of attribute cdk2atom.



109
110
111

# File 'lib/chem/utils/cdk.rb', line 109

# Reader for @cdk2atom. In this file the hash is filled by #cdk_setup, which
# stores each node under the hashCode of the CDK atom created for it.
def cdk2atom
  @cdk2atom
end

#cdk_mol ⇒ `Object` (readonly)

Returns the value of attribute cdk_mol.



109
110
111

# File 'lib/chem/utils/cdk.rb', line 109

# Reader for @cdk_mol, the CDK molecule assigned by #cdk_setup.
# #cdk_setup treats a nil value as "not converted yet".
def cdk_mol
  @cdk_mol
end

#name ⇒ `Object`

Returns name of molecule. default value is self.source



113
114
115

# File 'lib/chem/model.rb', line 113

# Name of the molecule; falls back to #source when no name has been set.
def name
  @name || source
end

#ob_mol ⇒ `Object` (readonly)

Returns the value of attribute ob_mol.



14
15
16

# File 'lib/chem/utils/openbabel.rb', line 14

# Reader for @ob_mol, the object that #method_missing, #ob_export_as and
# #ob_save_as hand over to OpenBabel.
def ob_mol
  @ob_mol
end

#source ⇒ `Object`

Returns source of molecule. default value is “”



107
108
109

# File 'lib/chem/model.rb', line 107

# Source of the molecule; an empty string when none has been set.
def source
  @source || ""
end

Instance Method Details

#-(other) ⇒ `Object`

# File 'lib/chem/utils/sub.rb', line 75

# Returns the sub-molecule induced by the nodes of self that are not in
# +other+. +other+ is either an Array of nodes (exact class Array) or any
# object answering #nodes.
def -(other)
  removed = other.instance_of?(Array) ? other : other.nodes
  induced_sub(@nodes - removed)
end

#adjacent_index ⇒ `Object`

# File 'lib/chem/utils/ullmann.rb', line 51

# Builds an adjacency list expressed in node indices: entry i holds the
# indices (positions in #nodes) of the neighbours of nodes[i].
def adjacent_index
  table = []
  nodes.each do |node|
    table[nodes.index(node)] = adjacent_to(node).map { |_bond, neighbour| nodes.index(neighbour) }
  end
  table
end

#assign_2d_geometry ⇒ `Object`

Automatically assigns 2-dimensional geometry. This method may be implicitly called from ChemRuby if nil is assigned to Atom#x.



17
18
19

# File 'lib/chem/utils/geometry.rb', line 17

# Starts the 2D layout by handing the first node to #geometrical_type and
# returns whatever that call returns.
def assign_2d_geometry
  first_atom = nodes.first
  geometrical_type(first_atom)
end

#bit_mat ⇒ `Object`

# File 'lib/chem/utils/ullmann.rb', line 61

# Builds a nodes x nodes BitMatrix in which bit (i, j) is set when nodes[j]
# is adjacent to nodes[i]. For a molecule without edges no bit is set and
# has_matrix is switched to false instead.
def bit_mat
  size = nodes.length
  matrix = BitMatrix.new(size, size)
  if edges.length == 0
    matrix.has_matrix = false
    return matrix
  end

  # neighbour lookup, computed once per node
  neighbours = {}
  nodes.each do |node|
    neighbours[node] = adjacent_to(node).map { |_bond, other| other }
  end

  nodes.each_with_index do |row_atom, row|
    nodes.each_with_index do |col_atom, col|
      matrix.set(row, col) if neighbours[row_atom].include?(col_atom)
    end
  end
  matrix
end

#box_size ⇒ `Object`

Return size of molecule with Array [x, y, z]

# File 'lib/chem/utils/geometry.rb', line 7

# Returns the extent of the molecule as [size_x, size_y, size_z], i.e. the
# difference between the largest and smallest coordinate on each axis.
def box_size
  xs = nodes.map { |atom| atom.x }
  ys = nodes.map { |atom| atom.y }
  zs = nodes.map { |atom| atom.z }
  [xs.max - xs.min, ys.max - ys.min, zs.max - zs.min]
end

#breadth_first_search(root = @nodes[0]) ⇒ `Object` Also known as: bfs

Breadth-first search determines the steps and the path to each node and forms a tree that contains all vertices reachable from the root node.

# File 'lib/chem/utils/traverse.rb', line 7

# Breadth-first traversal starting at +root+ (default: first node).
#
# Every bond is visited at most once. For each newly visited bond the block
# is called with (from, to); +to+ is queued for expansion only when the
# block returns a true value. Returns nil.
def breadth_first_search(root = @nodes[0])
  pending = [root]
  seen_bonds = []

  while (current = pending.shift)
    adjacent_to(current).each do |bond, neighbour|
      unless seen_bonds.include?(bond)
        seen_bonds << bond
        pending << neighbour if yield(current, neighbour)
      end
    end
  end
end

#canonical_ring(ring) ⇒ `Object`

Fix me! This is not sufficient



35
36
37

# File 'lib/chem/utils/sssr.rb', line 35

# Returns the atoms of +ring+ ordered by their position in #nodes, so that
# the same set of atoms always yields the same array.
# Marked "Fix me! This is not sufficient" by the original author.
def canonical_ring(ring)
  ring.sort_by { |atom| nodes.index(atom) }
end

#cdk_BCUT(params) ⇒ `Object`

BCUT Descriptors .… Fix me!



303
304

# File 'lib/chem/utils/cdk.rb', line 303

# Placeholder for the CDK BCUT descriptors: the body is empty, so the call
# returns nil.
# NOTE(review): the generated summary lists this method as cdk_BCUT(params),
# but the definition takes no arguments — confirm the intended signature.
def cdk_BCUT
end

#cdk_calc_descriptor(name, args = []) ⇒ `Object`

# File 'lib/chem/utils/cdk.rb', line 273

# Runs the CDK molecular descriptor DESCRIPTORNAME + name on this molecule.
#
# name - descriptor class name without package prefix
# args - parameters handed to the descriptor's setParameters
#
# Returns an Integer/Float for scalar results, an Array for array results,
# and nil for any other result class.
def cdk_calc_descriptor(name, args = [])
  cdk_setup
  descriptor = Rjb::import(DESCRIPTORNAME + name).new
  descriptor.setParameters(args)
  value = descriptor.calculate(cdk_mol).getValue

  case value._classname
  when "org.openscience.cdk.qsar.result.IntegerResult"
    value.intValue
  when "org.openscience.cdk.qsar.result.DoubleResult"
    value.doubleValue
  when "org.openscience.cdk.qsar.result.IntegerArrayResult",
       "org.openscience.cdk.qsar.result.DoubleArrayResult"
    Array.new(value.size) { |i| value.get(i) }
  end
end

#cdk_calc_descriptors ⇒ `Object`

fixme this method does not work very well

# File 'lib/chem/utils/cdk.rb', line 342

# Feeds the CDK molecule to a CDK DescriptorEngine and returns the result of
# its process call. The original author notes that this does not work well.
def cdk_calc_descriptors
  cdk_setup
  # Engine type passed to the constructor (question marks are the original
  # author's):
  #   1: atom
  #   2: bond?
  #   3: molecule?
  engine = Rjb::import('org.openscience.cdk.qsar.DescriptorEngine').new(2)
  engine.process(cdk_mol)
end

#cdk_CPSA ⇒ `Object`

CPSA



297
298
299

# File 'lib/chem/utils/cdk.rb', line 297

# Calculates the CDK 'CPSADescriptor' through #cdk_calc_descriptor with no
# descriptor parameters and returns its result.
def cdk_CPSA
  cdk_calc_descriptor('CPSADescriptor')
end

#cdk_find_all_rings ⇒ `Object`

# File 'lib/chem/utils/cdk.rb', line 121

# Finds all rings with CDK's AllRingsFinder, partitions them with
# RingPartitioner and returns one Array of nodes per partition. CDK atoms
# are translated back to nodes through #cdk2atom.
#
# The finder instance is cached in @@ring_finder. The debug separator that
# used to be printed for every partition has been removed.
def cdk_find_all_rings
  cdk_setup

  @@ring_finder ||= Rjb::import('org.openscience.cdk.ringsearch.AllRingsFinder').new
  partitioner = Rjb::import('org.openscience.cdk.ringsearch.RingPartitioner')
  ringset = @@ring_finder.findAllRings(cdk_mol)
  partitions = partitioner.partitionRings(ringset).elements

  rings = []
  while partitions.hasMoreElements
    members = partitioner.convertToAtomContainer(partitions.nextElement).atoms
    ring = []
    ring << cdk2atom[members.nextElement.hashCode] while members.hasMoreElements
    rings << ring
  end
  rings
end

#cdk_fingerprint ⇒ `Object`



212
213
214

# File 'lib/chem/utils/cdk.rb', line 212

# Currently only returns the class name of the CDK fingerprinter as a String;
# no fingerprint is calculated here.
def cdk_fingerprint
  'org.openscience.cdk.fingerprint.Fingerprinter'
end

#cdk_gasteiger_marsili_partial_charges(params = {}) ⇒ `Object`

Fix me !

Fail: unknown method name `assignGasteigerMarsiliFactors

# File 'lib/chem/utils/cdk.rb', line 191

# Experimental: tries to run CDK's GasteigerMarsiliPartialCharges on the
# molecule and prints the step size and the factors with +p+.
#
# params - Hash; only :deoc_hydrogen is read here.
#
# The original author marks this as failing with "unknown method name
# `assignGasteigerMarsiliFactors".
# NOTE(review): `gm.setChiCatHydrogen = value` invokes a method literally
# named "setChiCatHydrogen=" — presumably gm.setChiCatHydrogen(value) was
# intended; confirm against the CDK version in use.
def cdk_gasteiger_marsili_partial_charges(params = {})
  cdk_setup
  gm = Rjb::import('org.openscience.cdk.charges.GasteigerMarsiliPartialCharges').new
  gm.setChiCatHydrogen = params[:deoc_hydrogen] if params[:deoc_hydrogen]
  p gm.getStepSize
  p gm.assignGasteigerMarsiliFactors(self.cdk_mol)
  # Alternative calls left disabled by the original author:
#      gm.assignGasteigerMarsiliFactors(self.cdk_mol)
#      gm.assignGasteigerMarsiliPartialCharges(self.cdk_mol, false)
#      gm.assignGasteigerMarsiliPartialCharges(self.cdk_mol, true)
end

#cdk_generate_2D ⇒ `Object` Also known as: cdk_calc_2d

# File 'lib/chem/utils/cdk.rb', line 111

# Generates 2D coordinates with CDK's StructureDiagramGenerator and returns
# the laid-out molecule wrapped in a Chem::CDK::CDKMolecule.
# The imported generator class is cached in @@gen_cls.
def cdk_generate_2D
  cdk_setup
  @@gen_cls ||= Rjb::import('org.openscience.cdk.layout.StructureDiagramGenerator')
  layout = @@gen_cls.new
  layout.setMolecule(cdk_mol)
  layout.generateCoordinates
  laid_out = layout.getMolecule
  Chem::CDK::CDKMolecule.new(laid_out)
end

#cdk_generate_randomly ⇒ `Object`

# File 'lib/chem/utils/cdk.rb', line 164

# Asks CDK's RandomGenerator, seeded with this molecule, to propose a
# structure and returns it wrapped in a CDK::CDKMolecule.
def cdk_generate_randomly
  cdk_setup
  generator_class = Rjb::import('org.openscience.cdk.structgen.RandomGenerator')
  proposal = generator_class.new(cdk_mol).proposeStructure
  CDK::CDKMolecule.new(proposal)
end

#cdk_generate_vicinity ⇒ `Object`

# File 'lib/chem/utils/cdk.rb', line 170

# Samples structures with CDK's VicinitySampler and returns them as an Array
# of CDK::CDKMolecule.
def cdk_generate_vicinity
  cdk_setup
  sampler = Rjb::import('org.openscience.cdk.structgen.VicinitySampler').new(cdk_mol)
  samples = sampler.sample(cdk_mol).elements

  molecules = []
  molecules << CDK::CDKMolecule.new(samples.nextElement) while samples.hasMoreElements
  molecules
end

#cdk_hose_code(atom, depth = 3) ⇒ `Object`

Return HOSE code Anal. Chim. Acta. (1978) 103:355-365

# File 'lib/chem/utils/cdk.rb', line 204

# Returns the HOSE code of +atom+ (Anal. Chim. Acta. (1978) 103:355-365)
# as produced by CDK's HOSECodeGenerator.
#
# atom  - a node of this molecule
# depth - depth handed to the generator (default 3)
#
# Raises ArgumentError when +atom+ is not one of #nodes.
#
# The previous body ignored both arguments and referenced an undefined
# local +mol+. The CDK atom is looked up by position, because #cdk_setup
# adds the atoms in the order of #nodes.
def cdk_hose_code(atom, depth = 3)
  cdk_setup
  index = nodes.index(atom)
  raise ArgumentError, "atom does not belong to this molecule" if index.nil?

  hose_gen = Rjb::import('org.openscience.cdk.tools.HOSECodeGenerator').new
  hose_gen.getHOSECode(cdk_mol, cdk_mol.getAtomAt(index), depth)
end

#cdk_hueckel ⇒ `Object`

HueckelAromaticityDetector

# File 'lib/chem/utils/cdk.rb', line 183

# Runs CDK's HueckelAromaticityDetector.detectAromaticity on the CDK
# molecule and returns its result.
def cdk_hueckel
  cdk_setup
  detector = Rjb::import('org.openscience.cdk.aromaticity.HueckelAromaticityDetector')
  detector.detectAromaticity(cdk_mol)
end

#cdk_mcs(other) ⇒ `Object`

# File 'lib/chem/utils/cdk.rb', line 258

# Computes the overlaps between this molecule and +other+ with CDK's
# UniversalIsomorphismTester.getOverlaps and returns them as an Array of
# CDK::CDKMolecule. Both molecules are converted to CDK first.
def cdk_mcs(other)
  cdk_setup
  other.cdk_setup

  tester = Rjb::import('org.openscience.cdk.isomorphism.UniversalIsomorphismTester')
  overlaps = tester.getOverlaps(cdk_mol, other.cdk_mol).iterator

  maps = []
  maps << CDK::CDKMolecule.new(overlaps.next) while overlaps.hasNext
  maps
end

#cdk_properties ⇒ `Object`

dump CDK properties… useless…

# File 'lib/chem/utils/cdk.rb', line 321

# Debug helper: prints every property of the CDK molecule with +p+.
# For keys whose string form matches the DescriptorSpecification pattern the
# implementation identifier, title, vendor and specification reference are
# printed as well. Returns nil.
def cdk_properties
  cdk_setup
  props = cdk_mol.getProperties
  key_enum = props.keys
  while key_enum.hasMoreElements
    key = key_enum.nextElement
    p key.toString
    if /org.openscience.cdk.qsar.DescriptorSpecification/.match(key.toString)
      details = [
        key.getImplementationIdentifier,
        key.getImplementationTitle,
        key.getImplementationVendor,
        key.getSpecificationReference
      ]
      p details
    end
    p props.get(key).toString
  end
end

#cdk_RotatableBondsCount(rot = [true]) ⇒ `Object`

args : terminal atoms must be included in the count



315
316
317

# File 'lib/chem/utils/cdk.rb', line 315

# Calculates the CDK 'RotatableBondsCountDescriptor'.
#
# rot - parameter list handed to the descriptor; per the original note the
#       flag says whether terminal atoms must be included in the count.
def cdk_RotatableBondsCount(rot = [true])
  cdk_calc_descriptor('RotatableBondsCountDescriptor', rot)
end

#cdk_rule_of_file(params = true) ⇒ `Object`

Lipinski’s Rule of Five



308
309
310

# File 'lib/chem/utils/cdk.rb', line 308

# Calculates the CDK 'RuleOfFiveDescriptor' (Lipinski's Rule of Five).
# The method name keeps its historical spelling ("file").
#
# params - single value wrapped into the descriptor's parameter list
def cdk_rule_of_file(params = true)
  cdk_calc_descriptor('RuleOfFiveDescriptor', [params])
end

#cdk_save_as(path, params = {}) ⇒ `Object`

# File 'lib/chem/utils/cdk.rb', line 352

# Renders the molecule to an image file through ImageKit.
#
# path   - output file name
# params - optional Hash:
#          :type   - :png (default), :svg or :jpg
#          :width  - image width, default 100
#          :height - image height, default 100
#
# Returns the result of the ImageKit writer, or nil for any other :type
# (nothing is written in that case).
#
# The defaults are resolved into locals, so the caller's Hash is no longer
# modified.
def cdk_save_as(path, params = {})
  cdk_setup

  type   = params[:type]   || :png
  width  = params[:width]  || 100
  height = params[:height] || 100

  image_kit = Rjb::import('net.sf.structure.cdk.util.ImageKit')
  case type
  when :png
    image_kit.writePNG(cdk_mol, width, height, path)
  when :svg
    image_kit.writeSVG(cdk_mol, width, height, path)
  when :jpg
    image_kit.writeJPG(cdk_mol, width, height, path)
  end
end

#cdk_setup ⇒ `Object`

# File 'lib/chem/utils/cdk.rb', line 216

# Converts this molecule into a CDK molecule (stored in @cdk_mol) unless
# that has already been done. Loads 'rcdk' lazily on first use.
#
# For every node a CDK atom with the node's element symbol is created,
# stored on the node (node.cdk_atom=) and registered in @cdk2atom under the
# atom's hashCode. Every edge becomes a CDK bond with order edge.v.
# Coordinates are not transferred.
#
# Returns self.
#
# The molecule is now built once, after all bonds have been added. It used
# to be rebuilt inside the bond loop, which left @cdk_mol nil for molecules
# without bonds.
def cdk_setup
  return self unless cdk_mol.nil?
  require 'rcdk'
  atom_class = Rjb::import('org.openscience.cdk.Atom')
  bond_class = Rjb::import('org.openscience.cdk.Bond')
  container  = Rjb::import('org.openscience.cdk.AtomContainer').new

  @cdk2atom = {}
  cdk_atoms = nodes.collect do |node|
    cdk_atom = atom_class.new(node.element.to_s)
    cdk_atom.setSymbol(node.element.to_s)
    node.cdk_atom = cdk_atom
    @cdk2atom[cdk_atom.hashCode] = node
    cdk_atom
  end
  container.setAtoms(cdk_atoms)

  edges.each do |edge, node1, node2|
    # atoms were added in node order, so positions in #nodes address them
    atom1 = container.getAtomAt(nodes.index(node1))
    atom2 = container.getAtomAt(nodes.index(node2))
    container.addBond(bond_class.new(atom1, atom2, edge.v.to_f))
  end

  @cdk_mol = Rjb::import('org.openscience.cdk.Molecule').new(container)
  self
end

#cdk_sssr ⇒ `Object`

# File 'lib/chem/utils/cdk.rb', line 142

# Finds the SSSR with CDK's SSSRFinder, partitions the rings with
# RingPartitioner and returns one Array of nodes per partition. CDK atoms
# are translated back to nodes through #cdk2atom.
#
# The imported finder class is cached in @@sssr_finder. The blank line that
# used to be printed for every partition has been removed.
def cdk_sssr
  cdk_setup

  @@sssr_finder ||= Rjb::import('org.openscience.cdk.ringsearch.SSSRFinder')
  partitioner = Rjb::import('org.openscience.cdk.ringsearch.RingPartitioner')
  sssr = @@sssr_finder.new(cdk_mol)
  partitions = partitioner.partitionRings(sssr.findSSSR).elements

  rings = []
  while partitions.hasMoreElements
    members = partitioner.convertToAtomContainer(partitions.nextElement).atoms
    ring = []
    ring << cdk2atom[members.nextElement.hashCode] while members.hasMoreElements
    rings << ring
  end
  rings
end

#cdk_wiener_numbers ⇒ `Object`

Wiener path number Wiener polarity number



292
293
294

# File 'lib/chem/utils/cdk.rb', line 292

# Calculates the CDK 'WienerNumbersDescriptor' (Wiener path number and
# Wiener polarity number, per the original note) via #cdk_calc_descriptor.
def cdk_wiener_numbers
  cdk_calc_descriptor('WienerNumbersDescriptor')
end

#cdk_xlogp ⇒ `Object`

# File 'lib/chem/utils/cdk.rb', line 249

# Returns the XLogP value computed by CDK's XLogPDescriptor.
# Explicit hydrogens are first added to the CDK molecule with HydrogenAdder;
# the descriptor is run with the parameters [true, true].
def cdk_xlogp
  cdk_setup
  Rjb::import('org.openscience.cdk.tools.HydrogenAdder').new.
    addExplicitHydrogensToSatisfyValency(cdk_mol)

  descriptor = Rjb::import('org.openscience.cdk.qsar.descriptors.molecular.XLogPDescriptor').new
  descriptor.setParameters([true, true])
  descriptor.calculate(cdk_mol).getValue.doubleValue
end

#composition ⇒ `Object`

Returns composition, e.g. Chem.open_mol(“benzene”).composition # => {:C => 6, :H => 6}

# File 'lib/chem/utils/prop.rb', line 82

# Counts the atoms per element.
# Returns a plain Hash element => count (no default value), keyed in order
# of first appearance in @nodes.
def composition
  @nodes.each_with_object({}) do |atom, counts|
    counts[atom.element] = (counts[atom.element] || 0) + 1
  end
end

#connected? ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/chem/utils/sub.rb', line 19

# Returns true when every node can be reached from the first node.
#
# #dfs walks bonds, not atoms, so a ring-closing bond reports an atom that
# has already been reached. Each atom is therefore recorded only once;
# otherwise cyclic molecules would be counted as having more reached atoms
# than nodes and would be reported as disconnected.
#
# An empty molecule is reported as not connected.
def connected?
  return false if @nodes.empty?

  start = @nodes[0]
  reached = [start]
  dfs(start) do |_from, to|
    reached << to unless reached.include?(to)
  end
  reached.length == @nodes.length
end

#deep_dup ⇒ `Object`

# File 'lib/chem/utils/sub.rb', line 67

# Returns a copy of the molecule that owns its own nodes and edges arrays,
# so adding or removing entries in the copy leaves self untouched.
# The atoms and bonds inside those arrays are shared, not copied, and
# @adjacencies is not duplicated.
def deep_dup
  dup.tap do |copy|
    copy.nodes = @nodes.dup
    copy.edges = @edges.dup
  end
end

#delete(atom) ⇒ `Object`

# File 'lib/chem/utils/sub.rb', line 60

# Removes +atom+ from @nodes and drops every edge whose bond is reported by
# adjacent_to(atom). Note that adjacent_to is queried after the atom has
# been removed from @nodes. Returns the adjacency list that was walked.
def delete(atom)
  @nodes.delete(atom)
  adjacent_to(atom).each do |incident_bond, _neighbour|
    @edges.reject! { |edge, _atom_a, _atom_b| edge == incident_bond }
  end
end

#delete_bond(bond) ⇒ `Object`

# File 'lib/chem/utils/sub.rb', line 53

# Removes +bond+ from the edge list and from every adjacency list.
#
# Edges are stored as [bond, atom_a, atom_b] triples elsewhere in this
# module (see #delete), so a plain @edges.delete(bond) never matched them.
# Both forms are handled here: an entry is dropped when it equals +bond+
# itself or when it is an Array whose first element equals +bond+.
#
# @adjacencies may be unset; in that case only the edge list is updated.
# Returns @adjacencies (nil when unset).
def delete_bond(bond)
  @edges.delete_if do |entry|
    entry == bond || (entry.is_a?(Array) && entry.first == bond)
  end
  return nil if @adjacencies.nil?

  @adjacencies.each do |_node, neighbours|
    neighbours.delete_if { |adj_bond, _atom_a, _atom_b| bond == adj_bond }
  end
end

#depth_first_search(from = @nodes[0], traversed = [], &block) ⇒ `Object` Also known as: dfs

# File 'lib/chem/utils/traverse.rb', line 24

# Depth-first traversal over bonds starting at +from+ (default: first node).
#
# from      - node to expand
# traversed - bonds already visited; shared across the recursion
#
# Yields (from, to, bond) for every bond not visited before and then
# descends into +to+. Because bonds rather than atoms are tracked, an atom
# can be yielded as +to+ more than once in cyclic graphs.
def depth_first_search(from = @nodes[0], traversed = [], &block)
  adjacent_to(from).each do |bond, to|
    unless traversed.include?(bond)
      traversed << bond
      yield(from, to, bond)
      depth_first_search(to, traversed, &block)
    end
  end
end

#divide ⇒ `Object`

divide compounds by connectivity e.g. washing salts.

# File 'lib/chem/utils/sub.rb', line 31

# Splits the molecule into its connected components (e.g. to wash salts).
# Returns an Array with one induced sub-molecule per component, in order of
# the first node of each component.
def divide
  visited = []
  components = []
  seed = @nodes[0]

  until visited.length == @nodes.length
    members = [seed]
    visited << seed
    dfs(seed) do |_from, to, _bond|
      next if members.include?(to)
      visited << to
      members << to
    end

    # next component starts at the first node not reached so far
    seed = @nodes.find { |node| !visited.include?(node) }
    components << induced_sub(members)
  end
  components
end

#f_dfs(node, path, max, &block) ⇒ `Object`

# File 'lib/chem/utils/fingerprint.rb', line 30

# Enumerates simple paths for the fingerprint.
#
# node - atom at the end of +path+
# path - atoms visited so far (mutated while recursing, restored afterwards)
# max  - paths longer than this are neither yielded nor extended
#
# Yields +path+ itself (not a copy), then extends it by every neighbour that
# is not hydrogen and not already on the path.
def f_dfs(node, path, max, &block)
  return if path.length > max

  yield path
  adjacent_to(node).each do |_bond, neighbour|
    next if neighbour.element == :H
    next if path.include?(neighbour)

    path.push(neighbour)
    f_dfs(neighbour, path, max, &block)
    path.pop
  end
end

#find_smallest_ring(root) ⇒ `Object`

J. Chem. Inf. Comput. Sci. 1994, 34, 822-831

Renzo Balducci and Robert S. Pearlman Efficient Exact Solution of the Ring Perception Problem

# File 'lib/chem/utils/sssr.rb', line 17

# Searches for a smallest ring through +root+ with a breadth-first search.
#
# For every atom the path from +root+ is recorded on first visit. When a
# bond leads to an atom that was already reached, and that atom is not the
# predecessor on the current path, the two paths close a ring if they share
# exactly one atom (the root).
#
# Returns the ring as an Array of atoms, or nil when no ring is found.
def find_smallest_ring(root)
  paths = { root => [root] }

  bfs(root) do |from, to|
    first_visit = !paths.keys.include?(to)
    if first_visit
      paths[to] = paths[from] + [to]
    elsif paths[from][-2] != to
      shared = paths[from] & paths[to]
      return paths[from] + paths[to][1..-1].reverse if shared.length == 1
    end
    first_visit
  end
end

#find_sssr ⇒ `Object`

Returns Smallest Set of Smallest Ring

# File 'lib/chem/utils/sssr.rb', line 40

# Returns the Smallest Set of Smallest Rings as an Array of canonical rings
# (see #canonical_ring).
#
# A working copy of the graph (atom => neighbour atoms) is reduced step by
# step: atoms without neighbours are dropped, and depending on the smallest
# remaining degree atoms are trimmed (#trim) and, for degree 2, the smallest
# ring through each degree-2 atom is collected.
#
# NOTE(review): for smallest degree 3 the ring found is discarded — confirm
# whether it should be collected as well.
# NOTE(review): when the smallest remaining degree is 4 or more, no branch
# modifies the working copy; unless #trim is reached this looks like an
# endless loop — confirm.
def find_sssr
  rings = []

  remaining = {}
  nodes.each do |node|
    remaining[node] = adjacent_to(node).map { |_bond, atom| atom }
  end

  loop do
    degree_two = []
    min_degree = 10
    min_node = nil

    remaining.each do |node, neighbours|
      degree = neighbours.length
      if degree == 0
        # isolated in the working copy: nothing left to examine
        remaining.delete(node)
      elsif degree == 2
        degree_two.push(node)
      end
      if degree > 0 && degree < min_degree
        min_node = node
        min_degree = degree
      end
    end

    case min_degree
    when 1
      trim(remaining, min_node)
    when 2
      degree_two.each do |node|
        ring = find_smallest_ring(node)
        next unless ring
        canonical = canonical_ring(ring)
        rings.push(canonical) unless rings.include?(canonical)
      end
      degree_two.each { |node| trim(remaining, node) }
    when 3
      find_smallest_ring(min_node)
      trim(remaining, min_node)
    end

    break if remaining.length == 0
  end
  rings
end

#fingerprint(max = 3, n_bits = 32) ⇒ `Object`

# File 'lib/chem/utils/fingerprint.rb', line 58

# Path-based hashed fingerprint.
#
# max    - path length limit passed to #f_dfs
# n_bits - width of the fingerprint
#
# Every distinct element path (read in either direction) contributes one
# bit: a seed is derived from the elements and ring memberships along the
# path, the global random generator is re-seeded with it (srand) and the
# first random number below n_bits selects the bit.
#
# Side effects: appends ring sizes to atom.rings for every ring atom and
# re-seeds the global random generator.
# NOTE(review): atom.rings grows on every call, so repeated calls on the
# same molecule may yield different seeds — confirm whether intended.
#
# Returns the fingerprint as an Integer.
def fingerprint(max = 3, n_bits = 32)
  find_sssr.each do |ring|
    ring_size = ring.length
    ring.each { |atom| (atom.rings ||= []) << ring_size }
  end

  bits = 0
  seen_paths = Set.new

  nodes.each do |start|
    f_dfs(start, [start], max) do |path|
      # skip paths whose element sequence was already handled
      symbols = path.collect { |atom| atom.element.to_s }
      key = symbols.join(".")
      next if seen_paths.include?(key)

      seen_paths.add(key)
      seen_paths.add(symbols.reverse.join("."))

      seed = 0
      path.each_with_index do |atom, idx|
        ring_factor = atom.rings.nil? ? 1 : (1 << atom.rings.length)
        seed += (1 << (5 * idx)) * ELEMNUM[atom.element] * ring_factor
      end
      srand(seed)
      bits |= 1 << rand(n_bits)
    end
  end
  bits
end

#generate_pubchem_subskey ⇒ `Object`

# File 'lib/chem/db/pubchem.rb', line 1028

# Builds a (partial) PubChem substructure fingerprint as an Integer.
#
# Implemented sections:
#   1   - hierarchic element counts (HierarchicElementCounts)
#   3   - element pairs joined by a bond (Section3)
#   6/7 - SMARTS patterns matched through Chem::OpenBabel (Section6, Section7)
# Sections 2 (ring keys) and 4 were only stubs that printed debug output;
# they set no bits and have been dropped, including the unused SSSR search.
#
# Elements without an entry in HierarchicElementCounts are skipped.
def generate_pubchem_subskey
  fp = 0

  # Section 1
  composition.each do |elem, num|
    (HierarchicElementCounts[elem] || []).each do |n_atoms, bit|
      fp |= (1 << bit) if num >= n_atoms
    end
  end

  # Section 3
  pairs = edges.collect do |_bond, atom1, atom2|
    [atom1.element.to_s, atom2.element.to_s].sort.join("-")
  end
  pairs.uniq.each do |pair|
    bit = Section3[pair]
    fp |= (1 << bit) if bit
  end

  # Sections 6 and 7
  [Section6, Section7].each do |section|
    section.each do |smarts, bit|
      pat = Chem::OpenBabel::parse_smarts(smarts)
      fp |= (1 << bit) if pat.match(self)
    end
  end
  fp
end

#hilight(atoms, color = [1, 0, 0]) ⇒ `Object`

# File 'lib/chem/db/vector.rb', line 16

# Colours the given atoms and every bond whose two end atoms are both among
# them.
#
# atoms - collection of atoms to highlight
# color - colour to assign, default [1, 0, 0]; each atom/bond receives its
#         own copy
#
# The +color+ argument used to be ignored in favour of a hard-coded
# [1, 0, 0]. Returns #nodes.
def hilight(atoms, color = [1, 0, 0])
  edges.each do |bond, atom1, atom2|
    bond.color = color.dup if atoms.include?(atom1) && atoms.include?(atom2)
  end
  nodes.each { |atom| atom.color = color.dup if atoms.include?(atom) }
end

#induced_sub(ary) ⇒ `Object`

# File 'lib/chem/utils/sub.rb', line 11

# Returns the sub-molecule induced by the nodes in +ary+: a #deep_dup of
# self from which every node not listed in +ary+ has been deleted.
def induced_sub(ary)
  sub = deep_dup
  unwanted = sub.nodes - ary
  unwanted.each { |node| sub.delete(node) }
  sub
end

#match(target, &block) ⇒ `Object`

# File 'lib/chem/utils/ullmann.rb', line 24

# Substructure match of self against +target+ via #match_by_ullmann.
#
# With a block, the block is called with (own_node, target_node) for every
# index pair that #match_by_ullmann yields.
#
# Returns an Array of Hashes, one per match, mapping each node of self to
# the matched node of +target+.
def match(target, &block)
  assignments =
    if block_given?
      match_by_ullmann(target) { |i, j| yield(nodes[i], target.nodes[j]) }
    else
      match_by_ullmann(target)
    end

  mappings = []
  assignments.each do |assignment|
    pairs = {}
    # position = index into own nodes, value = index into target nodes
    assignment.each_with_index do |target_idx, own_idx|
      pairs[nodes[own_idx]] = target.nodes[target_idx]
    end
    mappings << pairs
  end
  mappings
end

#match_by_ullmann(target, &block) ⇒ `Object`

# File 'lib/chem/utils/ullmann.rb', line 19

# Loads the 'subcomp' library on demand and delegates to
# Chem.match_by_ullmann(self, target, &block), returning its result.
# NOTE(review): 'subcomp' presumably provides the Ullmann implementation —
# confirm.
def match_by_ullmann(target, &block)
  require 'subcomp'
  Chem.match_by_ullmann(self, target, &block)
end

#molecular_weight(prop = {}) ⇒ `Object` Also known as: mw

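Returns molecular weight, or nil if an element has no known atomic weight. Pass :neglect_unknown_atom => true to skip such elements instead, e.g. mol.molecular_weight :neglect_unknown_atom => true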

# File 'lib/chem/utils/prop.rb', line 44

# Returns the molecular weight as a Float.
#
# prop - options Hash; with :neglect_unknown_atom => true elements missing
#        from AtomicWeight are skipped, otherwise the first such element
#        makes the method return nil.
def molecular_weight(prop = {})
  total = 0.0
  composition.each do |el, n|
    weight = AtomicWeight[el]
    if weight
      total += weight * n
    elsif !prop[:neglect_unknown_atom]
      return nil
    end
  end
  total
end

#n_hydrogen(node) ⇒ `Object`

Returns the number of hydrogens. This method may be overridden

# File 'lib/chem/utils/prop.rb', line 34

# Returns node.natural_bond_order minus the orders (bond.v) of all bonds
# reported by adjacent_to(node), used as the node's hydrogen count.
def n_hydrogen(node)
  adjacent_to(node).inject(node.natural_bond_order) do |remaining, (bond, _atom)|
    remaining - bond.v
  end
end

#ob_export_as(filetype) ⇒ `Object`

# File 'lib/chem/utils/openbabel.rb', line 22

# Converts the molecule with OpenBabel and returns the result of
# OBConversion#write_string for the given output format.
# Calls #use_open_babel first when @ob_mol is nil.
def ob_export_as(filetype)
  use_open_babel if @ob_mol.nil?
  converter = ::OpenBabel::OBConversion.new.tap do |conv|
    conv.set_out_format(filetype.to_s)
  end
  converter.write_string(@ob_mol)
end

#ob_save_as(path, filetype) ⇒ `Object`

# File 'lib/chem/utils/openbabel.rb', line 15

# Writes the molecule to +path+ through OpenBabel's OBConversion#write_file
# in the given output format and returns that call's result.
# Calls #use_open_babel first when @ob_mol is nil.
def ob_save_as(path, filetype)
  use_open_babel if @ob_mol.nil?
  converter = ::OpenBabel::OBConversion.new.tap do |conv|
    conv.set_out_format(filetype.to_s)
  end
  converter.write_file(@ob_mol, path)
end

#oxidation_number(node) ⇒ `Object`

Returns oxidation number of node this method can be moved to Atom module

# File 'lib/chem/utils/prop.rb', line 61

# Returns the oxidation number of +node+.
#
# Each bond to a more electronegative neighbour adds its order (bond.v),
# each bond to a less electronegative neighbour subtracts it; neighbours of
# equal electronegativity contribute nothing. Implicit hydrogens
# (#n_hydrogen) are subtracted when hydrogen is less electronegative than
# the node and added otherwise.
def oxidation_number(node)
  from_bonds = adjacent_to(node).inject(0) do |sum, (bond, atom)|
    case node.electro_negativity <=> atom.electro_negativity
    when -1 then sum + bond.v
    when 1  then sum - bond.v
    else sum
    end
  end

  # implicit hydrogen
  if ElectroNegativity[:H] < node.electro_negativity
    from_bonds - n_hydrogen(node)
  else
    from_bonds + n_hydrogen(node)
  end
end

#pubchem_subskeys ⇒ `Object`

Extract PubChem substructural keys see ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt

# File 'lib/chem/db/pubchem.rb', line 1078

# Extracts the PubChem substructure keys stored in the SDF data field
# "PUBCHEM_CACTVS_SUBSKEYS" and returns them as one Integer.
# See ftp.ncbi.nlm.nih.gov/pubchem/specifications/pubchem_fingerprints.txt
#
# The field is Base64 text. After decoding, the first 32-bit word is skipped
# and every following big-endian word is bit-reversed, so the most
# significant bit of word k ends up as bit 32 * k of the result.
#
# Decoding uses String#unpack("m") instead of the Base64 library, which is
# no longer a default gem in recent Ruby versions; the decoded bytes are the
# same. A payload that decodes to nothing yields 0.
def pubchem_subskeys
  decoded = sdf_data["PUBCHEM_CACTVS_SUBSKEYS"].unpack("m").first
  words = decoded.unpack("N*")[1..-1] || []

  fp = 0
  words.each_with_index do |word, idx|
    reversed = 0
    32.times do |n|
      reversed |= (1 << n) if word[31 - n] == 1
    end
    fp |= reversed << (idx * 32)
  end
  fp
end

#remove_hydrogens! ⇒ `Object`

# File 'lib/chem/utils.rb', line 21

# Removes all hydrogen atoms (element :H) and every edge that touches one,
# replacing @edges and @nodes with filtered arrays.
# Returns the new node list.
def remove_hydrogens!
  hydrogens = nodes.select { |atom| atom.element == :H }
  @edges = @edges.select do |_bond, from, to|
    !hydrogens.include?(from) && !hydrogens.include?(to)
  end
  @nodes -= hydrogens
end

#save(filename, params = {}, &block) ⇒ `Object`

Saves files for arbitrary format. file type is automatically detected by file extensions.

You can optionally pass parameters as second argument.

Options

:type => :png # Explicit file type

# File 'lib/chem.rb', line 109

# Saves the molecule in an arbitrary format.
#
# filename - output file; its name selects the format when no :type is given
# params   - optional Hash; :type names the format explicitly. The whole
#            Hash is passed on to the format's save method.
# block    - accepted for compatibility; not used here
#
# The format is looked up in ChemTypeRegistry exactly once: by :type when
# given, otherwise by file name. The former extra lookup probed every format
# with a nil type and discarded the result.
#
# Raises NotImplementedError when no registered format matches.
# Returns the result of the format's save method.
def save(filename, params = {}, &block)
  format_type = params[:type]
  format =
    if format_type
      ChemTypeRegistry.find { |candidate| candidate.detect_type(format_type) }
    else
      ChemTypeRegistry.find { |candidate| candidate.detect_file(filename) }
    end

  raise NotImplementedError, "no file format found for #{filename.inspect}" unless format
  format.save(self, filename, params)
end

#save_as_mdl(filename) ⇒ `Object`

# File 'lib/chem/db/mdl.rb', line 14

# Writes the molecule to +filename+ as an MDL molfile.
#
# Output: an empty first line, a program/timestamp line
# (MDLHeaderLine2Format), the file name, the counts line
# (MDLCountLineFormat), one line per node (node.to_mdl), one line per edge
# (edge.to_mdl with 1-based atom numbers) and the "M  END" terminator.
def save_as_mdl(filename)
  File.open(filename, "w") do |out|
    stamp = DateTime.now
    header_fields = [
      "  ", "ChemRuby",
      stamp.month, stamp.mday, stamp.year % 2000, stamp.hour, stamp.min
    ]
    counts = [nodes.length, edges.length, 0, 0, 0, 0, 0, 0, 0]

    out.puts
    out.puts MDLHeaderLine2Format % header_fields
    out.puts filename
    out.puts MDLCountLineFormat % counts
    nodes.each { |node| out.puts node.to_mdl }
    edges.each do |edge, atom1, atom2|
      first = nodes.index(atom1) + 1
      second = nodes.index(atom2) + 1
      out.puts edge.to_mdl(first, second)
    end
    out.puts "M  END"
  end
end

#save_as_pdf(out, params = {}) ⇒ `Object`

Explicitly save molecule as PDF

Example:

mol = Chem.open_mol("benzene.mol")
mol.save_as_pdf("benzene.pdf")
mol.save("benzene.pdf", :type => :pdf)
mol.save("benzene.pdf") # File type will be automatically detected from the file extension

# File 'lib/chem/db/vector.rb', line 11

# Saves the molecule as PDF: builds a PDFWriter for self with +params+ and
# lets it write to +out+. Returns the writer's save result.
def save_as_pdf(out, params = {})
  PDFWriter.new(self, params).save(out)
end

#subset_in_composition?(to) ⇒ `Boolean`

return 1 if self.composition > to.composition return 0 if self.composition == to.composition return -1 if self.composition < to.composition return false if self.composition <> to.composition

Returns:

(Boolean)

# File 'lib/chem/utils/prop.rb', line 96

# Compares the elemental composition of self with that of +to+.
#
# Returns
#    0     when both compositions are equal,
#    1     when self contains at least as many atoms of every element,
#   -1     when +to+ contains at least as many atoms of every element,
#    false when neither contains the other or both are empty.
def subset_in_composition?(to)
  own = composition
  other = to.composition

  all = (other.keys + own.keys).uniq
  return false if all.length == 0

  missing_in_own = (all - own.keys).length > 0
  missing_in_other = (all - other.keys).length > 0

  if missing_in_own && missing_in_other
    false
  elsif missing_in_own
    # self lacks elements of +to+: it can only be the smaller one
    own.all? { |k, v| v <= other[k] } ? -1 : false
  elsif missing_in_other
    other.all? { |k, v| v <= own[k] } ? 1 : false
  elsif all.length == own.keys.length && all.length == other.length
    # same set of elements: compare the counts
    if all.all? { |node| own[node] == other[node] }
      0
    elsif all.all? { |node| own[node] >= other[node] }
      1
    elsif all.all? { |node| own[node] <= other[node] }
      -1
    else
      false
    end
  else
    false
  end
end

#to_cansmi ⇒ `Object`

Returns Canonical SMILES

# File 'lib/chem/db/cansmi.rb', line 11

# Work-in-progress canonical SMILES generation.
#
# Starting from the invariant-based priorities, the priorities are refined
# (prime product followed by #update_priority) until the second value
# returned by #update_priority no longer changes. The node with the lowest
# priority then becomes the start of #get_tree, whose result is returned.
#
# The method still prints diagnostics: a blank line, the priority of every
# node and the output of #show.
def to_cansmi
  ranks, rank_count = update_priority(canonical_smiles_priority_from_invariant)
  previous_count = 0
  until previous_count == rank_count
    previous_count = rank_count
    ranks, rank_count = update_priority(calc_prime_product(ranks))
  end

  puts
  @nodes.each { |node| p ranks[node] }
  show ranks
  start = ranks.min { |a, b| a[1] <=> b[1] }[0]
  get_tree(start, ranks)
end

#to_eps(para = EpsParameter.new) ⇒ `Object`

# File 'lib/chem/db/eps.rb', line 13

# Builds a PostScript (EPS) drawing of the molecule and returns it as a
# string.
#
# para:: drawing parameters (a fresh EpsParameter by default). If a block is
#        given, para is yielded to it before anything is computed, so the
#        caller can adjust the parameters.
#
# The result starts with whatever header(para) returns, followed by one
# text label per node in @nodes and one or more line strokes per entry in
# @edges.
#
# NOTE(review): header, char_height, char_size, inbond and outbond are not
# defined inside this method; presumably they are helpers or accessors
# defined elsewhere -- confirm.
def to_eps(para = EpsParameter.new)
  # What should I do to ensure 2D features?

  # This initial value is overwritten by header(para) below.
  str = ''
  if block_given?
    yield para
  end

  ratio, min = para.calc_bounding_box_size(@nodes)

  str = header(para)

  # Maps each atom to its position on the page.
  pos = {}

  @nodes.each do |atom|
    # Shift by min, scale by para.diff * 100, then offset by the origin,
    # the margin and half of ratio.
    pos[atom] = Vector[atom.x, atom.y]
    pos[atom] -= min

    #diff = diff == 0 ? 1 : diff
    pos[atom] *= para.diff * 100
    pos[atom] += para.orig_pt + Vector[para.margin, para.margin] + ratio * 0.5

#         if para.has_atom_yield
#           str += eps.atom_yield.call(atom)
#         end
#        str += atom.eps_header if atom.eps_header
#        if(atom.visible)
      # Move to the atom position, then shift left by half the label width
      # and down by 1.5 before showing the element symbol.
      str += "%5f %5f moveto\n" % [pos[atom][0], pos[atom][1]]
      str += "(" + atom.element.to_s + ") dup stringwidth pop 2 div neg -1.5 rmoveto show\n"

#        end
#        str += atom.eps_footer if atom.eps_footer
  end
#      @nodes.each do ||

  @edges.each do |bond, atom1, atom2|
    #str += bond.eps_header if bond.eps_header
    beginX = pos[atom1][0]
    beginY = pos[atom1][1]
    endX   = pos[atom2][0]
    endY   = pos[atom2][1]
    # dx, dy: components of the unit vector from atom1 to atom2, halved.
    # A NaN result (e.g. 0.0 / 0.0 when both ends coincide) is replaced by 0.
    dx = (endX - beginX) / ((endX - beginX)**2 + (endY - beginY)**2)**0.5
    dx = dx.nan? ? 0 : dx / 2.0
    dy = (endY - beginY) / ((endX - beginX)**2 + (endY - beginY)**2)**0.5
    dy = dy.nan? ? 0 : dy / 2.0
    # Pull each end of the line back along the bond direction when the atom
    # at that end is visible.
    # NOTE(review): the end point uses char_height while the start point
    # uses char_size -- confirm that the difference is intended.
    if(atom2.visible)
      endX = endX - char_height * dx
      endY = endY - char_height * dy
    end
    if(atom1.visible)
      beginX = beginX + char_size * dx
      beginY = beginY + char_size * dy
    end
    # bond.i is used only when the bond responds to 'i'; otherwise 0.
    transition = bond.respond_to?('i') ? bond.i : 0
    multi_bond_ratio = 1.0
    # Offset the first stroke perpendicular to the bond by (strokes - 1)
    # units; every iteration below steps back by 2 units, so the parallel
    # strokes end up centred on the bond axis.
    beginX = beginX - dy * (bond.v - 1 + transition.abs) * multi_bond_ratio
    beginY = beginY + dx * (bond.v - 1 + transition.abs) * multi_bond_ratio
    endX   = endX   - dy * (bond.v - 1 + transition.abs) * multi_bond_ratio
    endY   = endY   + dx * (bond.v - 1 + transition.abs) * multi_bond_ratio
    # NOTE(review): valence is assigned here and decremented in the loop but
    # never read.
    valence = bond.v
#        1.upto(bond.v + transition.abs) do |n|
    # One stroke per unit of bond.v plus one per unit of |transition|.
    (bond.v + transition.abs).times do |n|
#           if(color)
#             if(transition < 0)
#               str += "1 0 0 setrgbcolor\n"
#             elsif(transition > 0)
#               str += "0 0 1 setrgbcolor\n"
#             else
#               str += "0 0 0 setrgbcolor\n"
#             end
#           end
      str += "newpath %f %f moveto %f %f lineto stroke\n" % [beginX, beginY, endX, endY]
      centerX = (endX + beginX) /2
      centerY = (endY + beginY) /2
            # While transition is non-zero, mark the midpoint of the stroke:
            # a full circle of radius inbond when it is positive, two short
            # strokes when it is negative.
            if(transition >0)
              str += centerX.to_s + " " + centerY.to_s + " " + inbond.to_s + " 0 360 arc stroke\n"
    elsif(transition <0)
      str += "newpath %f %f moveto %f %f lineto stroke\n" %
             [centerX + dy - dx*outbond, centerY - dx - outbond * dy,
              centerX - dy - outbond * dx, dx - outbond * dy + centerY]
      str += "newpath %f %f moveto %f %f lineto stroke\n" %
             [centerX + dy + dx*outbond, centerY - dx + outbond * dy, 
              centerX - dy + outbond * dx, dy * outbond + dx + centerY]
    end
    # Step transition one unit towards zero so that only the first
    # |transition| strokes are marked.
    transition = transition + 1 if(transition < 0)
    transition = transition - 1 if(transition > 0)
    valence = valence - 1
    # Move to the next parallel stroke.
    beginX = beginX + dy  * multi_bond_ratio * 2
    beginY = beginY - dx  * multi_bond_ratio * 2
    endX   = endX   + dy  * multi_bond_ratio * 2
    endY   = endY   - dx  * multi_bond_ratio * 2
    end
  end
#      str += "0 0 0 setrgbcolor\n"
  #      str += " #{@size / 2.0} #{@size / 2.0} #{@size / 2.0 + @margin} 0 360 arc stroke\n"

  #open("test.eps", "w").puts str
  # The accumulated PostScript text is the return value.
  str
end

#to_inchi ⇒ `Object`

# File 'lib/chem/utils/openbabel.rb', line 29

# Exports the molecule through OpenBabel using the "inchi" file type.
#
# Calls use_open_babel first, then returns the ob_export_as result with its
# final character removed by String#chop (presumably a trailing newline --
# confirm against the exporter's output).
def to_inchi
  use_open_babel
  exported = ob_export_as("inchi")
  exported.chop
end

#to_sybyl ⇒ `Object`

Returns the molecule in Sybyl format.

7
8

# File 'lib/chem/db/sybyl.rb', line 7

def to_sybyl
end

#typ_str ⇒ `Object`



47
48
49

# File 'lib/chem/utils/ullmann.rb', line 47

# Packs the atomic_number of every node, in node order, into a binary
# string using the "l*" pack directive.
def typ_str
  atomic_numbers = nodes.map(&:atomic_number)
  atomic_numbers.pack("l*")
end

#use_open_babel ⇒ `Object`

Sets the instance variable @ob_mol to a new OpenBabel OBMol object built from this molecule.

# File 'lib/chem/utils/openbabel.rb', line 35

# Builds a fresh OpenBabel::OBMol mirroring this molecule and stores it in
# @ob_mol.
#
# The binding is loaded as 'openbabel', falling back to 'OpenBabel' when the
# first name cannot be loaded. Only LoadError triggers the fallback: the
# previous `rescue Exception` also swallowed interrupts, exits and genuine
# errors raised while loading the extension.
#
# For every node a new OB atom is created with the atomic number looked up
# in Element2Number and the node's x/y/z coordinates as floats; that atom is
# stored on the node through ob_atom=. For every edge a bond is added between
# the two OB atoms using bond.v.to_i as the third add_bond argument.
#
# A new OBMol is created on every call.
#
# Raises LoadError when neither file name can be required.
def use_open_babel
  begin
    require 'openbabel'
  rescue LoadError
    # Fall back to the capitalised file name.
    require 'OpenBabel'
  end

  @ob_mol = ::OpenBabel::OBMol.new

  nodes.each do |node|
    ob_atom = @ob_mol.new_atom
    ob_atom.set_atomic_num(Element2Number[node.element])
    ob_atom.set_vector(node.x.to_f, node.y.to_f, node.z.to_f)
    node.ob_atom = ob_atom
  end

  edges.each do |bond, atom1, atom2|
    @ob_mol.add_bond(atom1.ob_atom.get_idx, atom2.ob_atom.get_idx, bond.v.to_i)
  end
end

Module: Chem::Molecule

Overview

Defined Under Namespace

Constant Summary collapse

Instance Attribute Summary collapse

Attributes included from Graph

Instance Method Summary collapse

Methods included from Graph

Dynamic Method Handling

#method_missing(m, *args) ⇒ Object

Instance Attribute Details

#cdk2atom ⇒ Object (readonly)

#cdk_mol ⇒ Object (readonly)

#name ⇒ Object

#ob_mol ⇒ Object (readonly)

#source ⇒ Object

Instance Method Details

#-(other) ⇒ Object

#adjacent_index ⇒ Object

#assign_2d_geometry ⇒ Object

#bit_mat ⇒ Object

#box_size ⇒ Object

#breadth_first_search(root = @nodes[0]) ⇒ Object Also known as: bfs

#canonical_ring(ring) ⇒ Object

#cdk_BCUT(params) ⇒ Object

#cdk_calc_descriptor(name, args = []) ⇒ Object

#cdk_calc_descriptors ⇒ Object

#cdk_CPSA ⇒ Object

#cdk_find_all_rings ⇒ Object

#cdk_fingerprint ⇒ Object

#cdk_gasteiger_marsili_partial_charges(params = {}) ⇒ Object

#cdk_generate_2D ⇒ Object Also known as: cdk_calc_2d

#cdk_generate_randomly ⇒ Object

#cdk_generate_vicinity ⇒ Object

#cdk_hose_code(atom, depth = 3) ⇒ Object

#cdk_hueckel ⇒ Object

#cdk_mcs(other) ⇒ Object

#cdk_properties ⇒ Object

#cdk_RotatableBondsCount(rot = [true]) ⇒ Object

#cdk_rule_of_file(params = true) ⇒ Object

#cdk_save_as(path, params = {}) ⇒ Object

#cdk_setup ⇒ Object

#cdk_sssr ⇒ Object

#cdk_wiener_numbers ⇒ Object

#cdk_xlogp ⇒ Object

#composition ⇒ Object

#connected? ⇒ Boolean

#deep_dup ⇒ Object

#delete(atom) ⇒ Object

#delete_bond(bond) ⇒ Object

#depth_first_search(from = , traversed = [], &block) ⇒ Object Also known as: dfs

#divide ⇒ Object

#f_dfs(node, path, max, &block) ⇒ Object

#find_smallest_ring(root) ⇒ Object

#find_sssr ⇒ Object

#fingerprint(max = 3, n_bits = 32) ⇒ Object

#generate_pubchem_subskey ⇒ Object

#hilight(atoms, color = [1, 0, 0]) ⇒ Object

#induced_sub(ary) ⇒ Object

#match(target, &block) ⇒ Object

#match_by_ullmann(target, &block) ⇒ Object

#molecular_weight(prop = {}) ⇒ Object Also known as: mw

#n_hydrogen(node) ⇒ Object

#ob_export_as(filetype) ⇒ Object

#ob_save_as(path, filetype) ⇒ Object

#oxidation_number(node) ⇒ Object

#pubchem_subskeys ⇒ Object

#remove_hydrogens! ⇒ Object

#save(filename, params = {}, &block) ⇒ Object