Module: Chem::Molecule

Includes:
Graph
Included in:
CDX::CDX, GSpanMolecule, KEGG::KCF, KEGG::KCFMolecule, KEGG::KeggCompound, Chem::MDL::MdlMolecule, Chem::MDL::MdlReaction, SmilesMol, Sybyl::SybylMolecule, XYZ::XyzMolecule
Defined in:
lib/chem/db/cansmi.rb,
lib/chem.rb,
lib/chem/model.rb,
lib/chem/db/eps.rb,
lib/chem/db/mdl.rb,
lib/chem/db/sybyl.rb,
lib/chem/db/vector.rb,
lib/chem/utils/sub.rb,
lib/chem/db/pubchem.rb,
lib/chem/utils/prop.rb,
lib/chem/utils/sssr.rb,
lib/chem/utils/ullmann.rb,
lib/chem/utils/geometry.rb,
lib/chem/utils/traverse.rb,
lib/chem/db/types/type_cansmi.rb

Overview

A module for assigning canonical smiles

Defined Under Namespace

Classes: EpsParameter

Constant Summary collapse

# printf-style template for the comment header of an EPS document.
# "%%" renders as a literal "%"; the five "%d" fields on the last line
# fill in the CreationDate comment.
EpsHeader =
"%%!PS-Adobe-3.0 EPSF-3.0\n" +
"%%Creator: ChemRuby n.tanaka\n" + 
"%%For: Scientists\n" +
"%%Title: Molecular compound\n" +
"%%CreationDate: %d/%d/%d %d:%d \n"
# printf-style template for the counts line written by #save_as_mdl:
# nine 3-character integer fields followed by the literal "  0999 V2000".
MDLCountLineFormat =
"%3d%3d%3d%3d%3d%3d%3d%3d%3d  0999 V2000"

Instance Attribute Summary collapse

Attributes included from Graph

#adjacencies, #edges, #nodes

Instance Method Summary collapse

Methods included from Graph

#adj_matrix, #adjacency_list, #adjacent_to, #clustering_coefficient, #connection, #each, #match_by_adj_mat, #match_by_ullmann, #match_exhaustively, #matchable, #matchable_old, #morgan

Instance Attribute Details

#nameObject

Returns name of molecule. default value is self.source



110
111
112
# File 'lib/chem/model.rb', line 110

# Returns the name of the molecule, falling back to #source when no
# name has been assigned.
#
# @return [Object] @name when it is set, otherwise the value of #source
def name
  # The fallback used to call the misspelled `self.souce`, which raised
  # NoMethodError whenever @name was unset.
  @name || source
end

#sourceObject

Returns source of molecule. default value is “”



104
105
106
# File 'lib/chem/model.rb', line 104

# Returns where the molecule came from; an empty string when nothing
# has been recorded in @source.
def source
  @source || ""
end

Instance Method Details

#-(other) ⇒ Object



69
70
71
72
73
74
75
# File 'lib/chem/utils/sub.rb', line 69

# Returns the sub-structure induced by this molecule's nodes minus the
# given ones.
#
# other:: either an Array of nodes, or any object responding to #nodes
def -(other)
  removed = other.instance_of?(Array) ? other : other.nodes
  induced_sub(@nodes - removed)
end

#assign_2d_geometryObject

Automatically assigns 2-dimensional geometry. This method may be called implicitly by ChemRuby if nil is assigned to Atom#x.



9
10
11
# File 'lib/chem/utils/geometry.rb', line 9

# Starts 2D layout assignment by handing the first node to
# #geometrical_type and returning whatever that call returns.
def assign_2d_geometry
  first_node = nodes[0]
  geometrical_type(first_node)
end

#breadth_first_search(root = @nodes[0]) ⇒ Object Also known as: bfs

Breadth-first search computes the number of steps and the path to each node, forming a tree that contains all vertices reachable from the root node.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/chem/utils/traverse.rb', line 7

# Walks the graph breadth-first from +root+, visiting every bond at most
# once. For each newly seen bond the block receives (from, to); the
# far-end node is enqueued only when the block returns a truthy value.
#
# Returns nil.
def breadth_first_search(root = @nodes[0])
  pending = [root]
  seen_bonds = []

  while (current = pending.shift)
    adjacent_to(current).each do |bond, neighbor|
      unless seen_bonds.include?(bond)
        seen_bonds << bond
        pending << neighbor if yield(current, neighbor)
      end
    end
  end
end

#canonical_ring(ring) ⇒ Object



34
35
36
37
# File 'lib/chem/utils/sssr.rb', line 34

# Returns a copy of +ring+ ordered by each member's position in @atoms.
# FIXME (carried over from the original): a plain sort is not sufficient
# as a canonical form.
def canonical_ring(ring)
  ring.sort do |left, right|
    @atoms.index(left) <=> @atoms.index(right)
  end
end

#compositionObject

Returns the composition as a Hash mapping each element to its atom count, e.g. Chem.open_mol(“benzene”).composition # => {:C => 6, :H => 6}



82
83
84
85
86
87
88
89
90
# File 'lib/chem/utils/prop.rb', line 82

# Tallies the atoms of the molecule by element.
#
# Returns a plain Hash (no default value) mapping each element found in
# @nodes to the number of atoms carrying it.
def composition
  @nodes.each_with_object({}) do |atom, counts|
    element = atom.element
    counts[element] = (counts[element] || 0) + 1
  end
end

#connected?Boolean

Returns:

  • (Boolean)


13
14
15
16
17
18
19
20
21
# File 'lib/chem/utils/sub.rb', line 13

# Tells whether every node can be reached from the first node.
#
# @return [Boolean] true when the nodes reached by #dfs from @nodes[0]
#   cover all of @nodes
def connected?
  start = @nodes[0]
  reached = [start]
  dfs(start) do |_from, to|
    # dfs walks every bond once, so an atom in a ring is yielded more
    # than once; record each node a single time or the length check
    # below wrongly reports cyclic molecules as disconnected.
    reached << to unless reached.include?(to)
  end
  reached.length == @nodes.length
end

#deep_dupObject



61
62
63
64
65
66
67
# File 'lib/chem/utils/sub.rb', line 61

# Returns a duplicate of this molecule that owns its own copies of the
# node and edge containers (the elements themselves are shared).
# Adjacencies are deliberately not copied, as in the original:
#   ret.adjacencies = @adjacencies.dup if @adjacencies
def deep_dup
  dup.tap do |copy|
    copy.nodes = @nodes.dup
    copy.edges = @edges.dup
  end
end

#delete(atom) ⇒ Object



54
55
56
57
58
59
# File 'lib/chem/utils/sub.rb', line 54

# Removes +atom+ from @nodes and drops every edge whose bond is incident
# to it (as reported by #adjacent_to).
#
# Returns the collection #adjacent_to returned for +atom+.
def delete(atom)
  @nodes.delete(atom)
  adjacent_to(atom).each do |incident_bond, _neighbor|
    @edges.reject! { |bond, _atom_a, _atom_b| bond == incident_bond }
  end
end

#delete_bond(bond) ⇒ Object



47
48
49
50
51
52
# File 'lib/chem/utils/sub.rb', line 47

# Removes +bond+ from the edge list and from every adjacency list.
#
# Edges are matched on their first element, the same way #delete does,
# because edge entries are [bond, atom, atom] triples; a bare
# `@edges.delete(bond)` compares the whole triple against the bond and
# leaves the edge in place. Entries stored as bare bonds still match,
# since block destructuring binds a non-array entry to the first
# parameter.
#
# @param bond [Object] the bond to remove
# @return [Object, nil] @adjacencies, or nil when no adjacencies exist
def delete_bond(bond)
  @edges.delete_if { |b, _atom_a, _atom_b| b == bond }
  # @adjacencies may be absent, so skip it rather than raise.
  return unless @adjacencies

  @adjacencies.each do |_node, neighbours|
    neighbours.delete_if { |b, _atom_a, _atom_b| bond == b }
  end
end

#depth_first_search(from = @nodes[0], traversed = [], &block) ⇒ Object Also known as: dfs



24
25
26
27
28
29
30
31
# File 'lib/chem/utils/traverse.rb', line 24

# Walks the graph depth-first from +from+, following each bond at most
# once. The block receives (from, to, bond) for every bond taken.
# +traversed+ accumulates the bonds already followed and is shared
# across the recursive calls.
#
# Returns the collection #adjacent_to returned for +from+.
def depth_first_search(from = @nodes[0], traversed = [], &block)
  adjacent_to(from).each do |bond, to|
    unless traversed.include?(bond)
      traversed << bond
      yield(from, to, bond)
      depth_first_search(to, traversed, &block)
    end
  end
end

#divideObject

divide compounds by connectivity e.g. washing salts.



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/chem/utils/sub.rb', line 25

# Splits the molecule into its connected components.
#
# Repeatedly picks the first node not yet visited, gathers everything
# #dfs reaches from it, and turns that group into a sub-structure with
# #induced_sub.
#
# Returns an Array with one #induced_sub result per component.
def divide
  visited = []
  seed = @nodes[0]
  fragments = []

  until visited.length == @nodes.length
    members = [seed]
    visited << seed
    dfs(seed) do |_from, reached, _bond|
      next if members.include?(reached)

      visited << reached
      members << reached
    end

    seed = @nodes.find { |node| !visited.include?(node) }
    fragments << induced_sub(members)
  end
  fragments
end

#find_smallest_ring(root) ⇒ Object

J. Chem. Inf. Comput. Sci. 1994, 34, 822-831

Renzo Balducci and Robert S. Pearlman Efficient Exact Solution of the Ring Perception Problem



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/chem/utils/sssr.rb', line 17

# Searches breadth-first from +root+ for a ring passing through it.
#
# While walking, the path from +root+ to every newly reached node is
# recorded. When a bond leads to an already reached node that is not the
# step just taken, and the two paths share exactly one node, the ring is
# formed by joining both paths and returned immediately.
#
# Returns the ring as an Array of nodes, or whatever #bfs returns when
# no ring is found.
def find_smallest_ring(root)
  path = { root => [root] }

  bfs(root) do |from, to|
    unvisited = !path.keys.include?(to)
    if unvisited
      path[to] = path[from].clone.push(to)
    elsif path[from][-2] != to && (path[from] & path[to]).length == 1
      # Non-local return: leaves find_smallest_ring from inside the block.
      return path[from] + path[to][1..-1].reverse
    end
    unvisited
  end
end

#find_sssrObject



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/chem/utils/sssr.rb', line 39

# Collects rings by repeatedly trimming low-degree nodes from the
# adjacency map @mol, and memoizes the result in @sssr.
#
# NOTE(review): @mol is reset to an empty Hash below and the code that
# would fill it from @nodes is commented out, so as written the loop
# breaks on its first pass and the method returns an empty Array.
#
# Returns the Array of rings, each passed through #canonical_ring.
def find_sssr
  return @sssr if @sssr
  # NOTE(review): fullSet and trimSet are written but never read here.
  fullSet = []
  trimSet = []
  rings = []
  @mol = {}
#     mol = {1=>[2, 5], 2=> [1, 3], 3=> [2, 4], 4=>[3, 5], 5=>[4,1]}
#       @nodes.each do |k, atom|
# 	atom.set_neighbor
# 	@mol[atom] = atom.neighbor
#       end

  loop do
	nodesN2 = []
	# 10 acts as a sentinel larger than any degree the case below handles.
	smallest_degree = 10
	smallest = nil
	# One pass over @mol: drop isolated nodes, remember degree-2 nodes and
	# track the node with the smallest non-zero degree.
	@mol.each do |k, a|
	  case a.length
	  when 0
 @mol.delete(k)# Is this OK?
 trimSet.push(k)
	  when 2
 nodesN2.push(k)
	  end
	  if a.length > 0 && a.length < smallest_degree
 smallest = k
 smallest_degree = a.length
	  end
	end
	# NOTE(review): if @mol is non-empty and its smallest degree is above 3,
	# no branch below changes @mol and the loop would not terminate.
	case smallest_degree
	when 1
	  trim(smallest)
	when 2
	  nodesN2.each do |k|
 ring = find_smallest_ring(k)
#	    rings.push(canonical_ring(ring)) if !rings.include?(canonical_ring(ring))
 rings.push(canonical_ring(ring)) if ring && !rings.include?(canonical_ring(ring))
	  end
	  nodesN2.each do |k|
 trim(k)
	  end
	when 3
	  # The ring found here is assigned but never added to rings.
	  ring = find_smallest_ring(smallest)
	  trim(smallest)
	end
	break if @mol.length  == 0
  end
  @sssr = rings
end

#induced_sub(ary) ⇒ Object



5
6
7
8
9
10
11
# File 'lib/chem/utils/sub.rb', line 5

# Builds the sub-structure induced by the nodes in +ary+: a #deep_dup of
# this molecule from which every node not listed in +ary+ is deleted.
#
# Returns the trimmed copy; the receiver is left untouched.
def induced_sub(ary)
  deep_dup.tap do |copy|
    (copy.nodes - ary).each { |extra| copy.delete(extra) }
  end
end

#molecular_weight(prop = {}) ⇒ Object Also known as: mw

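Returns molecular weight, or nil if any element has no known atomic weight. Pass :neglect_unknown_atom => true to skip such elements instead, e.g. mol.molecular_weight :neglect_unknown_atom => true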



44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/chem/utils/prop.rb', line 44

# Sums the atomic weights of all atoms in the molecule.
#
# prop:: options Hash; with :neglect_unknown_atom set, elements missing
#        from AtomicWeight are skipped instead of aborting
#
# Returns the weight as a Float, or nil when an element has no entry in
# AtomicWeight and :neglect_unknown_atom is not set.
def molecular_weight(prop = {})
  total = 0.0
  composition.each do |el, n|
    weight = AtomicWeight[el]
    if weight
      total += weight * n
    elsif !prop[:neglect_unknown_atom]
      return nil
    end
  end
  total
end

#n_hydrogen(node) ⇒ Object

Returns the number of hydrogens on the node. This method may be overridden.



34
35
36
37
38
39
40
# File 'lib/chem/utils/prop.rb', line 34

# Counts the hydrogens on +node+: its natural bond order minus the
# order (#v) of every bond #adjacent_to reports for it.
def n_hydrogen(node)
  valence_budget = node.natural_bond_order
  adjacent_to(node).inject(valence_budget) do |remaining, (bond, _atom)|
    remaining - bond.v
  end
end

#oxidation_number(node) ⇒ Object

Returns the oxidation number of the node. This method could be moved to the Atom module.



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/chem/utils/prop.rb', line 61

# Computes the oxidation number of +node+.
#
# Each bond adds its order (#v) when the neighbour is more
# electronegative than +node+, subtracts it when the neighbour is less
# electronegative, and contributes nothing on a tie. Implicit hydrogens
# (#n_hydrogen) are then subtracted when hydrogen is less
# electronegative than +node+ and added otherwise.
def oxidation_number(node)
  bonded = adjacent_to(node).inject(0) do |sum, (bond, atom)|
    case node.electro_negativity <=> atom.electro_negativity
    when -1 then sum + bond.v
    when 1  then sum - bond.v
    else sum
    end
  end

  # implicit hydrogen
  if ElectroNegativity[:H] < node.electro_negativity
    bonded - n_hydrogen(node)
  else
    bonded + n_hydrogen(node)
  end
end

#save(filename, params = {}, &block) ⇒ Object

Saves files for arbitrary format. file type is automatically detected by file extensions.

You can optionally pass parameters as second argument.

Options

:type

> :png # Explicit file type



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/chem.rb', line 101

# Saves the molecule through the first registered format that accepts it.
#
# The format is chosen by params[:type] when given, otherwise by asking
# each registered format whether it recognises +filename+.
#
# @param filename [String] destination, also used for type detection
# @param params [Hash] options; :type forces an explicit file type, and
#   the whole Hash is forwarded to the format's #save
# @param block [Proc] accepted for interface compatibility; not used here
# @return [Object] whatever the selected format's #save returns
# @raise [NotImplementedError] when no registered format matches
def save(filename, params = {}, &block)
  format_type = params[:type]

  # A single lookup: the previous version first probed every format with
  # detect_type (even when the type was nil) and then discarded the result.
  format =
    if format_type
      ChemTypeRegistry.find { |candidate| candidate.detect_type format_type }
    else
      ChemTypeRegistry.find { |candidate| candidate.detect_file filename }
    end

  unless format
    wanted = format_type ? "type #{format_type.inspect}" : "file #{filename.inspect}"
    raise NotImplementedError, "no registered format can save #{wanted}"
  end
  format.save(self, filename, params)
end

#save_as_mdl(filename) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/chem/db/mdl.rb', line 12

# Writes the molecule to +filename+ in MDL molfile layout: the file name,
# two blank header lines, the counts line, then one line per node
# (#to_mdl) and one line per edge (#to_mdl with 1-based node indexes).
#
# Returns the value of the File.open block, i.e. the result of
# iterating edges.
def save_as_mdl(filename)
  File.open(filename, "w") do |out|
    out.puts filename
    2.times { out.puts } # two empty header lines

    counts = [nodes.length, edges.length, 0, 0, 0, 0, 0, 0, 0]
    out.puts format(MDLCountLineFormat, *counts)

    nodes.each { |node| out.puts node.to_mdl }

    edges.each do |bond, first_atom, second_atom|
      first_index = nodes.index(first_atom) + 1
      second_index = nodes.index(second_atom) + 1
      out.puts bond.to_mdl(first_index, second_index)
    end
  end
end

#save_as_pdf(out, params = {}) ⇒ Object

Explicitly save molecule as PDF

Example:

mol = Chem.open_mol("benzene.mol")
mol.save_as_pdf("benzene.pdf")
mol.save("benzene.pdf", :type => :pdf)
mol.save("benzene.pdf") # File type will be detected automatically from the file extension


11
12
13
14
# File 'lib/chem/db/vector.rb', line 11

# Renders the molecule as PDF by building a PDFWriter for it with
# +params+ and asking the writer to save to +out+.
#
# Returns whatever PDFWriter#save returns.
def save_as_pdf(out, params = {})
  PDFWriter.new(self, params).save(out)
end

#search_pubchemObject



15
16
# File 'lib/chem/db/pubchem.rb', line 15

# Placeholder: the body is empty, so calling it does nothing and
# returns nil.
def search_pubchem
end

#subset_in_composition?(to) ⇒ Boolean

Compares the composition of this molecule with that of the argument. Returns 1 if self.composition > to.composition, 0 if self.composition == to.composition, -1 if self.composition < to.composition, and false if the two compositions cannot be ordered.

Returns:

  • (Boolean)


96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/chem/utils/prop.rb', line 96

# Compares the elemental composition of this molecule with that of +to+.
#
# Both compositions are computed once up front; the previous version
# rebuilt them inside every comparison, re-walking all atoms each time.
#
# @param to [#composition] the molecule to compare against
# @return [Integer, false]
#   0 when both compositions are identical,
#   1 when +to+'s composition is contained in this one,
#   -1 when this composition is contained in +to+'s,
#   false when neither contains the other or both are empty
def subset_in_composition?(to)
  theirs = to.composition
  mine = composition
  all = (theirs.keys + mine.keys).uniq
  return false if all.empty?

  self_lacks = !(all - mine.keys).empty?
  to_lacks = !(all - theirs.keys).empty?

  if self_lacks && to_lacks
    # Each side has an element the other does not: not comparable.
    false
  elsif self_lacks
    mine.all? { |k, v| v <= theirs[k] } ? -1 : false
  elsif to_lacks
    theirs.all? { |k, v| v <= mine[k] } ? 1 : false
  elsif all.all? { |el| mine[el] == theirs[el] }
    # Same element set on both sides from here on: compare the counts.
    0
  elsif all.all? { |el| mine[el] >= theirs[el] }
    1
  elsif all.all? { |el| mine[el] <= theirs[el] }
    -1
  else
    false
  end
end

#to_cansmiObject

Returns Canonical SMILES



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/chem/db/cansmi.rb', line 11

# Produces the canonical ordering for the molecule.
#
# Starting from the invariant-based priorities, the priorities are
# refined with #calc_prime_product and #update_priority until the count
# reported by #update_priority stops changing. The final priority of
# every node is then printed (debug output kept from the original), and
# #get_tree is called from the node with the lowest priority.
#
# Returns the result of #get_tree.
def to_cansmi
  ranks, classes = update_priority(canonical_smiles_priority_from_invariant)

  previous = 0
  until previous == classes
    previous = classes
    ranks, classes = update_priority(calc_prime_product(ranks))
  end

  puts
  @nodes.each { |node| p ranks[node] }
  show ranks

  root = ranks.min { |left, right| left[1] <=> right[1] }[0]
  get_tree(root, ranks)
end

#to_eps(para = EpsParameter.new) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/chem/db/eps.rb', line 13

# Renders the molecule as an EPS document and returns it as a String.
#
# para:: drawing parameters; yielded to the caller's block (when one is
#        given) before anything is computed, so the block can adjust them
#
# The output starts with header(para), followed by one label per atom
# and one or more line segments per bond.
#
# NOTE(review): char_height, char_size, inbond and outbond are not
# defined inside this method - presumably they are methods available on
# the including object; confirm before relying on those branches.
def to_eps(para = EpsParameter.new)
  # What should I do to ensure 2D features?

  # This initial value is replaced by header(para) below.
  str = ''
  if block_given?
    yield para
  end

  ratio, min = para.calc_bounding_box_size(@nodes)

  str = header(para)

  # Page position of every atom, keyed by the atom itself.
  pos = {}

  @nodes.each do |atom|
    pos[atom] = Vector[atom.x, atom.y]
    pos[atom] -= min

    #diff = diff == 0 ? 1 : diff
    pos[atom] *= para.diff * 100
    pos[atom] += para.orig_pt + Vector[para.margin, para.margin] + ratio * 0.5

#         if para.has_atom_yield
#           str += eps.atom_yield.call(atom)
#         end
#        str += atom.eps_header if atom.eps_header
#        if(atom.visible)
      str += "%5f %5f moveto\n" % [pos[atom][0], pos[atom][1]]
      str += "(" + atom.element.to_s + ") dup stringwidth pop 2 div neg -1.5 rmoveto show\n"

#        end
#        str += atom.eps_footer if atom.eps_footer
  end
#      @nodes.each do ||

  @edges.each do |bond, atom1, atom2|
    #str += bond.eps_header if bond.eps_header
    beginX = pos[atom1][0]
    beginY = pos[atom1][1]
    endX   = pos[atom2][0]
    endY   = pos[atom2][1]
    # (dx, dy): half of the unit vector from atom1 to atom2; a NaN result
    # (coincident atoms) is replaced by 0.
    dx = (endX - beginX) / ((endX - beginX)**2 + (endY - beginY)**2)**0.5
    dx = dx.nan? ? 0 : dx / 2.0
    dy = (endY - beginY) / ((endX - beginX)**2 + (endY - beginY)**2)**0.5
    dy = dy.nan? ? 0 : dy / 2.0
    # Pull the segment ends back from visible atoms.
    if(atom2.visible)
      endX = endX - char_height * dx
      endY = endY - char_height * dy
    end
    if(atom1.visible)
      beginX = beginX + char_size * dx
      beginY = beginY + char_size * dy
    end
    transition = bond.respond_to?('i') ? bond.i : 0
    multi_bond_ratio = 1.0
    # Shift the first line sideways (perpendicular to the bond) so that the
    # parallel lines drawn below end up centred on the bond axis.
    beginX = beginX - dy * (bond.v - 1 + transition.abs) * multi_bond_ratio
    beginY = beginY + dx * (bond.v - 1 + transition.abs) * multi_bond_ratio
    endX   = endX   - dy * (bond.v - 1 + transition.abs) * multi_bond_ratio
    endY   = endY   + dx * (bond.v - 1 + transition.abs) * multi_bond_ratio
    # NOTE(review): valence is decremented in the loop but never read.
    valence = bond.v
#        1.upto(bond.v + transition.abs) do |n|
    # One stroked line per unit of bond order plus |transition|.
    (bond.v + transition.abs).times do |n|
#           if(color)
#             if(transition < 0)
#               str += "1 0 0 setrgbcolor\n"
#             elsif(transition > 0)
#               str += "0 0 1 setrgbcolor\n"
#             else
#               str += "0 0 0 setrgbcolor\n"
#             end
#           end
      str += "newpath %f %f moveto %f %f lineto stroke\n" % [beginX, beginY, endX, endY]
      centerX = (endX + beginX) /2
      centerY = (endY + beginY) /2
            # Despite the indentation, this if/elsif and the statements after
            # it are still inside the `times` block.
            if(transition >0)
              str += centerX.to_s + " " + centerY.to_s + " " + inbond.to_s + " 0 360 arc stroke\n"
    elsif(transition <0)
      str += "newpath %f %f moveto %f %f lineto stroke\n" %
             [centerX + dy - dx*outbond, centerY - dx - outbond * dy,
              centerX - dy - outbond * dx, dx - outbond * dy + centerY]
      str += "newpath %f %f moveto %f %f lineto stroke\n" %
             [centerX + dy + dx*outbond, centerY - dx + outbond * dy, 
              centerX - dy + outbond * dx, dy * outbond + dx + centerY]
    end
    # Step transition one unit towards zero for the next line.
    transition = transition + 1 if(transition < 0)
    transition = transition - 1 if(transition > 0)
    valence = valence - 1
    # Move sideways to the position of the next parallel line.
    beginX = beginX + dy  * multi_bond_ratio * 2
    beginY = beginY - dx  * multi_bond_ratio * 2
    endX   = endX   + dy  * multi_bond_ratio * 2
    endY   = endY   - dx  * multi_bond_ratio * 2
    end
  end
#      str += "0 0 0 setrgbcolor\n"
  #      str += " #{@size / 2.0} #{@size / 2.0} #{@size / 2.0 + @margin} 0 360 arc stroke\n"

  #open("test.eps", "w").puts str
  str
end

#to_sybylObject

Return sybyl formatted molecule



7
8
# File 'lib/chem/db/sybyl.rb', line 7

# Placeholder: the body is empty, so calling it does nothing and
# returns nil.
def to_sybyl
end

#trim(smallest) ⇒ Object



89
90
91
92
93
94
95
96
97
# File 'lib/chem/utils/sssr.rb', line 89

# Removes +smallest+ from the working adjacency map @mol and detaches it
# from each of its neighbours; neighbours left without any connection
# are removed as well.
#
# The node itself is now removed before its neighbours are visited.
# Previously that removal sat inside the neighbour loop, so a node with
# an empty neighbour list was never removed, and a neighbour already
# missing from @mol raised NoMethodError.
#
# @param smallest [Object] key of the node to remove from @mol
# @return [Array, nil] the neighbour list the node had, or nil when the
#   node is not in @mol
def trim(smallest)
  return unless @mol.include?(smallest)

  neighbours = @mol.delete(smallest)
  neighbours.each do |n|
    next unless @mol.include?(n)

    @mol[n] = @mol[n] - [smallest]
    @mol.delete(n) if @mol[n].empty?
  end
end