Skip to content

Commit baa1567

Browse files
committed
Merge pull request #190 from pierrelb/checkQMGeom
Check QMTP optimized molecular geometries are the desired species. It is possible that the geometry optimization "reacted" the species to a different isomer. This checks that the final geometry is isomorphic with the desired molecule. Bonds are guessed based on distances between atoms. Bond orders (single, double, etc) are not checked, only connectivity.
2 parents 70396d6 + 8c2a3ff commit baa1567

7 files changed

Lines changed: 132 additions & 14 deletions

File tree

rmgpy/molecule/element.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,6 @@ cdef class Element:
3030
cdef public str name
3131
cdef public str symbol
3232
cdef public float mass
33+
cdef public float covRadius
3334

3435
cpdef Element getElement(value)

rmgpy/molecule/element.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
"""
4242

4343
import cython
44+
from rdkit.Chem import GetPeriodicTable
4445

4546
################################################################################
4647

@@ -53,7 +54,7 @@ class ElementError(Exception):
5354
pass
5455

5556
################################################################################
56-
57+
_rdkit_periodic_table = GetPeriodicTable()
5758
class Element:
5859
"""
5960
A chemical element. The attributes are:
@@ -65,6 +66,7 @@ class Element:
6566
`symbol` ``str`` The symbol used for the element
6667
`name` ``str`` The IUPAC name of the element
6768
`mass` ``float`` The mass of the element in kg/mol
69+
`covRadius` ``float`` Covalent bond radius in Angstrom
6870
=========== =============== ================================================
6971
7072
This class is specifically for properties that all atoms of the same element
@@ -76,6 +78,7 @@ def __init__(self, number, symbol, name, mass):
7678
self.symbol = intern(symbol)
7779
self.name = name
7880
self.mass = mass
81+
self.covRadius = _rdkit_periodic_table.GetRcovalent(symbol)
7982

8083
def __str__(self):
8184
"""

rmgpy/molecule/molecule.pxd

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ from .atomtype cimport AtomType
2929
from .group cimport GroupAtom, GroupBond, Group
3030
from .element cimport Element
3131
cimport rmgpy.constants as constants
32+
cimport numpy
3233

3334
################################################################################
3435
cdef public dict _known_smiles
@@ -41,7 +42,7 @@ cdef class Atom(Vertex):
4142
cdef public short charge
4243
cdef public str label
4344
cdef public AtomType atomType
44-
cdef public list coords
45+
cdef public numpy.ndarray coords
4546
cdef public short lonePairs
4647

4748
cpdef bint equivalent(self, Vertex other) except -2
@@ -170,6 +171,8 @@ cdef class Molecule(Graph):
170171

171172
cpdef fromAdjacencyList(self, str adjlist, bint saturateH=?)
172173

174+
cpdef fromXYZ(self, numpy.ndarray atomicNums, numpy.ndarray coordinates)
175+
173176
cpdef str toInChI(self)
174177

175178
cpdef str toAugmentedInChI(self)

rmgpy/molecule/molecule.py

Lines changed: 86 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ class Atom(Vertex):
9898
`spinMultiplicity` ``short`` The spin multiplicity of the atom
9999
`charge` ``short`` The formal charge of the atom
100100
`label` ``str`` A string label that can be used to tag individual atoms
101-
`coords`
101+
`coords` ``numpy array`` The (x,y,z) coordinates in Angstrom
102102
`lonePairs` ``short`` The number of lone electron pairs
103103
=================== =================== ====================================
104104
@@ -107,7 +107,7 @@ class Atom(Vertex):
107107
e.g. ``atom.symbol`` instead of ``atom.element.symbol``.
108108
"""
109109

110-
def __init__(self, element=None, radicalElectrons=0, spinMultiplicity=1, charge=0, label='', lonePairs=0):
110+
def __init__(self, element=None, radicalElectrons=0, spinMultiplicity=1, charge=0, label='', lonePairs=0, coords=None):
111111
Vertex.__init__(self)
112112
if isinstance(element, str):
113113
self.element = elements.__dict__[element]
@@ -119,7 +119,7 @@ def __init__(self, element=None, radicalElectrons=0, spinMultiplicity=1, charge=
119119
self.label = label
120120
self.atomType = None
121121
self.lonePairs = lonePairs
122-
self.coords = list()
122+
self.coords = coords
123123

124124
def __str__(self):
125125
"""
@@ -829,6 +829,51 @@ def deleteHydrogens(self):
829829
for atom in hydrogens:
830830
self.removeAtom(atom)
831831

832+
def connectTheDots(self):
833+
"""
834+
Delete all bonds, and set them again based on the Atoms' coords.
835+
Does not detect bond type.
836+
"""
837+
cython.declare(criticalDistance=float, i=int, atom1=Atom, atom2=Atom,
838+
bond=Bond, atoms=list, zBoundary=float)
839+
# groupBond=GroupBond,
840+
self._fingerprint = None
841+
842+
atoms = self.vertices
843+
844+
# Ensure there are coordinates to work with
845+
for atom in atoms:
846+
assert atom.coords != None
847+
848+
# If there are any bonds, remove them
849+
for atom1 in atoms:
850+
for bond in self.getBonds(atom1):
851+
self.removeEdge(bond)
852+
853+
# Sort atoms by distance on the z-axis
854+
sortedAtoms = sorted(atoms, key=lambda x: x.coords[2])
855+
856+
for i, atom1 in enumerate(sortedAtoms):
857+
for atom2 in sortedAtoms[i+1:]:
858+
# Set upper limit for bond distance
859+
criticalDistance = (atom1.element.covRadius + atom2.element.covRadius + 0.45)**2
860+
861+
# First atom that is more than 4.0 Anstroms away in the z-axis, break the loop
862+
# Atoms are sorted along the z-axis, so all following atoms should be even further
863+
zBoundary = (atom1.coords[2] - atom2.coords[2])**2
864+
if zBoundary > 16.0:
865+
break
866+
867+
distanceSquared = sum((atom1.coords - atom2.coords)**2)
868+
869+
if distanceSquared > criticalDistance or distanceSquared < 0.40:
870+
continue
871+
else:
872+
# groupBond = GroupBond(atom1, atom2, ['S','D','T','B'])
873+
bond = Bond(atom1, atom2, 'S')
874+
self.addBond(bond)
875+
self.updateAtomTypes()
876+
832877
def updateAtomTypes(self):
833878
"""
834879
Iterate through the atoms in the structure, checking their atom types
@@ -1153,6 +1198,44 @@ def fromAdjacencyList(self, adjlist, saturateH=False):
11531198
self.updateAtomTypes()
11541199

11551200
return self
1201+
1202+
def fromXYZ(self, atomicNums, coordinates):
1203+
"""
1204+
Create an RMG molecule from a list of coordinates and a corresponding
1205+
list of atomic numbers. These are typically received from CCLib and the
1206+
molecule is sent to `ConnectTheDots` so will only contain single bonds.
1207+
"""
1208+
1209+
_rdkit_periodic_table = elements.GetPeriodicTable()
1210+
1211+
for i, atNum in enumerate(atomicNums):
1212+
atom = Atom(_rdkit_periodic_table.GetElementSymbol(int(atNum)))
1213+
atom.coords = coordinates[i]
1214+
self.addAtom(atom)
1215+
return self.connectTheDots()
1216+
1217+
def toSingleBonds(self):
1218+
"""
1219+
Returns a copy of the current molecule, consisting of only single bonds.
1220+
1221+
This is useful for isomorphism comparison against something that was made
1222+
via fromXYZ, which does not attempt to perceive bond orders
1223+
"""
1224+
cython.declare(atom1=Atom, atom2=Atom, bond=Bond, newMol=Molecule, atoms=list, mapping=dict)
1225+
1226+
newMol = Molecule()
1227+
atoms = self.atoms
1228+
mapping = {}
1229+
for atom1 in atoms:
1230+
atom2 = newMol.addAtom(Atom(atom1.element))
1231+
mapping[atom1] = atom2
1232+
1233+
for atom1 in atoms:
1234+
for atom2 in atom1.bonds:
1235+
bond = Bond(mapping[atom1], mapping[atom2], 'S')
1236+
newMol.addBond(bond)
1237+
newMol.updateAtomTypes()
1238+
return newMol
11561239

11571240
def toInChI(self):
11581241
"""

rmgpy/qm/gaussian.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from subprocess import Popen, PIPE
66
import re
77

8+
from rmgpy.molecule import Molecule
89
from qmdata import CCLibData
910
from molecule import QMMolecule
1011

@@ -116,9 +117,19 @@ def verifyOutputFile(self):
116117
return False
117118

118119
if InChIMatch:
119-
logging.info("Successful Gaussian quantum result found in {0}".format(self.outputFilePath))
120-
# " + self.molfile.name + " ("+self.molfile.InChIAug+") has been found. This log file will be used.")
121-
return True
120+
# Compare the optimized geometry to the original molecule
121+
qmData = self.parse()
122+
123+
cclibMol = Molecule()
124+
cclibMol.fromXYZ(qmData.atomicNumbers, qmData.atomCoords.value)
125+
testMol = self.molecule.toSingleBonds()
126+
127+
if cclibMol.isIsomorphic(testMol):
128+
logging.info("Successful MOPAC quantum result found in {0}".format(self.outputFilePath))
129+
return True
130+
else:
131+
logging.info("Incorrect connectivity for optimized geometry in file {0}".format(self.outputFilePath))
132+
return False
122133
else:
123134
return False # until the next line works
124135

rmgpy/qm/molecule.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def generateRDKitGeometries(self, boundsMatrix=None):
6868
distGeomAttempts = 5*(atoms-3) #number of conformer attempts is just a linear scaling with molecule size, due to time considerations in practice, it is probably more like 3^(n-3) or something like that
6969

7070
rdmol, minEid = self.rd_embed(rdmol, distGeomAttempts, boundsMatrix)
71-
self.save_coordinates(rdmol, minEid, rdAtIdx)
71+
self.saveCoordinatesFromRDMol(rdmol, minEid, rdAtIdx)
7272

7373
def rd_build(self):
7474
"""
@@ -104,11 +104,17 @@ def rd_embed(self, rdmol, numConfAttempts, boundsMatrix=None):
104104

105105
return rdmol, minEid
106106

107-
def save_coordinates(self, rdmol, minEid, rdAtIdx):
107+
def saveCoordinatesFromRDMol(self, rdmol, minEid, rdAtIdx):
108108
# Save xyz coordinates on each atom in molecule ****
109109
for atom in self.molecule.atoms:
110110
point = rdmol.GetConformer(minEid).GetAtomPosition(atom.sortingLabel)
111-
atom.coords = [point.x, point.y, point.z]
111+
atom.coords = numpy.array([point.x, point.y, point.z])
112+
113+
def saveCoordinatesFromQMData(self, qmdata):
114+
"""
115+
Save geometry info from QMData (eg CCLibData)
116+
"""
117+
raise NotImplementedError
112118

113119
def loadThermoDataFile(filePath):
114120
"""

rmgpy/qm/mopac.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import logging
55
from subprocess import Popen, PIPE
66

7+
from rmgpy.molecule import Molecule
78
from qmdata import CCLibData
89
from molecule import QMMolecule
910

@@ -127,9 +128,19 @@ def verifyOutputFile(self):
127128
return False
128129

129130
if InChIMatch:
130-
logging.info("Successful MOPAC quantum result found in {0}".format(self.outputFilePath))
131-
# " + self.molfile.name + " ("+self.molfile.InChIAug+") has been found. This log file will be used.")
132-
return True
131+
# Compare the optimized geometry to the original molecule
132+
qmData = self.parse()
133+
134+
cclibMol = Molecule()
135+
cclibMol.fromXYZ(qmData.atomicNumbers, qmData.atomCoords.value)
136+
testMol = self.molecule.toSingleBonds()
137+
138+
if cclibMol.isIsomorphic(testMol):
139+
logging.info("Successful MOPAC quantum result found in {0}".format(self.outputFilePath))
140+
return True
141+
else:
142+
logging.info("Incorrect connectivity for optimized geometry in file {0}".format(self.outputFilePath))
143+
return False
133144

134145
#InChIs do not match (most likely due to limited name length mirrored in log file (240 characters), but possibly due to a collision)
135146
return self.checkForInChiKeyCollision(logFileInChI) # Not yet implemented!

0 commit comments

Comments
 (0)