Skip to content

Commit

Permalink
Save atom hybridization (#408)
Browse files Browse the repository at this point in the history
* save the atom hybridization when serialising SMCs

* warn when loading from old serialised SMCs without hybridisation information

---------

Co-authored-by: Irfan Alibay <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Dec 4, 2024
1 parent f8c49d5 commit 5dd22ef
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 3 deletions.
26 changes: 26 additions & 0 deletions gufe/components/smallmoleculecomponent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# For details, see https://github.com/OpenFreeEnergy/gufe

import logging
import warnings

# openff complains about oechem being missing, shhh
logger = logging.getLogger("openff.toolkit")
Expand Down Expand Up @@ -68,6 +69,20 @@
}
_BONDSTEREO_TO_INT = {v: k for k, v in _INT_TO_BONDSTEREO.items()}

# Integer codes used to store an atom's hybridization, following the numbering in rdkit.
# The position of each entry in the tuple below is its integer code.
_INT_TO_HYBRIDIZATION = dict(
    enumerate(
        (
            Chem.rdchem.HybridizationType.UNSPECIFIED,  # 0
            Chem.rdchem.HybridizationType.S,  # 1
            Chem.rdchem.HybridizationType.SP,  # 2
            Chem.rdchem.HybridizationType.SP2,  # 3
            Chem.rdchem.HybridizationType.SP3,  # 4
            Chem.rdchem.HybridizationType.SP2D,  # 5
            Chem.rdchem.HybridizationType.SP3D,  # 6
            Chem.rdchem.HybridizationType.SP3D2,  # 7
            Chem.rdchem.HybridizationType.OTHER,  # 8
        )
    )
)
# reverse lookup: hybridization type -> integer code
_HYBRIDIZATION_TO_INT = {hybridization: code for code, hybridization in _INT_TO_HYBRIDIZATION.items()}


def _setprops(obj, d: dict) -> None:
# add props onto rdkit "obj" (atom/bond/mol/conformer)
Expand Down Expand Up @@ -223,6 +238,7 @@ def _to_dict(self) -> dict:
_ATOMCHIRAL_TO_INT[atom.GetChiralTag()],
atom.GetAtomMapNum(),
atom.GetPropsAsDict(includePrivate=False),
_HYBRIDIZATION_TO_INT[atom.GetHybridization()],
)
)
output["atoms"] = atoms
Expand Down Expand Up @@ -264,6 +280,16 @@ def _from_dict(cls, d: dict):
a.SetChiralTag(_INT_TO_ATOMCHIRAL[atom[4]])
a.SetAtomMapNum(atom[5])
_setprops(a, atom[6])
try:
a.SetHybridization(_INT_TO_HYBRIDIZATION[atom[7]])
except IndexError:
warnings.warn(
"The atom hybridization data was not found and has been set to unspecified. This can be"
" fixed by recreating the SmallMoleculeComponent from the rdkit molecule after running "
"sanitization."
)
pass

em.AddAtom(a)

for bond in d["bonds"]:
Expand Down
6 changes: 3 additions & 3 deletions gufe/tests/data/ligand_network.graphml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
<key id="d0" for="node" attr.name="moldict" attr.type="string" />
<graph edgedefault="directed">
<node id="mol0">
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [6, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090&lt;\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}, 4], [6, 0, 0, false, 0, 0, {}, 4]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090&lt;\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
</node>
<node id="mol1">
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [6, 0, 0, false, 0, 0, {}], [8, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}], [1, 2, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (3, 3), } \n\u00809B.\u00dc\u00c8\u00f4\u00bf\u00f5\u00ff\u00ff\u00ff\u00ff\u00ff\u00cf\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u00e0?\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00809B.\u00dc\u00c8\u00f4?\u0006\u0000\u0000\u0000\u0000\u0000\u00d0\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}, 4], [6, 0, 0, false, 0, 0, {}, 4], [8, 0, 0, false, 0, 0, {}, 4]], "bonds": [[0, 1, 1, 0, {}], [1, 2, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (3, 3), } \n\u00809B.\u00dc\u00c8\u00f4\u00bf\u00f5\u00ff\u00ff\u00ff\u00ff\u00ff\u00cf\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u00e0?\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00809B.\u00dc\u00c8\u00f4?\u0006\u0000\u0000\u0000\u0000\u0000\u00d0\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
</node>
<node id="mol2">
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [8, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090&lt;\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}, 4], [8, 0, 0, false, 0, 0, {}, 4]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090&lt;\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
</node>
<edge source="mol0" target="mol2" id="0">
<data key="d1">[[0, 0]]</data>
Expand Down
30 changes: 30 additions & 0 deletions gufe/tests/test_smallmoleculecomponent.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,36 @@ def test_to_dict(self, phenol):

assert isinstance(d, dict)

def test_to_dict_hybridization(self, phenol):
    """
    Check that the atom hybridization survives a to_dict/from_dict round trip
    <https://github.com/OpenFreeEnergy/gufe/issues/407>
    """
    serialized = phenol.to_dict()
    # empty the registry before rebuilding -- presumably so that from_dict
    # constructs a new object instead of handing back a cached one (TODO confirm)
    TOKENIZABLE_REGISTRY.clear()
    rebuilt = SmallMoleculeComponent.from_dict(serialized)
    rdmol = rebuilt.to_rdkit()
    for atom in rdmol.GetAtoms():
        # only the carbon atoms are checked here
        if atom.GetAtomicNum() != 6:
            continue
        assert atom.GetHybridization() == Chem.rdchem.HybridizationType.SP2

def test_from_dict_missing_hybridization(self, phenol):
    """
    Backwards compatibility check: an SMC can still be created from a dict
    whose atom records carry no hybridization info, and doing so warns.
    """
    legacy_dict = phenol.to_dict()
    # drop the entry at index 7 of every atom record, which should be the hybridization
    legacy_dict["atoms"] = [
        tuple(value for idx, value in enumerate(record) if idx != 7) for record in legacy_dict["atoms"]
    ]
    with pytest.warns(match="The atom hybridization data was not found and has been set to unspecified."):
        legacy_phenol = SmallMoleculeComponent.from_dict(legacy_dict)
    # the missing hybridization info should make the two components compare unequal
    assert legacy_phenol != phenol
    # and, atom by atom, the rdkit molecules should disagree on hybridization
    reference_mol = phenol.to_rdkit()
    legacy_mol = legacy_phenol.to_rdkit()
    for reference_atom, legacy_atom in zip(reference_mol.GetAtoms(), legacy_mol.GetAtoms()):
        assert reference_atom.GetHybridization() != legacy_atom.GetHybridization()

@pytest.mark.skipif(not HAS_OFFTK, reason="no openff toolkit available")
def test_deserialize_roundtrip(self, toluene, phenol):
roundtrip = SmallMoleculeComponent.from_dict(phenol.to_dict())
Expand Down

0 comments on commit 5dd22ef

Please sign in to comment.