diff --git a/gufe/components/smallmoleculecomponent.py b/gufe/components/smallmoleculecomponent.py
index 17a43fd8..814c473b 100644
--- a/gufe/components/smallmoleculecomponent.py
+++ b/gufe/components/smallmoleculecomponent.py
@@ -2,6 +2,7 @@
# For details, see https://github.com/OpenFreeEnergy/gufe
import logging
+import warnings
# openff complains about oechem being missing, shhh
logger = logging.getLogger("openff.toolkit")
@@ -68,6 +69,20 @@
}
_BONDSTEREO_TO_INT = {v: k for k, v in _INT_TO_BONDSTEREO.items()}
+# Integer codes used to serialize atom hybridization, following the numbering in rdkit.
+_INT_TO_HYBRIDIZATION = dict(
+    enumerate(
+        getattr(Chem.rdchem.HybridizationType, label)
+        for label in (
+            "UNSPECIFIED", "S", "SP",
+            "SP2", "SP3", "SP2D",
+            "SP3D", "SP3D2", "OTHER",
+        )
+    )
+)
+# Reverse lookup (hybridization -> integer code), used when atoms are written out in _to_dict.
+_HYBRIDIZATION_TO_INT = {hybrid: code for code, hybrid in _INT_TO_HYBRIDIZATION.items()}
+
def _setprops(obj, d: dict) -> None:
# add props onto rdkit "obj" (atom/bond/mol/conformer)
@@ -223,6 +238,7 @@ def _to_dict(self) -> dict:
_ATOMCHIRAL_TO_INT[atom.GetChiralTag()],
atom.GetAtomMapNum(),
atom.GetPropsAsDict(includePrivate=False),
+ _HYBRIDIZATION_TO_INT[atom.GetHybridization()],
)
)
output["atoms"] = atoms
@@ -264,6 +280,16 @@ def _from_dict(cls, d: dict):
a.SetChiralTag(_INT_TO_ATOMCHIRAL[atom[4]])
a.SetAtomMapNum(atom[5])
_setprops(a, atom[6])
+ try:
+     a.SetHybridization(_INT_TO_HYBRIDIZATION[atom[7]])
+ except IndexError:
+     # atom entries written before hybridization was stored have no index 7; warn and carry on
+     warnings.warn(
+         "The atom hybridization data was not found and has been set to unspecified. This can be"
+         " fixed by recreating the SmallMoleculeComponent from the rdkit molecule after running "
+         "sanitization."
+     )
+
em.AddAtom(a)
for bond in d["bonds"]:
diff --git a/gufe/tests/data/ligand_network.graphml b/gufe/tests/data/ligand_network.graphml
index 31fe9e56..d331b2f6 100644
--- a/gufe/tests/data/ligand_network.graphml
+++ b/gufe/tests/data/ligand_network.graphml
@@ -4,13 +4,13 @@
- {":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [6, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '<f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090<\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}
+ {":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}, 4], [6, 0, 0, false, 0, 0, {}, 4]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '<f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090<\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}
- {":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [6, 0, 0, false, 0, 0, {}], [8, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}], [1, 2, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '<f8', 'fortran_order': False, 'shape': (3, 3), } \n\u00809B.\u00dc\u00c8\u00f4\u00bf\u00f5\u00ff\u00ff\u00ff\u00ff\u00ff\u00cf\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u00e0?\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00809B.\u00dc\u00c8\u00f4?\u0006\u0000\u0000\u0000\u0000\u0000\u00d0\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}
+ {":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}, 4], [6, 0, 0, false, 0, 0, {}, 4], [8, 0, 0, false, 0, 0, {}, 4]], "bonds": [[0, 1, 1, 0, {}], [1, 2, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '<f8', 'fortran_order': False, 'shape': (3, 3), } \n\u00809B.\u00dc\u00c8\u00f4\u00bf\u00f5\u00ff\u00ff\u00ff\u00ff\u00ff\u00cf\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u00e0?\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00809B.\u00dc\u00c8\u00f4?\u0006\u0000\u0000\u0000\u0000\u0000\u00d0\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}
- {":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [8, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '<f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090<\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}
+ {":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}, 4], [8, 0, 0, false, 0, 0, {}, 4]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '<f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090<\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}
[[0, 0]]
diff --git a/gufe/tests/test_smallmoleculecomponent.py b/gufe/tests/test_smallmoleculecomponent.py
index e8a1c6ec..e224227d 100644
--- a/gufe/tests/test_smallmoleculecomponent.py
+++ b/gufe/tests/test_smallmoleculecomponent.py
@@ -339,6 +339,36 @@ def test_to_dict(self, phenol):
assert isinstance(d, dict)
+ def test_to_dict_hybridization(self, phenol):
+     """Make sure dict round trip saves the hybridization."""
+     serialized = phenol.to_dict()
+     # NOTE(review): the registry is presumably cleared so that from_dict rebuilds the
+     # component instead of handing back the already-registered phenol -- confirm
+     TOKENIZABLE_REGISTRY.clear()
+     roundtripped = SmallMoleculeComponent.from_dict(serialized)
+     rdmol = roundtripped.to_rdkit()
+     # only the carbons are inspected; each one must come back as SP2
+     carbons = [atom for atom in rdmol.GetAtoms() if atom.GetAtomicNum() == 6]
+     assert all(carbon.GetHybridization() == Chem.rdchem.HybridizationType.SP2 for carbon in carbons)
+
+ def test_from_dict_missing_hybridization(self, phenol):
+     """
+     For backwards compatibility make sure we can create an SMC with missing hybridization info.
+     """
+     phenol_dict = phenol.to_dict()
+     # drop the hybridization code stored at index 7 of each atom entry, leaving the
+     # seven-field layout that from_dict must still accept
+     phenol_dict["atoms"] = [tuple(atom[:7]) for atom in phenol_dict["atoms"]]
+     # warnings.warn is called without a category there, so the warning is a UserWarning
+     expected = "The atom hybridization data was not found and has been set to unspecified."
+     with pytest.warns(UserWarning, match=expected):
+         new_phenol = SmallMoleculeComponent.from_dict(phenol_dict)
+     # they should be different objects due to the missing hybridization info
+     assert new_phenol != phenol
+     # make sure the rdkit objects are different
+     for atom_hybrid, atom_no_hybrid in zip(phenol.to_rdkit().GetAtoms(), new_phenol.to_rdkit().GetAtoms()):
+         assert atom_hybrid.GetHybridization() != atom_no_hybrid.GetHybridization()
+
@pytest.mark.skipif(not HAS_OFFTK, reason="no openff toolkit available")
def test_deserialize_roundtrip(self, toluene, phenol):
roundtrip = SmallMoleculeComponent.from_dict(phenol.to_dict())