diff --git a/gufe/components/smallmoleculecomponent.py b/gufe/components/smallmoleculecomponent.py index 17a43fd8..814c473b 100644 --- a/gufe/components/smallmoleculecomponent.py +++ b/gufe/components/smallmoleculecomponent.py @@ -2,6 +2,7 @@ # For details, see https://github.com/OpenFreeEnergy/gufe import logging +import warnings # openff complains about oechem being missing, shhh logger = logging.getLogger("openff.toolkit") @@ -68,6 +69,20 @@ } _BONDSTEREO_TO_INT = {v: k for k, v in _INT_TO_BONDSTEREO.items()} +# following the numbering in rdkit +_INT_TO_HYBRIDIZATION = { + 0: Chem.rdchem.HybridizationType.UNSPECIFIED, + 1: Chem.rdchem.HybridizationType.S, + 2: Chem.rdchem.HybridizationType.SP, + 3: Chem.rdchem.HybridizationType.SP2, + 4: Chem.rdchem.HybridizationType.SP3, + 5: Chem.rdchem.HybridizationType.SP2D, + 6: Chem.rdchem.HybridizationType.SP3D, + 7: Chem.rdchem.HybridizationType.SP3D2, + 8: Chem.rdchem.HybridizationType.OTHER, +} +_HYBRIDIZATION_TO_INT = {v: k for k, v in _INT_TO_HYBRIDIZATION.items()} + def _setprops(obj, d: dict) -> None: # add props onto rdkit "obj" (atom/bond/mol/conformer) @@ -223,6 +238,7 @@ def _to_dict(self) -> dict: _ATOMCHIRAL_TO_INT[atom.GetChiralTag()], atom.GetAtomMapNum(), atom.GetPropsAsDict(includePrivate=False), + _HYBRIDIZATION_TO_INT[atom.GetHybridization()], ) ) output["atoms"] = atoms @@ -264,6 +280,16 @@ def _from_dict(cls, d: dict): a.SetChiralTag(_INT_TO_ATOMCHIRAL[atom[4]]) a.SetAtomMapNum(atom[5]) _setprops(a, atom[6]) + try: + a.SetHybridization(_INT_TO_HYBRIDIZATION[atom[7]]) + except IndexError: + warnings.warn( + "The atom hybridization data was not found and has been set to unspecified. This can be" + " fixed by recreating the SmallMoleculeComponent from the rdkit molecule after running " + "sanitization." + ) + pass + em.AddAtom(a) for bond in d["bonds"]: diff --git a/gufe/tests/data/ligand_network.graphml b/gufe/tests/data/ligand_network.graphml index 31fe9e56..d331b2f6 100644 --- a/gufe/tests/data/ligand_network.graphml +++ b/gufe/tests/data/ligand_network.graphml @@ -4,13 +4,13 @@ - {":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [6, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '<f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090<\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}} + {":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}, 4], [6, 0, 0, false, 0, 0, {}, 4]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '<f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090<\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}} - {":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [6, 0, 0, false, 0, 0, {}], [8, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}], [1, 2, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '<f8', 'fortran_order': False, 'shape': (3, 3), } \n\u00809B.\u00dc\u00c8\u00f4\u00bf\u00f5\u00ff\u00ff\u00ff\u00ff\u00ff\u00cf\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u00e0?\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00809B.\u00dc\u00c8\u00f4?\u0006\u0000\u0000\u0000\u0000\u0000\u00d0\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}} + {":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}, 4], [6, 0, 0, false, 0, 0, {}, 4], [8, 0, 0, false, 0, 0, {}, 4]], "bonds": [[0, 1, 1, 0, {}], [1, 2, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '<f8', 'fortran_order': False, 'shape': (3, 3), } \n\u00809B.\u00dc\u00c8\u00f4\u00bf\u00f5\u00ff\u00ff\u00ff\u00ff\u00ff\u00cf\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u00e0?\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00809B.\u00dc\u00c8\u00f4?\u0006\u0000\u0000\u0000\u0000\u0000\u00d0\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}} - {":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [8, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '<f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090<\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}} + {":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}, 4], [8, 0, 0, false, 0, 0, {}, 4]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '<f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090<\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}} [[0, 0]] diff --git a/gufe/tests/test_smallmoleculecomponent.py b/gufe/tests/test_smallmoleculecomponent.py index e8a1c6ec..e224227d 100644 --- a/gufe/tests/test_smallmoleculecomponent.py +++ b/gufe/tests/test_smallmoleculecomponent.py @@ -339,6 +339,36 @@ def test_to_dict(self, phenol): assert isinstance(d, dict) + def test_to_dict_hybridization(self, phenol): + """ + Make sure dict round trip saves the hybridization + + """ + phenol_dict = phenol.to_dict() + TOKENIZABLE_REGISTRY.clear() + new_phenol = SmallMoleculeComponent.from_dict(phenol_dict) + for atom in new_phenol.to_rdkit().GetAtoms(): + if atom.GetAtomicNum() == 6: + assert atom.GetHybridization() == Chem.rdchem.HybridizationType.SP2 + + def test_from_dict_missing_hybridization(self, phenol): + """ + For backwards compatibility make sure we can create an SMC with missing hybridization info. + """ + phenol_dict = phenol.to_dict() + new_atoms = [] + for atom in phenol_dict["atoms"]: + # remove the hybridization atomic info which should be at index 7 + new_atoms.append(tuple([atom_info for i, atom_info in enumerate(atom) if i != 7])) + phenol_dict["atoms"] = new_atoms + with pytest.warns(match="The atom hybridization data was not found and has been set to unspecified."): + new_phenol = SmallMoleculeComponent.from_dict(phenol_dict) + # they should be different objects due to the missing hybridization info + assert new_phenol != phenol + # make sure the rdkit objects are different + for atom_hybrid, atom_no_hybrid in zip(phenol.to_rdkit().GetAtoms(), new_phenol.to_rdkit().GetAtoms()): + assert atom_hybrid.GetHybridization() != atom_no_hybrid.GetHybridization() + @pytest.mark.skipif(not HAS_OFFTK, reason="no openff toolkit available") def test_deserialize_roundtrip(self, toluene, phenol): roundtrip = SmallMoleculeComponent.from_dict(phenol.to_dict())