Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Save atom hybridization #408

Merged
merged 11 commits into from
Dec 4, 2024
26 changes: 26 additions & 0 deletions gufe/components/smallmoleculecomponent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# For details, see https://github.com/OpenFreeEnergy/gufe

import logging
import warnings

# openff complains about oechem being missing, shhh
logger = logging.getLogger("openff.toolkit")
Expand Down Expand Up @@ -68,6 +69,20 @@
}
_BONDSTEREO_TO_INT = {v: k for k, v in _INT_TO_BONDSTEREO.items()}

# Integer codes used when (de)serialising atom hybridization. The codes follow
# the numbering in rdkit; each code is the position of the entry in the tuple
# below, so the order of the entries must not change.
_INT_TO_HYBRIDIZATION = dict(
    enumerate(
        (
            Chem.rdchem.HybridizationType.UNSPECIFIED,  # 0
            Chem.rdchem.HybridizationType.S,  # 1
            Chem.rdchem.HybridizationType.SP,  # 2
            Chem.rdchem.HybridizationType.SP2,  # 3
            Chem.rdchem.HybridizationType.SP3,  # 4
            Chem.rdchem.HybridizationType.SP2D,  # 5
            Chem.rdchem.HybridizationType.SP3D,  # 6
            Chem.rdchem.HybridizationType.SP3D2,  # 7
            Chem.rdchem.HybridizationType.OTHER,  # 8
        )
    )
)
# reverse lookup: rdkit hybridization -> integer code
_HYBRIDIZATION_TO_INT = {hybridization: code for code, hybridization in _INT_TO_HYBRIDIZATION.items()}


def _setprops(obj, d: dict) -> None:
# add props onto rdkit "obj" (atom/bond/mol/conformer)
Expand Down Expand Up @@ -223,6 +238,7 @@ def _to_dict(self) -> dict:
_ATOMCHIRAL_TO_INT[atom.GetChiralTag()],
atom.GetAtomMapNum(),
atom.GetPropsAsDict(includePrivate=False),
_HYBRIDIZATION_TO_INT[atom.GetHybridization()],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens in this case if this isn't set?

I.e. if you read an old file (which now skips the hybridization reading), then write it out again - what does this pick up?

)
)
output["atoms"] = atoms
Expand Down Expand Up @@ -264,6 +280,16 @@ def _from_dict(cls, d: dict):
a.SetChiralTag(_INT_TO_ATOMCHIRAL[atom[4]])
a.SetAtomMapNum(atom[5])
_setprops(a, atom[6])
try:
a.SetHybridization(_INT_TO_HYBRIDIZATION[atom[7]])
except IndexError:
warnings.warn(
"The atom hybridization data was not found and has been set to unspecified. This can be"
" fixed by recreating the SmallMoleculeComponent from the rdkit molecule after running "
"sanitization."
)
pass

em.AddAtom(a)

for bond in d["bonds"]:
Expand Down
6 changes: 3 additions & 3 deletions gufe/tests/data/ligand_network.graphml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
<key id="d0" for="node" attr.name="moldict" attr.type="string" />
<graph edgedefault="directed">
<node id="mol0">
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [6, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090&lt;\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}, 4], [6, 0, 0, false, 0, 0, {}, 4]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090&lt;\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
</node>
<node id="mol1">
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [6, 0, 0, false, 0, 0, {}], [8, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}], [1, 2, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (3, 3), } \n\u00809B.\u00dc\u00c8\u00f4\u00bf\u00f5\u00ff\u00ff\u00ff\u00ff\u00ff\u00cf\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u00e0?\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00809B.\u00dc\u00c8\u00f4?\u0006\u0000\u0000\u0000\u0000\u0000\u00d0\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}, 4], [6, 0, 0, false, 0, 0, {}, 4], [8, 0, 0, false, 0, 0, {}, 4]], "bonds": [[0, 1, 1, 0, {}], [1, 2, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (3, 3), } \n\u00809B.\u00dc\u00c8\u00f4\u00bf\u00f5\u00ff\u00ff\u00ff\u00ff\u00ff\u00cf\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u0000\u0000\u00e0?\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00809B.\u00dc\u00c8\u00f4?\u0006\u0000\u0000\u0000\u0000\u0000\u00d0\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
</node>
<node id="mol2">
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}], [8, 0, 0, false, 0, 0, {}]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090&lt;\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
<data key="d0">{":version:": 1, "__module__": "gufe.components.smallmoleculecomponent", "__qualname__": "SmallMoleculeComponent", "atoms": [[6, 0, 0, false, 0, 0, {}, 4], [8, 0, 0, false, 0, 0, {}, 4]], "bonds": [[0, 1, 1, 0, {}]], "conformer": ["\u0093NUMPY\u0001\u0000v\u0000{'descr': '&lt;f8', 'fortran_order': False, 'shape': (2, 3), } \n\u0000\u0000\u0000\u0000\u0000\u0000\u00e8\u00bf\u0000\u0000\u0000\u0000\u0000\u0000\u0090&lt;\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u00e8?\u0000\u0000\u0000\u0000\u0000\u0000\u0090\u00bc\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000", {}], "molprops": {"ofe-name": ""}}</data>
</node>
<edge source="mol0" target="mol2" id="0">
<data key="d1">[[0, 0]]</data>
Expand Down
30 changes: 30 additions & 0 deletions gufe/tests/test_smallmoleculecomponent.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,36 @@ def test_to_dict(self, phenol):

assert isinstance(d, dict)

def test_to_dict_hybridization(self, phenol):
    """
    A to_dict/from_dict round trip must keep the atom hybridization
    <https://github.com/OpenFreeEnergy/gufe/issues/407>
    """
    serialized = phenol.to_dict()
    # NOTE(review): presumably the registry is cleared so that from_dict has to
    # rebuild the component from the dict instead of returning a cached object
    TOKENIZABLE_REGISTRY.clear()
    roundtripped = SmallMoleculeComponent.from_dict(serialized)
    rdkit_mol = roundtripped.to_rdkit()
    for atom in rdkit_mol.GetAtoms():
        # only the carbons are checked
        if atom.GetAtomicNum() != 6:
            continue
        assert atom.GetHybridization() == Chem.rdchem.HybridizationType.SP2

def test_from_dict_missing_hybridization(self, phenol):
    """
    Backwards compatibility: an SMC can still be created from a dict whose
    atom records carry no hybridization info.
    """
    serialized = phenol.to_dict()
    # drop the hybridization entry, which should be at index 7 of every atom record
    serialized["atoms"] = [tuple(record[:7]) + tuple(record[8:]) for record in serialized["atoms"]]
    with pytest.warns(match="The atom hybridization data was not found and has been set to unspecified."):
        rebuilt = SmallMoleculeComponent.from_dict(serialized)
    # the missing hybridization info should make the two components compare unequal
    assert rebuilt != phenol
    # and the underlying rdkit atoms should differ in hybridization, atom by atom
    reference_mol = phenol.to_rdkit()
    rebuilt_mol = rebuilt.to_rdkit()
    for reference_atom, rebuilt_atom in zip(reference_mol.GetAtoms(), rebuilt_mol.GetAtoms()):
        assert reference_atom.GetHybridization() != rebuilt_atom.GetHybridization()

@pytest.mark.skipif(not HAS_OFFTK, reason="no openff toolkit available")
def test_deserialize_roundtrip(self, toluene, phenol):
roundtrip = SmallMoleculeComponent.from_dict(phenol.to_dict())
Expand Down
Loading