Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add internal coordinate RMSD method #15

Merged
merged 2 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions LICENSE-3RD-PARTY
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,42 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

================================= ForceBalance ================================

BSD 3-clause (aka BSD 2.0) License

Copyright 2011-2015 Stanford University and the Authors
Copyright 2015-2018 Regents of the University of California and the Authors

Developed by: Lee-Ping Wang
University of California, Davis
http://www.ucdavis.edu

Contributors: Yudong Qiu, Keri A. McKiernan, Jeffrey R. Wagner, Hyesu Jang,
Simon Boothroyd, Arthur Vigil, Erik G. Brandt, Johnny Israeli, John Stoppelman

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
1 change: 1 addition & 0 deletions devtools/conda-envs/dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ dependencies:
- openmmforcefields
- smirnoff-plugins =2023.08.0
# espaloma =0.3
- geometric =1

- ipython
- ipdb
Expand Down
5 changes: 5 additions & 0 deletions ibstore/_base/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,9 @@ def validate_type(cls, val):
dtype = getattr(cls, "__dtype__", Any)
if dtype is Any:
dtype = None

# If Quantity, manually strip units to avoid downcast warning
if hasattr(val, "magnitude"):
val = val.magnitude

return numpy.asanyarray(val, dtype=dtype)
3 changes: 1 addition & 2 deletions ibstore/_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
from typing import Dict, List

from sqlalchemy import Column, Float, ForeignKey, Integer, PickleType, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy.orm import declarative_base, relationship

from ibstore.models import MMConformerRecord, QMConformerRecord

Expand Down
51 changes: 51 additions & 0 deletions ibstore/_forcebalance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Code vendored from ForceBalance."""
import numpy


def periodic_diff(a, b, v_periodic):
"""convenient function for computing the minimum difference in periodic coordinates
Parameters
----------
a: np.ndarray or float
Reference values in a numpy array
b: np.ndarray or float
Target values in a numpy arrary
v_periodic: float > 0
Value of the periodic boundary

Returns
-------
diff: np.ndarray
The array of same shape containing the difference between a and b
All return values are in range [-v_periodic/2, v_periodic/2),
"( )" means exclusive, "[ ]" means inclusive

Examples
-------
periodic_diff(0.0, 2.1, 2.0) => -0.1
periodic_diff(0.0, 1.9, 2.0) => 0.1
periodic_diff(0.0, 1.0, 2.0) => -1.0
periodic_diff(1.0, 0.0, 2.0) => -1.0
periodic_diff(1.0, 0.1, 2.0) => -0.9
periodic_diff(1.0, 10.1, 2.0) => 0.9
periodic_diff(1.0, 9.9, 2.0) => -0.9
"""
assert v_periodic > 0
h = 0.5 * v_periodic
return (a - b + h) % v_periodic - h


def compute_rmsd(ref, tar, v_periodic=None):
"""
Compute the RMSD between two arrays, supporting periodic difference
"""
assert len(ref) == len(tar), "array length must match"
n = len(ref)
if n == 0:
return 0.0
if v_periodic is not None:
diff = periodic_diff(ref, tar, v_periodic)
else:
diff = ref - tar
rmsd = numpy.sqrt(numpy.sum(diff**2) / n)
return rmsd
29 changes: 29 additions & 0 deletions ibstore/_molecule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Molecule conversion utilities"""
from typing import TYPE_CHECKING

from openff.toolkit import Molecule

from ibstore._base.array import Array

if TYPE_CHECKING:
from geometric.molecule import Molecule as GeometricMolecule


def _to_geometric_molecule(
molecule: Molecule,
coordinates: Array,
) -> "GeometricMolecule":
from geometric.molecule import Molecule as GeometricMolecule

geometric_molecule = GeometricMolecule()

geometric_molecule.Data = {
"resname": ["UNK"] * molecule.n_atoms,
"resid": [0] * molecule.n_atoms,
"elem": [atom.symbol for atom in molecule.atoms],
"bonds": [(bond.atom1_index, bond.atom2_index) for bond in molecule.bonds],
"name": molecule.name,
"xyzs": [coordinates],
}

return geometric_molecule
38 changes: 38 additions & 0 deletions ibstore/_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@
from ibstore._types import Pathlike
from ibstore.analysis import (
DDE,
ICRMSD,
RMSD,
TFD,
DDECollection,
ICRMSDCollection,
RMSDCollection,
TFDCollection,
get_internal_coordinate_rmsds,
get_rmsd,
get_tfd,
)
Expand Down Expand Up @@ -542,6 +545,41 @@ def get_rmsd(

return rmsds

def get_internal_coordinate_rmsd(
self,
force_field: str,
) -> ICRMSDCollection:
self.optimize_mm(force_field=force_field)

icrmsds = ICRMSDCollection()

for inchi_key in self.get_inchi_keys():
molecule = Molecule.from_inchi(inchi_key, allow_undefined_stereo=True)
molecule_id = self.get_molecule_id_by_inchi_key(inchi_key)

qcarchive_ids = self.get_qcarchive_ids_by_molecule_id(molecule_id)

qm_conformers = self.get_qm_conformers_by_molecule_id(molecule_id)
mm_conformers = self.get_mm_conformers_by_molecule_id(
molecule_id,
force_field,
)

for qm, mm, id in zip(
qm_conformers,
mm_conformers,
qcarchive_ids,
):
icrmsds.append(
ICRMSD(
qcarchive_id=id,
icrmsd=get_internal_coordinate_rmsds(molecule, qm, mm),
force_field=force_field,
),
)

return icrmsds

def get_tfd(
self,
force_field: str,
Expand Down
26 changes: 26 additions & 0 deletions ibstore/_tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,36 @@
import pytest
from openff.interchange._tests import MoleculeWithConformer
from openff.qcsubmit.results import OptimizationResultCollection
from openff.toolkit import Molecule
from openff.utilities.utilities import get_data_file_path

from ibstore._store import MoleculeStore


@pytest.fixture()
def water():
return MoleculeWithConformer.from_smiles("O")


@pytest.fixture()
def hydrogen_peroxide():
return MoleculeWithConformer.from_smiles("OO")


@pytest.fixture()
def formaldehyde():
return MoleculeWithConformer.from_smiles("C=O")


@pytest.fixture()
def ligand():
"""Return a ligand that can have many viable conformers."""
molecule = Molecule.from_smiles("C[C@@H](C(c1ccccc1)c2ccccc2)Nc3c4cc(c(cc4ncn3)F)F")
molecule.generate_conformers(n_conformers=100)

return molecule


@pytest.fixture()
def small_collection() -> OptimizationResultCollection:
return OptimizationResultCollection.parse_file(
Expand Down
58 changes: 58 additions & 0 deletions ibstore/_tests/unit_tests/test_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""
def get_internal_coordinate_rmsds(
molecule: Molecule,
reference: Array,
target: Array,
_types: tuple[str] = ("Bond", "Angle", "Dihedral", "Improper"),
) -> dict[str, float]:
"""
from ibstore.analysis import get_internal_coordinate_rmsds


class TestInternalCoordinateRMSD:
def test_rmsds_between_conformers(self, ligand):
assert ligand.n_conformers

rmsds = get_internal_coordinate_rmsds(
molecule=ligand,
reference=ligand.conformers[0],
target=ligand.conformers[-1],
)

assert all(
[rmsds[key] > 0.0 for key in ["Bond", "Angle", "Dihedral", "Improper"]],
)

def test_matching_conformers_zero_rmsd(self, ligand):
rmsds = get_internal_coordinate_rmsds(
molecule=ligand,
reference=ligand.conformers[0],
target=ligand.conformers[0],
)

assert all(
[rmsds[key] == 0.0 for key in ["Bond", "Angle", "Dihedral", "Improper"]],
)

def test_no_torsions(self, water):
rmsds = get_internal_coordinate_rmsds(
molecule=water,
reference=water.conformers[0],
target=water.conformers[0],
)

assert rmsds["Bond"] == 0.0
assert rmsds["Angle"] == 0.0

assert "Dihedral" not in rmsds
assert "Improper" not in rmsds

def test_no_impropers(self, hydrogen_peroxide):
rmsds = get_internal_coordinate_rmsds(
molecule=hydrogen_peroxide,
reference=hydrogen_peroxide.conformers[0],
target=hydrogen_peroxide.conformers[0],
)

assert "Dihedral" in rmsds
assert "Improper" not in rmsds
20 changes: 20 additions & 0 deletions ibstore/_tests/unit_tests/test_molecule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import numpy
from openff.toolkit import Molecule
from openff.units import unit

from ibstore._molecule import _to_geometric_molecule


def test_to_geometric_molecule():
molecule = Molecule.from_smiles("C1([H])CCCCN1")
molecule.generate_conformers(n_conformers=1)

geometric_molecule = _to_geometric_molecule(molecule, molecule.conformers[0].m)

assert molecule.n_atoms == len(geometric_molecule.Data["elem"])
assert molecule.n_bonds == len(geometric_molecule.Data["bonds"])

assert numpy.allclose(
molecule.conformers[0].m_as(unit.angstrom),
geometric_molecule.Data["xyzs"][0],
)
Loading
Loading