From c859613a120132394c0e7889b2c6a4d8e48ba39a Mon Sep 17 00:00:00 2001 From: "Matthew W. Thompson" Date: Mon, 25 Nov 2024 12:31:17 -0600 Subject: [PATCH 1/2] Add logging at high-level landmarks or common debug points --- .pre-commit-config.yaml | 1 + yammbs/_store.py | 2 +- yammbs/torsion/_minimize.py | 17 +++++++++++++++-- yammbs/torsion/_store.py | 26 ++++++++++++++++++++++++++ yammbs/torsion/inputs.py | 9 +++++++++ 5 files changed, 52 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cb68673..db638f2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,6 +15,7 @@ repos: rev: v0.7.4 hooks: - id: ruff + args: ['--fix'] - id: ruff-format - repo: https://github.com/asottile/add-trailing-comma rev: v3.1.0 diff --git a/yammbs/_store.py b/yammbs/_store.py index 4d3a5f8..16e4f5d 100644 --- a/yammbs/_store.py +++ b/yammbs/_store.py @@ -825,7 +825,7 @@ def get_tfd( ), ) except Exception as e: - logging.warning(f"Molecule {inchi_key} failed with {e!s}") + LOGGER.warning(f"Molecule {inchi_key} failed with {e!s}") return tfds diff --git a/yammbs/torsion/_minimize.py b/yammbs/torsion/_minimize.py index b4b948f..1939feb 100644 --- a/yammbs/torsion/_minimize.py +++ b/yammbs/torsion/_minimize.py @@ -66,6 +66,8 @@ def _minimize_torsions( n_processes: int = 2, chunksize=32, ) -> Generator[ConstrainedMinimizationResult, None, None]: + LOGGER.info("Mapping `data` generator into `inputs` generator") + # It'd be smoother to skip this tranformation - just pass this generator # from inside of TorsionStore inputs: Generator[ConstrainedMinimizationInput, None, None] = ( @@ -80,6 +82,8 @@ def _minimize_torsions( for (molecule_id, mapped_smiles, dihedral_indices, grid_id, coordinates, _) in data ) + LOGGER.info("Setting up multiprocessing pool with generator (of unknown length)") + # TODO: It'd be nice to have the `total` argument passed through, but that would require using # a list-like iterable instead of a generator, which might cause problems at scale with Pool(processes=n_processes) as pool: @@ -114,11 +118,14 @@ def _minimize_constrained( from openff.interchange.operations.minimize import _DEFAULT_ENERGY_MINIMIZATION_TOLERANCE from openff.toolkit import Molecule, Quantity + LOGGER.debug(f"Setting up constrained minimization for {input.dict()=}") + # TODO: Pass this through restrain_k = 1.0 # TODO: GAFF/Espaloma/local file/plugin force fields + LOGGER.debug(f"Loading force field {input.force_field=}") force_field = _lazy_load_force_field(input.force_field) # if this force field is constrained, this will be the H-* constraint ... @@ -127,9 +134,11 @@ def _minimize_constrained( except (KeyError, AssertionError): pass + LOGGER.debug(f"Creating molecule, with conformer, from {input.mapped_smiles=}") molecule = Molecule.from_mapped_smiles(input.mapped_smiles, allow_undefined_stereo=True) molecule.add_conformer(Quantity(input.coordinates, "angstrom")) + LOGGER.debug("Creating interchange object") interchange = force_field.create_interchange(molecule.to_topology()) restraint_force = openmm.CustomExternalForce("0.5*k*((x-x0)^2+(y-y0)^2+(z-z0)^2)") @@ -146,6 +155,7 @@ def _minimize_constrained( # switch to nm now... just in case positions = interchange.positions.to("nanometer") + LOGGER.debug(f"Adding restraint for to particles not in {input.dihedral_indices=}") for atom_index in range(molecule.n_atoms): if atom_index in input.dihedral_indices: continue @@ -157,6 +167,7 @@ def _minimize_constrained( [x.to_openmm() for x in positions[atom_index]], ) + LOGGER.debug("Creating openmm.app.Simulation object") simulation = interchange.to_openmm_simulation( openmm.LangevinMiddleIntegrator( 293.15 * openmm.unit.kelvin, @@ -172,22 +183,24 @@ def _minimize_constrained( for index in input.dihedral_indices: simulation.system.setParticleMass(index, 0.0) + LOGGER.debug("Trying to minimize energy") try: simulation.minimizeEnergy( tolerance=_DEFAULT_ENERGY_MINIMIZATION_TOLERANCE.to_openmm(), maxIterations=10_000, ) except Exception as e: - logging.error( + LOGGER.error( { index: simulation.system.getParticleMass(index)._value for index in range(simulation.system.getNumParticles()) }, ) - logging.error(input.dihedral_indices, input.mapped_smiles) + LOGGER.error(input.dihedral_indices, input.mapped_smiles) raise ConstrainedMinimizationError("Minimization failed, see logger") from e + LOGGER.debug("Returning result") return ConstrainedMinimizationResult( molecule_id=input.molecule_id, mapped_smiles=input.mapped_smiles, diff --git a/yammbs/torsion/_store.py b/yammbs/torsion/_store.py index 278bf46..05ba32c 100644 --- a/yammbs/torsion/_store.py +++ b/yammbs/torsion/_store.py @@ -42,6 +42,8 @@ def __init__(self, database_path: Pathlike = "torsion-store.sqlite"): f"Only paths to SQLite databases ending in .sqlite are supported. Given: {database_path}", ) + LOGGER.info("Creating a new TorsionStore at {database_path=}") + self.database_url = f"sqlite:///{database_path.resolve()}" self.engine = create_engine(self.database_url) DBBase.metadata.create_all(self.engine) @@ -198,8 +200,15 @@ def from_torsion_dataset( if pathlib.Path(database_name).exists(): raise DatabaseExistsError(f"Database {database_name} already exists.") + LOGGER.info( + f"Creating a new TorsionStore at {database_name=} from a QCArchiveTorsionDataset " + "(which is a YAMMBS model).", + ) + store = cls(database_name) + LOGGER.info("Iterating through qm_torsions field of QCArchiveTorsionDataset (which is a YAMMBS model).") + for qm_torsion in dataset.qm_torsions: torsion_record = TorsionRecord( mapped_smiles=qm_torsion.mapped_smiles, @@ -230,6 +239,11 @@ def from_qcsubmit_collection( database_name: str, ): """Convert a QCSubmit collection to TorsionDataset and use it to create a TorsionStore.""" + LOGGER.info( + f"Creating a new TorsionStore at {database_name=} from a TorsionDriveResultCollection " + "(which is a QCSubmit model).", + ) + return cls.from_torsion_dataset( dataset=QCArchiveTorsionDataset.from_qcsubmit_collection(collection), database_name=database_name, @@ -254,6 +268,8 @@ def optimize_mm( for molecule_id in self.get_molecule_ids() } + LOGGER.info(f"Setting up generator of data for minimization with {force_field=}") + with self._get_session() as db: # TODO: Implement "seen" behavior to short-circuit already-optimized torsions data: Generator[ @@ -284,12 +300,16 @@ def optimize_mm( ).all() ) + LOGGER.info(f"Passing generator of data to minimization with {force_field=}") + minimization_results = _minimize_torsions( data=data, force_field=force_field, n_processes=n_processes, ) + LOGGER.info(f"Storing minimization results in database with {force_field=}") + with self._get_session() as db: for result in minimization_results: db.store_mm_torsion_point( @@ -312,6 +332,8 @@ def get_log_sse( molecule_ids = self.get_molecule_ids() if not skip_check: + # TODO: Copy this into each get_* method? + LOGGER.info("Calling optimize_mm from inside of get_log_sse.") self.optimize_mm(force_field=force_field) log_sses = LogSSECollection() @@ -339,6 +361,8 @@ def get_outputs(self) -> MinimizedTorsionDataset: output_dataset = MinimizedTorsionDataset() + LOGGER.info("Getting outputs for all force fields.") + with self._get_session() as db: for force_field in self.get_force_fields(): output_dataset.mm_torsions[force_field] = list() @@ -376,6 +400,8 @@ def get_metrics( ) -> MetricCollection: import pandas + LOGGER.info("Getting metrics for all force fields.") + metrics = MetricCollection() # TODO: Optimize this for speed diff --git a/yammbs/torsion/inputs.py b/yammbs/torsion/inputs.py index cad6550..7a9f02b 100644 --- a/yammbs/torsion/inputs.py +++ b/yammbs/torsion/inputs.py @@ -1,3 +1,4 @@ +import logging from typing import Sequence import qcelemental @@ -11,6 +12,9 @@ bohr2angstroms = qcelemental.constants.bohr2angstroms +LOGGER = logging.getLogger(__name__) + + class TorsionDataset(ImmutableModel): tag: str @@ -56,6 +60,11 @@ def from_qcsubmit_collection( cls, collection: TorsionDriveResultCollection, ) -> "QCArchiveTorsionDataset": + LOGGER.info( + "Converting a TorsionDriveResultCollection (a QCSubmit model) " + "to a QCArchiveTorsionDataset (a YAMMBS model)", + ) + return cls( qm_torsions=[ QCArchiveTorsionProfile( From 592fffc921db3f911dc6746a8defd7a641e7ce33 Mon Sep 17 00:00:00 2001 From: Matt Thompson Date: Mon, 25 Nov 2024 13:23:34 -0600 Subject: [PATCH 2/2] Update yammbs/torsion/_minimize.py Co-authored-by: Brent Westbrook <36778786+ntBre@users.noreply.github.com> --- yammbs/torsion/_minimize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yammbs/torsion/_minimize.py b/yammbs/torsion/_minimize.py index 1939feb..8909d24 100644 --- a/yammbs/torsion/_minimize.py +++ b/yammbs/torsion/_minimize.py @@ -155,7 +155,7 @@ def _minimize_constrained( # switch to nm now... just in case positions = interchange.positions.to("nanometer") - LOGGER.debug(f"Adding restraint for to particles not in {input.dihedral_indices=}") + LOGGER.debug(f"Adding restraint to particles not in {input.dihedral_indices=}") for atom_index in range(molecule.n_atoms): if atom_index in input.dihedral_indices: continue