Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add logging at high-level landmarks or common debug points #86

Merged
merged 2 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ repos:
rev: v0.7.4
hooks:
- id: ruff
args: ['--fix']
- id: ruff-format
- repo: https://github.com/asottile/add-trailing-comma
rev: v3.1.0
Expand Down
2 changes: 1 addition & 1 deletion yammbs/_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,7 @@ def get_tfd(
),
)
except Exception as e:
logging.warning(f"Molecule {inchi_key} failed with {e!s}")
LOGGER.warning(f"Molecule {inchi_key} failed with {e!s}")

return tfds

Expand Down
17 changes: 15 additions & 2 deletions yammbs/torsion/_minimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ def _minimize_torsions(
n_processes: int = 2,
chunksize=32,
) -> Generator[ConstrainedMinimizationResult, None, None]:
LOGGER.info("Mapping `data` generator into `inputs` generator")

# It'd be smoother to skip this tranformation - just pass this generator
# from inside of TorsionStore
inputs: Generator[ConstrainedMinimizationInput, None, None] = (
Expand All @@ -80,6 +82,8 @@ def _minimize_torsions(
for (molecule_id, mapped_smiles, dihedral_indices, grid_id, coordinates, _) in data
)

LOGGER.info("Setting up multiprocessing pool with generator (of unknown length)")

# TODO: It'd be nice to have the `total` argument passed through, but that would require using
# a list-like iterable instead of a generator, which might cause problems at scale
with Pool(processes=n_processes) as pool:
Expand Down Expand Up @@ -114,11 +118,14 @@ def _minimize_constrained(
from openff.interchange.operations.minimize import _DEFAULT_ENERGY_MINIMIZATION_TOLERANCE
from openff.toolkit import Molecule, Quantity

LOGGER.debug(f"Setting up constrained minimization for {input.dict()=}")

# TODO: Pass this through
restrain_k = 1.0

# TODO: GAFF/Espaloma/local file/plugin force fields

LOGGER.debug(f"Loading force field {input.force_field=}")
force_field = _lazy_load_force_field(input.force_field)

# if this force field is constrained, this will be the H-* constraint ...
Expand All @@ -127,9 +134,11 @@ def _minimize_constrained(
except (KeyError, AssertionError):
pass

LOGGER.debug(f"Creating molecule, with conformer, from {input.mapped_smiles=}")
molecule = Molecule.from_mapped_smiles(input.mapped_smiles, allow_undefined_stereo=True)
molecule.add_conformer(Quantity(input.coordinates, "angstrom"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could see the coordinates themselves being interesting for a super-involved debugging session, but it would also produce a ton of output.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I'm inclined to keep those out of STDOUT unless we really want to include them - currently DEBUG is, I think, large enough to parse through with some skill but not so large it'll fill up disks with log files


LOGGER.debug("Creating interchange object")
interchange = force_field.create_interchange(molecule.to_topology())

restraint_force = openmm.CustomExternalForce("0.5*k*((x-x0)^2+(y-y0)^2+(z-z0)^2)")
Expand All @@ -146,6 +155,7 @@ def _minimize_constrained(
# switch to nm now... just in case
positions = interchange.positions.to("nanometer")

LOGGER.debug(f"Adding restraint to particles not in {input.dihedral_indices=}")
for atom_index in range(molecule.n_atoms):
if atom_index in input.dihedral_indices:
continue
Expand All @@ -157,6 +167,7 @@ def _minimize_constrained(
[x.to_openmm() for x in positions[atom_index]],
)

LOGGER.debug("Creating openmm.app.Simulation object")
simulation = interchange.to_openmm_simulation(
openmm.LangevinMiddleIntegrator(
293.15 * openmm.unit.kelvin,
Expand All @@ -172,22 +183,24 @@ def _minimize_constrained(
for index in input.dihedral_indices:
simulation.system.setParticleMass(index, 0.0)

LOGGER.debug("Trying to minimize energy")
try:
simulation.minimizeEnergy(
tolerance=_DEFAULT_ENERGY_MINIMIZATION_TOLERANCE.to_openmm(),
maxIterations=10_000,
)
except Exception as e:
logging.error(
LOGGER.error(
{
index: simulation.system.getParticleMass(index)._value
for index in range(simulation.system.getNumParticles())
},
)
logging.error(input.dihedral_indices, input.mapped_smiles)
LOGGER.error(input.dihedral_indices, input.mapped_smiles)

raise ConstrainedMinimizationError("Minimization failed, see logger") from e

LOGGER.debug("Returning result")
return ConstrainedMinimizationResult(
molecule_id=input.molecule_id,
mapped_smiles=input.mapped_smiles,
Expand Down
26 changes: 26 additions & 0 deletions yammbs/torsion/_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def __init__(self, database_path: Pathlike = "torsion-store.sqlite"):
f"Only paths to SQLite databases ending in .sqlite are supported. Given: {database_path}",
)

LOGGER.info("Creating a new TorsionStore at {database_path=}")

self.database_url = f"sqlite:///{database_path.resolve()}"
self.engine = create_engine(self.database_url)
DBBase.metadata.create_all(self.engine)
Expand Down Expand Up @@ -198,8 +200,15 @@ def from_torsion_dataset(
if pathlib.Path(database_name).exists():
raise DatabaseExistsError(f"Database {database_name} already exists.")

LOGGER.info(
f"Creating a new TorsionStore at {database_name=} from a QCArchiveTorsionDataset "
"(which is a YAMMBS model).",
)

store = cls(database_name)

LOGGER.info("Iterating through qm_torsions field of QCArchiveTorsionDataset (which is a YAMMBS model).")

for qm_torsion in dataset.qm_torsions:
torsion_record = TorsionRecord(
mapped_smiles=qm_torsion.mapped_smiles,
Expand Down Expand Up @@ -230,6 +239,11 @@ def from_qcsubmit_collection(
database_name: str,
):
"""Convert a QCSubmit collection to TorsionDataset and use it to create a TorsionStore."""
LOGGER.info(
f"Creating a new TorsionStore at {database_name=} from a TorsionDriveResultCollection "
"(which is a QCSubmit model).",
)

return cls.from_torsion_dataset(
dataset=QCArchiveTorsionDataset.from_qcsubmit_collection(collection),
database_name=database_name,
Expand All @@ -254,6 +268,8 @@ def optimize_mm(
for molecule_id in self.get_molecule_ids()
}

LOGGER.info(f"Setting up generator of data for minimization with {force_field=}")

with self._get_session() as db:
# TODO: Implement "seen" behavior to short-circuit already-optimized torsions
data: Generator[
Expand Down Expand Up @@ -284,12 +300,16 @@ def optimize_mm(
).all()
)

LOGGER.info(f"Passing generator of data to minimization with {force_field=}")

minimization_results = _minimize_torsions(
data=data,
force_field=force_field,
n_processes=n_processes,
)

LOGGER.info(f"Storing minimization results in database with {force_field=}")

with self._get_session() as db:
for result in minimization_results:
db.store_mm_torsion_point(
Expand All @@ -312,6 +332,8 @@ def get_log_sse(
molecule_ids = self.get_molecule_ids()

if not skip_check:
# TODO: Copy this into each get_* method?
LOGGER.info("Calling optimize_mm from inside of get_log_sse.")
self.optimize_mm(force_field=force_field)

log_sses = LogSSECollection()
Expand Down Expand Up @@ -339,6 +361,8 @@ def get_outputs(self) -> MinimizedTorsionDataset:

output_dataset = MinimizedTorsionDataset()

LOGGER.info("Getting outputs for all force fields.")

with self._get_session() as db:
for force_field in self.get_force_fields():
output_dataset.mm_torsions[force_field] = list()
Expand Down Expand Up @@ -376,6 +400,8 @@ def get_metrics(
) -> MetricCollection:
import pandas

LOGGER.info("Getting metrics for all force fields.")

metrics = MetricCollection()

# TODO: Optimize this for speed
Expand Down
9 changes: 9 additions & 0 deletions yammbs/torsion/inputs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from typing import Sequence

import qcelemental
Expand All @@ -11,6 +12,9 @@
bohr2angstroms = qcelemental.constants.bohr2angstroms


LOGGER = logging.getLogger(__name__)


class TorsionDataset(ImmutableModel):
tag: str

Expand Down Expand Up @@ -56,6 +60,11 @@ def from_qcsubmit_collection(
cls,
collection: TorsionDriveResultCollection,
) -> "QCArchiveTorsionDataset":
LOGGER.info(
"Converting a TorsionDriveResultCollection (a QCSubmit model) "
"to a QCArchiveTorsionDataset (a YAMMBS model)",
)

return cls(
qm_torsions=[
QCArchiveTorsionProfile(
Expand Down