Skip to content

Commit

Permalink
fix: ligandprep skip failed cases (#99)
Browse files Browse the repository at this point in the history
* doc:remove unidock_ad4 in README

* fix:ligandprep skip failed cases

* fix:write sdf set prop key name err
  • Loading branch information
dp-yuanyn authored Mar 9, 2024
1 parent f4fe0e4 commit 23c5ac4
Show file tree
Hide file tree
Showing 8 changed files with 335 additions and 1,370 deletions.
4 changes: 2 additions & 2 deletions unidock_tools/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,6 @@ pythonVersion = "3.11"
pythonPlatform = "Linux"
reportMissingImports = false
reportMissingModuleSource = false
reportMissingTypeStubs = false
reportAttributeAccessIssue = false
reportIncompatibleMethodOverride = false
reportIncompatibleMethodOverride = false
reportCallIssue = false
43 changes: 30 additions & 13 deletions unidock_tools/src/unidock_tools/application/ligandprep.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,50 @@
import os
from functools import partial
from multiprocessing import Pool
import traceback
import logging
import argparse
from rdkit import Chem

from unidock_tools.utils import read_ligand
from unidock_tools.modules.ligand_prep import TopologyBuilder


def iter_ligands(ligands: List[Path], batch_size: int = 1200,
                 use_file_name: bool = False) -> Generator[List[Tuple[Chem.Mol, str]], None, None]:
    """Yield ``(molecule, name)`` pairs read from ligand files, in batches.

    Args:
        ligands: Paths of the ligand files to read with ``read_ligand``.
        batch_size: Maximum number of entries in each yielded batch.
        use_file_name: If True, never use the molecule's ``_Name`` property;
            always derive the name from the file stem.

    Yields:
        Lists of ``(mol, name)`` tuples holding at most ``batch_size`` entries.
        Entries that are falsy (failed to load) are logged and skipped.
    """
    batch = []
    for ligand in ligands:
        mols = read_ligand(ligand)
        # Only multi-molecule files need an index suffix to keep names unique.
        has_many = len(mols) > 1
        for i, mol in enumerate(mols):
            if not mol:
                logging.warning(f"read ligand file {ligand.stem} ind {i} error")
                continue

            name = ""
            if not use_file_name and mol.HasProp("_Name"):
                name = mol.GetProp("_Name").strip()
            if not name:
                # Fall back to the file stem when no usable _Name is present.
                name = f"{ligand.stem}_{i}" if has_many else ligand.stem

            batch.append((mol, name))
            # ">=" (not ">") so a batch never exceeds batch_size entries.
            if len(batch) >= batch_size:
                yield batch
                batch = []
    if batch:
        yield batch


def ligprep(mol_name_tup: Tuple[Chem.Mol, str], savedir: Path, save_format: str = "sdf"):
    """Build the topology of one ligand and write it into ``savedir``.

    Failures are logged rather than raised, so one bad ligand does not abort
    the rest of a batch.

    Args:
        mol_name_tup: ``(mol, name)`` pair; ``name`` becomes the output file stem.
        savedir: Directory the result file is written into.
        save_format: Output format, either ``"sdf"`` or ``"pdbqt"``.
    """
    mol, name = mol_name_tup
    # Reject an unsupported format up front instead of building the topology
    # only to discard it.
    if save_format not in ("sdf", "pdbqt"):
        logging.error(f"Invalid save format: {save_format}")
        return

    try:
        tb = TopologyBuilder(mol)
        tb.build_molecular_graph()
        out_file = os.path.join(savedir, f"{name}.{save_format}")
        if save_format == "sdf":
            tb.write_sdf_file(out_file)
        else:
            tb.write_pdbqt_file(out_file)
    except Exception:
        # "except Exception" (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate while still skipping ligands that fail.
        logging.error(f"ligprep failed for {name}: {traceback.format_exc()}")


def main(args: dict):
Expand All @@ -52,9 +65,11 @@ def main(args: dict):
continue
ligands.append(Path(line.strip()).resolve())

os.makedirs(Path(args["savedir"]).resolve(), exist_ok=True)
for mol_name_tup_list in iter_ligands(ligands, args["batch_size"], args["use_file_name"]):
with Pool(os.cpu_count()) as pool:
pool.map(partial(ligprep, savedir=args["savedir"]), mol_name_tup_list)
pool.map(partial(ligprep, savedir=args["savedir"],
save_format=args["save_format"]), mol_name_tup_list)


def get_parser() -> argparse.ArgumentParser:
Expand All @@ -65,6 +80,8 @@ def get_parser() -> argparse.ArgumentParser:
help="A text file containing the path of ligand files in sdf format.")
parser.add_argument("-sd", "--savedir", type=str, default="ligprep_results",
help="Save directory. Default: 'MultiConfDock-Result'.")
parser.add_argument("-sf", "--save_format", type=str, default="sdf",
help="Ligprep result files format. Choose from ['sdf', 'pdbqt']. Default: 'sdf'.")
parser.add_argument("-bs", "--batch_size", type=int, default=1200,
help="Batch size for docking. Default: 1200.")
parser.add_argument("-ufn", "--use_file_name", action="store_true",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ def main(args: dict):
batch_size=int(args["batch_size"]),
score_only=bool(args["score_only"]),
local_only=bool(args["local_only"]),
props_list=["fragInfo", "torsionInfo", "atomInfo"],
)
runner.save_results(save_dir=savedir)
end_time = time.time()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, Tuple
from typing import Dict, Tuple, Union
import os
import numpy as np
import networkx as nx
Expand Down Expand Up @@ -163,7 +163,7 @@ def build_molecular_graph(self):
node_idx = 0
root_fragment = splitted_mol_list[root_fragment_idx]
num_root_atoms = root_fragment.GetNumAtoms()
atom_info_list = [None] * num_root_atoms
atom_info_list = [dict()] * num_root_atoms
for root_atom_idx in range(num_root_atoms):
root_atom = root_fragment.GetAtomWithIdx(root_atom_idx)
atom_info_dict = {}
Expand All @@ -189,7 +189,7 @@ def build_molecular_graph(self):
else:
fragment = splitted_mol_list[fragment_idx]
num_fragment_atoms = fragment.GetNumAtoms()
atom_info_list = [None] * num_fragment_atoms
atom_info_list = [dict()] * num_fragment_atoms
for atom_idx in range(num_fragment_atoms):
atom = fragment.GetAtomWithIdx(atom_idx)
atom_info_dict = {}
Expand Down Expand Up @@ -341,7 +341,7 @@ def _deep_first_search(self, node_idx):
self.pdbqt_atom_line_list.append(
self.pdbqt_end_branch_line_format.format('ENDBRANCH', parent_atom_idx, offspring_atom_idx))

def write_pdbqt_file(self, out_file: str = ''):
def write_pdbqt_file(self, out_file: Union[str, bytes, os.PathLike]):
self.pdbqt_remark_line_list = []
self.pdbqt_atom_line_list = []

Expand Down Expand Up @@ -383,7 +383,7 @@ def write_pdbqt_file(self, out_file: str = ''):
for pdbqt_line in self.pdbqt_line_list:
f.write(pdbqt_line)

def write_constraint_bpf_file(self, out_path: str = ''):
def write_constraint_bpf_file(self, out_path: Union[str, bytes, os.PathLike]):
self.core_bpf_remark_line_list = []
self.core_bpf_atom_line_list = []
self.core_bpf_atom_line_format = '{:8.3f}\t{:8.3f}\t{:8.3f}\t{:6.2f}\t{:6.2f}\t{:3s}\t{:<2s}\n'
Expand Down Expand Up @@ -452,21 +452,13 @@ def get_sdf_torsion_tree_info(self) -> Tuple[str, str, str, str]:

return frag_info_str, frag_all_info_str, torsion_info_str, atom_info_str

def write_sdf_file(self, out_file: Union[str, bytes, os.PathLike], do_rigid_docking: bool = False):
    """Attach the torsion-tree info to the molecule and write it as SDF.

    Sets the ``fragInfo``, ``torsionInfo`` and ``atomInfo`` properties on
    ``self.mol`` from ``get_sdf_torsion_tree_info()``, then writes the
    molecule to ``out_file``.

    Args:
        out_file: Destination path of the SDF file.
        do_rigid_docking: If True, ``fragInfo`` is set to the "all" fragment
            string instead of the regular fragment string.
    """
    frag_info_str, frag_all_info_str, torsion_info_str, atom_info_str = self.get_sdf_torsion_tree_info()
    self.mol.SetProp("fragInfo", frag_all_info_str if do_rigid_docking else frag_info_str)
    self.mol.SetProp("torsionInfo", torsion_info_str)
    self.mol.SetProp("atomInfo", atom_info_str)
    # os.fsdecode handles str, bytes and PathLike alike; str() on a bytes
    # path would yield the literal "b'...'" text instead of the path.
    with Chem.SDWriter(os.fsdecode(out_file)) as writer:
        writer.write(self.mol)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from math import isnan, isinf
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.rdchem import BondType


def get_pdbqt_atom_lines(mol: Chem.Mol, donors: List[int], acceptors: List[int]):
Expand Down
9 changes: 4 additions & 5 deletions unidock_tools/src/unidock_tools/utils/mol_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def update_conf_props(self, conf_props: dict):
self.conf_props.update(conf_props)

def get_rdkit_mol_conf_with_props(self, conf_idx: int, props_list: List[str] = [],
exclude_props_list: List[str] = []):
exclude_props_list: List[str] = []) -> Chem.Mol:
mol = copy.copy(self.mol_confs[conf_idx])
props = copy.deepcopy(self.get_props())
props.update({k:v[conf_idx] for k, v in self.get_conf_props().items()})
Expand Down Expand Up @@ -135,10 +135,9 @@ def write_sdf_by_idx(self,
) -> List[Path]:
save_dir = make_tmp_dir(str(save_dir), False, False)

mol_confs_copy = [None] * len(self.mol_group[idx])
for conf_idx in range(len(self.mol_group[idx])):
mol_conf_copy = self.mol_group[idx].get_rdkit_mol_conf_with_props(conf_idx, props_list, exclude_props_list)
mol_confs_copy[conf_idx] = mol_conf_copy
mol_confs_copy = [self.mol_group[idx].get_rdkit_mol_conf_with_props(
conf_idx, props_list, exclude_props_list) for conf_idx in range(
len(self.mol_group[idx]))]
# save SDF files
file_prefix = self.mol_group[idx].get_props()['file_prefix']
sdf_file_list = []
Expand Down
4 changes: 2 additions & 2 deletions unidock_tools/src/unidock_tools/utils/rdkit_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ def set_properties(mol: Chem.Mol, properties: dict):
"""
for key, value in properties.items():
try:
if isinstance(key, int):
if isinstance(value, int):
mol.SetIntProp(key, value)
elif isinstance(key, float):
elif isinstance(value, float):
mol.SetDoubleProp(key, value)
else:
mol.SetProp(key, str(value))
Expand Down
Loading

0 comments on commit 23c5ac4

Please sign in to comment.