From d3e6231807455dbc6aefaee40553c484697ae7df Mon Sep 17 00:00:00 2001 From: Alex Morehead Date: Tue, 13 Aug 2024 10:27:35 -0500 Subject: [PATCH] Support DockGen in mmCIF to PDB and apo-to-holo alignment scripts --- posebench/data/components/convert_mmcif_to_pdb.py | 8 +++++++- .../data/components/protein_apo_to_holo_alignment.py | 4 +++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/posebench/data/components/convert_mmcif_to_pdb.py b/posebench/data/components/convert_mmcif_to_pdb.py index e2d44d8..46b6937 100644 --- a/posebench/data/components/convert_mmcif_to_pdb.py +++ b/posebench/data/components/convert_mmcif_to_pdb.py @@ -44,7 +44,13 @@ def main(cfg: DictConfig): if cfg.lowercase_id: # Support the DockGen dataset's hybrid lowercase-uppercase pdb id-CCD ID format new_id_parts = new_id.split("_") - new_id = "_".join([part.lower() for part in new_id_parts[:2]] + new_id_parts[2:]) + new_id = ( + "_".join([part.lower() for part in new_id_parts[:2]]) + + "_" + + "-".join([part.upper() for part in new_id_parts[2:-1]]) + + "_" + + new_id_parts[-1] + ) else: new_id = new_id.upper() mmcif_filepath = os.path.join( diff --git a/posebench/data/components/protein_apo_to_holo_alignment.py b/posebench/data/components/protein_apo_to_holo_alignment.py index fdf8776..6274123 100644 --- a/posebench/data/components/protein_apo_to_holo_alignment.py +++ b/posebench/data/components/protein_apo_to_holo_alignment.py @@ -119,7 +119,9 @@ def read_mols( ligs.append(lig) else: for file in os.listdir(os.path.join(dataset_dir, name)): - if file.endswith("_ligand.sdf") and "rdkit" not in file: + if ( + file.endswith("_ligand.sdf") or file.endswith("_ligand.pdb") + ) and "rdkit" not in file: lig = read_molecule( os.path.join(dataset_dir, name, file), remove_hs=remove_hs, sanitize=True )