From d3e6231807455dbc6aefaee40553c484697ae7df Mon Sep 17 00:00:00 2001
From: Alex Morehead <acmwhb@missouri.edu>
Date: Tue, 13 Aug 2024 10:27:35 -0500
Subject: [PATCH] Support DockGen in mmCIF to PDB and apo-to-holo alignment
 scripts

---
 posebench/data/components/convert_mmcif_to_pdb.py         | 8 +++++++-
 .../data/components/protein_apo_to_holo_alignment.py      | 4 +++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/posebench/data/components/convert_mmcif_to_pdb.py b/posebench/data/components/convert_mmcif_to_pdb.py
index e2d44d8..46b6937 100644
--- a/posebench/data/components/convert_mmcif_to_pdb.py
+++ b/posebench/data/components/convert_mmcif_to_pdb.py
@@ -44,7 +44,13 @@ def main(cfg: DictConfig):
         if cfg.lowercase_id:
             # Support the DockGen dataset's hybrid lowercase-uppercase pdb id-CCD ID format
             new_id_parts = new_id.split("_")
-            new_id = "_".join([part.lower() for part in new_id_parts[:2]] + new_id_parts[2:])
+            new_id = (
+                "_".join([part.lower() for part in new_id_parts[:2]])
+                + "_"
+                + "-".join([part.upper() for part in new_id_parts[2:-1]])
+                + "_"
+                + new_id_parts[-1]
+            )
         else:
             new_id = new_id.upper()
         mmcif_filepath = os.path.join(
diff --git a/posebench/data/components/protein_apo_to_holo_alignment.py b/posebench/data/components/protein_apo_to_holo_alignment.py
index fdf8776..6274123 100644
--- a/posebench/data/components/protein_apo_to_holo_alignment.py
+++ b/posebench/data/components/protein_apo_to_holo_alignment.py
@@ -119,7 +119,9 @@ def read_mols(
             ligs.append(lig)
     else:
         for file in os.listdir(os.path.join(dataset_dir, name)):
-            if file.endswith("_ligand.sdf") and "rdkit" not in file:
+            if (
+                file.endswith("_ligand.sdf") or file.endswith("_ligand.pdb")
+            ) and "rdkit" not in file:
                 lig = read_molecule(
                     os.path.join(dataset_dir, name, file), remove_hs=remove_hs, sanitize=True
                 )