From e259429889c55fa84eb9104be0ac15ea3d5d2093 Mon Sep 17 00:00:00 2001
From: Jacob Silterra <silterra@mit.edu>
Date: Tue, 9 Apr 2024 11:03:22 -0400
Subject: [PATCH] Add example CSV file

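The example CSV was meant to be renamed in an earlier commit but was accidentally removed instead; this restores it as data/protein_ligand_example.csv and updates .gitignore and the README to match.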
---
 .gitignore                      | 4 ++--
 README.md                       | 4 ++--
 data/protein_ligand_example.csv | 3 +++
 3 files changed, 7 insertions(+), 4 deletions(-)
 create mode 100644 data/protein_ligand_example.csv

diff --git a/.gitignore b/.gitignore
index 56021b951..94568a6fe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -125,10 +125,10 @@ local_config_inference2.yml
 .p.npy
 .score.npy
 # this ignores everything in data except for the file
-!/data
 /data/*
+!/data
 !/data/splits
-!/data/protein_ligand_example_csv.csv
+!/data/protein_ligand_example*
 !/data/testset_csv.csv
 !/data/INDEX_general_PL_data.2020
 test_run
diff --git a/README.md b/README.md
index f605b874a..a3b6c9338 100644
--- a/README.md
+++ b/README.md
@@ -87,11 +87,11 @@ The protein inputs need to be `.pdb` files or sequences that will be folded with
 For a single complex: specify the protein with `--protein_path protein.pdb` or `--protein_sequence GIQSYCTPPYSVLQDPPQPVV` and the ligand with `--ligand ligand.sdf` or `--ligand "COc(cc1)ccc1C#N"`
 
 For many complexes: create a csv file with paths to proteins and ligand files or SMILES. It contains as columns `complex_name` (name used to save predictions, can be left empty), `protein_path` (path to `.pdb` file, if empty uses sequence), `ligand_description` (SMILE or file path)  and `protein_sequence` (to fold with ESMFold in case the protein_path is empty).
-An example .csv is at `data/protein_ligand_example_csv.csv` and you would use it with `--protein_ligand_csv protein_ligand_example_csv.csv`.
+An example .csv is at `data/protein_ligand_example.csv`; to use it, pass `--protein_ligand_csv data/protein_ligand_example.csv`.
 
 And you are ready to run inference:
 
-    python -m inference --config default_inference_args.yaml  --protein_ligand_csv data/protein_ligand_example_csv.csv --out_dir results/user_predictions_small 
+    python -m inference --config default_inference_args.yaml  --protein_ligand_csv data/protein_ligand_example.csv --out_dir results/user_predictions_small 
 
 When providing the `.pdb` files you can run DiffDock also on CPU, however, if possible, we recommend using a GPU as the model runs significantly faster. Note that the first time you run DiffDock on a device the program will precompute and store in cache look-up tables for SO(2) and SO(3) distributions (typically takes a couple of minutes), this won't be repeated in following runs.  
 
diff --git a/data/protein_ligand_example.csv b/data/protein_ligand_example.csv
new file mode 100644
index 000000000..301de7009
--- /dev/null
+++ b/data/protein_ligand_example.csv
@@ -0,0 +1,3 @@
+complex_name,protein_path,ligand_description,protein_sequence
+1a0q,data/1a0q/1a0q_protein_processed.pdb,data/1a0q/1a0q_ligand.sdf,
+1a0q_custom,data/1a0q/1a0q_protein_processed.pdb,COc(cc1)ccc1C#N,
\ No newline at end of file