From 1c29f36007121314de01238a95fdab9086dafa32 Mon Sep 17 00:00:00 2001 From: kierandidi Date: Fri, 20 Dec 2024 20:47:33 +0100 Subject: [PATCH] change database setup script to include gpu options --- setup_databases.sh | 75 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 23 deletions(-) diff --git a/setup_databases.sh b/setup_databases.sh index 64ae1449..ecd87767 100755 --- a/setup_databases.sh +++ b/setup_databases.sh @@ -13,6 +13,9 @@ PDB_AWS_SNAPSHOT="20240101" UNIREF30DB="uniref30_2302" MMSEQS_NO_INDEX=${MMSEQS_NO_INDEX:-} +DOWNLOADS_ONLY=${DOWNLOADS_ONLY:-} +GPU=${GPU:-} +mkdir -p -- "${WORKDIR}" cd "${WORKDIR}" hasCommand () { @@ -56,15 +59,51 @@ downloadFile() { fail "Could not download $URL to $OUTPUT" } +if [ ! -f DOWNLOADS_READY ]; then + downloadFile "https://wwwuser.gwdg.de/~compbiol/colabfold/${UNIREF30DB}.tar.gz" "${UNIREF30DB}.tar.gz" + downloadFile "https://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz" "colabfold_envdb_202108.tar.gz" + downloadFile "https://wwwuser.gwdg.de/~compbiol/colabfold/pdb100_230517.fasta.gz" "pdb100_230517.fasta.gz" + downloadFile "https://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/pdb100_foldseek_230517.tar.gz" "pdb100_foldseek_230517.tar.gz" + touch DOWNLOADS_READY +fi + +if [ ! 
-f PDB_MMCIF_READY ]; then + mkdir -p pdb/divided + mkdir -p pdb/obsolete + if [ -n "${PDB_AWS_DOWNLOAD}" ]; then + aws s3 cp --no-sign-request --recursive s3://pdbsnapshots/${PDB_AWS_SNAPSHOT}/pub/pdb/data/structures/divided/mmCIF/ pdb/divided/ + aws s3 cp --no-sign-request --recursive s3://pdbsnapshots/${PDB_AWS_SNAPSHOT}/pub/pdb/data/structures/obsolete/mmCIF/ pdb/obsolete/ + fi + rsync -rlpt -v -z --delete --port=${PDB_PORT} ${PDB_SERVER}/data/structures/divided/mmCIF/ pdb/divided + rsync -rlpt -v -z --delete --port=${PDB_PORT} ${PDB_SERVER}/data/structures/obsolete/mmCIF/ pdb/obsolete + touch PDB_MMCIF_READY +fi + +if [ -n "$DOWNLOADS_ONLY" ]; then + exit 0 +fi + + # Make MMseqs2 merge the databases to avoid spamming the folder with files export MMSEQS_FORCE_MERGE=1 +GPU_PAR="" +GPU_INDEX_PAR="" +if [ -n "${GPU}" ]; then + GPU_PAR="--gpu 1" + GPU_INDEX_PAR=" --split 1 --index-subset 2" + + if ! mmseqs --help | grep -q 'gpuserver'; then + echo "The installed MMseqs2 has no GPU support, update to at least release 16" + exit 1 + fi +fi + if [ ! -f UNIREF30_READY ]; then - downloadFile "https://wwwuser.gwdg.de/~compbiol/colabfold/${UNIREF30DB}.tar.gz" "${UNIREF30DB}.tar.gz" tar xzvf "${UNIREF30DB}.tar.gz" - mmseqs tsv2exprofiledb "${UNIREF30DB}" "${UNIREF30DB}_db" + mmseqs tsv2exprofiledb "${UNIREF30DB}" "${UNIREF30DB}_db" ${GPU_PAR} if [ -z "$MMSEQS_NO_INDEX" ]; then - mmseqs createindex "${UNIREF30DB}_db" tmp1 --remove-tmp-files 1 + mmseqs createindex "${UNIREF30DB}_db" tmp1 --remove-tmp-files 1 ${GPU_INDEX_PAR} fi if [ -e ${UNIREF30DB}_db_mapping ]; then ln -sf ${UNIREF30DB}_db_mapping ${UNIREF30DB}_db.idx_mapping @@ -76,40 +115,30 @@ if [ ! -f UNIREF30_READY ]; then fi if [ ! 
-f COLABDB_READY ]; then - downloadFile "https://wwwuser.gwdg.de/~compbiol/colabfold/colabfold_envdb_202108.tar.gz" "colabfold_envdb_202108.tar.gz" tar xzvf "colabfold_envdb_202108.tar.gz" - mmseqs tsv2exprofiledb "colabfold_envdb_202108" "colabfold_envdb_202108_db" + mmseqs tsv2exprofiledb "colabfold_envdb_202108" "colabfold_envdb_202108_db" ${GPU_PAR} # TODO: split memory value for createindex? if [ -z "$MMSEQS_NO_INDEX" ]; then - mmseqs createindex "colabfold_envdb_202108_db" tmp2 --remove-tmp-files 1 + mmseqs createindex "colabfold_envdb_202108_db" tmp2 --remove-tmp-files 1 ${GPU_INDEX_PAR} fi touch COLABDB_READY fi if [ ! -f PDB_READY ]; then - downloadFile "https://wwwuser.gwdg.de/~compbiol/colabfold/pdb100_230517.fasta.gz" "pdb100_230517.fasta.gz" - mmseqs createdb pdb100_230517.fasta.gz pdb100_230517 + if [ -n "${GPU}" ]; then + mmseqs createdb pdb100_230517.fasta.gz pdb100_230517_tmp + mmseqs makepaddedseqdb pdb100_230517_tmp pdb100_230517 + mmseqs rmdb pdb100_230517_tmp + else + mmseqs createdb pdb100_230517.fasta.gz pdb100_230517 + fi if [ -z "$MMSEQS_NO_INDEX" ]; then - mmseqs createindex pdb100_230517 tmp3 --remove-tmp-files 1 + mmseqs createindex pdb100_230517 tmp3 --remove-tmp-files 1 ${GPU_INDEX_PAR} fi touch PDB_READY fi - if [ ! -f PDB100_READY ]; then - downloadFile "https://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/pdb100_foldseek_230517.tar.gz" "pdb100_foldseek_230517.tar.gz" tar xzvf pdb100_foldseek_230517.tar.gz pdb100_a3m.ffdata pdb100_a3m.ffindex touch PDB100_READY fi - -if [ ! 
-f PDB_MMCIF_READY ]; then - mkdir -p pdb/divided - mkdir -p pdb/obsolete - if [ -n "${PDB_AWS_DOWNLOAD}" ]; then - aws s3 cp --no-sign-request --recursive s3://pdbsnapshots/${PDB_AWS_SNAPSHOT}/pub/pdb/data/structures/divided/mmCIF/ pdb/divided/ - aws s3 cp --no-sign-request --recursive s3://pdbsnapshots/${PDB_AWS_SNAPSHOT}/pub/pdb/data/structures/obsolete/mmCIF/ pdb/obsolete/ - fi - rsync -rlpt -v -z --delete --port=${PDB_PORT} ${PDB_SERVER}/data/structures/divided/mmCIF/ pdb/divided - rsync -rlpt -v -z --delete --port=${PDB_PORT} ${PDB_SERVER}/data/structures/obsolete/mmCIF/ pdb/obsolete - touch PDB_MMCIF_READY -fi \ No newline at end of file