From 67d03c86084ce6153b2f8b296091948edf526ae0 Mon Sep 17 00:00:00 2001
From: xnought
Date: Wed, 31 Jan 2024 22:54:15 -0800
Subject: [PATCH] feat: do everything with the foldseek folder

---
 backend/src/api/similar.py |  7 +++++--
 backend/src/foldseek.py    | 30 +++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/backend/src/api/similar.py b/backend/src/api/similar.py
index 73a07c71..bfb5052b 100644
--- a/backend/src/api/similar.py
+++ b/backend/src/api/similar.py
@@ -4,15 +4,18 @@
     pdb_file_name,
     revert_pdb_filename,
 )
-from ..foldseek import easy_search, create_db
+from ..foldseek import easy_search, external_db
 
 router = APIRouter()
 
 
+PDB = external_db("PDB", "foldseek/dbs/pdb")
+
+
 @router.get("/similar-pdb/{protein_name:str}", response_model=list[SimilarProtein])
 def get_similar_pdb(protein_name: str):
+    global PDB
     query_name = pdb_file_name(protein_name)
-    PDB = create_db("PDB", "pdb")
     similar = easy_search(query_name, PDB, out_format="target,prob")
     return [SimilarProtein(name=s[0].split(".")[0], prob=s[1]) for s in similar]
 
diff --git a/backend/src/foldseek.py b/backend/src/foldseek.py
index d3347796..8b5692f3 100644
--- a/backend/src/foldseek.py
+++ b/backend/src/foldseek.py
@@ -23,7 +23,7 @@ class GenerateDirName:
     def __enter__(self):
         global active_caches
         active_caches += 1
-        self.temp_dir = ".foldseek_cache_" + str(active_caches)
+        self.temp_dir = "foldseek/temp_dir_" + str(active_caches)
         return self.temp_dir
 
     def __exit__(self, *args):
@@ -93,6 +93,34 @@ def easy_search(
     return to_columnar_array(parsed_output) if columnar else parsed_output
 
 
+def external_db(
+    external_db_name: str,
+    db_name: str,
+    foldseek_executable="foldseek/bin/foldseek",
+    print_stdout=False,
+):
+    """Download a prebuilt foldseek database unless `db_name` already exists.
+
+    Returns `db_name`. Raises Exception when `external_db_name` is not listed
+    in EXTERNAL_DATABASES.
+    """
+    if external_db_name not in EXTERNAL_DATABASES:
+        raise Exception(f"External database {external_db_name} not found")
+
+    with GenerateDirName() as temp_dir:
+        try:
+            # presumably bash_cmd raises when `ls` fails, i.e. the db is missing
+            bash_cmd(f"ls {db_name}")
+        except Exception:
+            # the name was validated above, so `databases` is the only valid command
+            cmd = f"{foldseek_executable} databases {external_db_name} {db_name} {temp_dir}"
+            stdout = bash_cmd(cmd)
+            if print_stdout:
+                print(stdout)
+
+    return db_name
+
+
 def create_db(
     dir: str,
     db_name: str,