From 22f09239629e34ae1914140f400e3b10ceb5f908 Mon Sep 17 00:00:00 2001 From: xnought Date: Wed, 31 Jan 2024 22:44:01 -0800 Subject: [PATCH] fix: caches must be named differently --- backend/src/api/similar.py | 4 +-- backend/src/foldseek.py | 53 ++++++++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 21 deletions(-) diff --git a/backend/src/api/similar.py b/backend/src/api/similar.py index ac8852b8..73a07c71 100644 --- a/backend/src/api/similar.py +++ b/backend/src/api/similar.py @@ -13,7 +13,7 @@ def get_similar_pdb(protein_name: str): query_name = pdb_file_name(protein_name) PDB = create_db("PDB", "pdb") - similar = easy_search(query_name, PDB, out_format=["target", "prob"]) + similar = easy_search(query_name, PDB, out_format="target,prob") return [SimilarProtein(name=s[0].split(".")[0], prob=s[1]) for s in similar] @@ -21,5 +21,5 @@ def get_similar_pdb(protein_name: str): def get_similar_venome(protein_name: str): query_name = pdb_file_name(protein_name) target_folder = "src/data/pdbAlphaFold/" - similar = easy_search(query_name, target_folder, out_format=["target", "prob"]) + similar = easy_search(query_name, target_folder, out_format="target,prob") return [SimilarProtein(name=revert_pdb_filename(s[0]), prob=s[1]) for s in similar] diff --git a/backend/src/foldseek.py b/backend/src/foldseek.py index d2e53a89..d3347796 100644 --- a/backend/src/foldseek.py +++ b/backend/src/foldseek.py @@ -1,5 +1,6 @@ import subprocess import logging as log +from functools import lru_cache EXTERNAL_DATABASES = [ "Alphafold/UniProt", @@ -15,6 +16,22 @@ def bash_cmd(cmd: str | list[str]) -> str: return subprocess.check_output(cmd, shell=True).decode() +active_caches = 0 + + +class GenerateDirName: + def __enter__(self): + global active_caches + active_caches += 1 + self.temp_dir = ".foldseek_cache_" + str(active_caches) + return self.temp_dir + + def __exit__(self, *args): + global active_caches + active_caches -= 1 + bash_cmd("rm -rf " + self.temp_dir) + + def to_columnar_array(arr: list[list]) -> list[list]: columnar = [] for i in range(len(arr[0])): @@ -42,12 +59,11 @@ def parse_output(filepath: str) -> list[list]: return parsed_lines +@lru_cache(maxsize=32) def easy_search( query: str, target: str, - out_format: list[str] = ["query", "target", "prob"], - out_file=".foldseek_cache/output", - temp_dir=".foldseek_cache", + out_format: str = "query, target, prob", print_stdout=False, foldseek_executable="./foldseek/bin/foldseek", columnar=False, @@ -58,23 +74,23 @@ def easy_search( Returns: list[list]: a list of the matches from the search """ + with GenerateDirName() as temp_dir: + out_file = temp_dir + "/output" + + # Then call the easy-search + flags = f"--format-output {out_format}" if out_format else "" + cmd = f"{foldseek_executable} easy-search {query} {target} {out_file} {temp_dir} {flags}" + try: + stdout = bash_cmd(cmd) + except Exception as e: + log.warn(e) + return [] - # Then call the easy-search - flags = f"--format-output {','.join(out_format)}" if len(out_format) > 0 else "" - cmd = f"{foldseek_executable} easy-search {query} {target} {out_file} {temp_dir} {flags}" - try: - stdout = bash_cmd(cmd) - except Exception as e: - log.warn(e) - return [] - - if print_stdout: - log.warn(stdout) + if print_stdout: + log.warn(stdout) - if columnar: - return to_columnar_array(parse_output(out_file)) - else: - return parse_output(out_file) + parsed_output = parse_output(out_file) + return to_columnar_array(parsed_output) if columnar else parsed_output def create_db( @@ -112,6 +128,5 @@ def create_db( output = easy_search( query=test_targets, target=test_targets, - out_format=["query", "target", "prob"], ) print(output)