diff --git a/backend/.gitignore b/backend/.gitignore index 912e757d..b9a0cc6c 100644 --- a/backend/.gitignore +++ b/backend/.gitignore @@ -1,3 +1,4 @@ __pycache__/ .venv -foldseek/ \ No newline at end of file +foldseek/ +tmalign/ \ No newline at end of file diff --git a/backend/src/api/protein.py b/backend/src/api/protein.py index 2791af40..82865ec0 100644 --- a/backend/src/api/protein.py +++ b/backend/src/api/protein.py @@ -5,9 +5,11 @@ from Bio.PDB import PDBParser from Bio.SeqUtils import molecular_weight, seq1 from ..db import Database, bytea_to_str, str_to_bytea +from fastapi.exceptions import HTTPException from ..api_types import ProteinEntry, UploadBody, UploadError, EditBody, CamelModel from ..auth import requiresAuthentication +from ..tmalign import tm_align from io import BytesIO from fastapi import APIRouter from fastapi.responses import FileResponse, StreamingResponse @@ -331,3 +333,18 @@ def edit_protein_entry(body: EditBody, req: Request): except Exception: return UploadError.WRITE_ERROR + + +# /pdb with two attributes returns both PDBs, superimposed and with different colors. +@router.get("/protein/pdb/{proteinA:str}/{proteinB:str}") +def search_proteins(proteinA: str, proteinB: str): + try: + pdbA = stored_pdb_file_name(proteinA) + pdbB = stored_pdb_file_name(proteinB) + + file = tm_align(proteinA, pdbA, proteinB, pdbB) + + return FileResponse(file, filename=proteinA + "_" + proteinB + ".pdb") + except Exception as e: + log.error(e) + raise HTTPException(status_code=500, detail=str(e)) diff --git a/backend/src/api_types.py b/backend/src/api_types.py index c15061e1..55346786 100644 --- a/backend/src/api_types.py +++ b/backend/src/api_types.py @@ -76,3 +76,12 @@ class LoginBody(CamelModel): class LoginResponse(CamelModel): token: str error: str + + +class CompareBody(CamelModel): + proteinA: str + proteinB: str + + +class CompareResponse(CamelModel): + file: list[str] diff --git a/backend/src/tmalign.py b/backend/src/tmalign.py new file mode 100644 index 00000000..3790a329 --- /dev/null +++ b/backend/src/tmalign.py @@ -0,0 +1,75 @@ +import subprocess +import logging as log +import os + + +def bash_cmd(cmd: str | list[str]) -> str: + return subprocess.check_output(cmd, shell=True).decode() + + +TMALIGN_LOCATION = "/app/tmalign" +TMALIGN_EXECUTABLE = f"{TMALIGN_LOCATION}/tmalign" + + +def assert_tmalign_installed(): + if os.path.exists(TMALIGN_EXECUTABLE): + return + else: + raise ImportError( + "tm align executable not installed. Please install manually - Automatic install TODO." + ) + + +def parse_pdb(filepath: str) -> list[str]: + with open(filepath, "r") as f: + lines = f.readlines() + return lines + + +def tm_align( + protein_A: str, pdbA: str, protein_B: str, pdbB: str, type: str = "_all_atm" +): + """ + Description: + Returns two overlaid, aligned, and colored PDB structures in a single PDB file. + The ones without extensions appear to be PDB files. + + Params: + protein_A: + The name of the first protein. + pdbA: + The filepath of the first protein. + protein_B: + The name of the second protein. + pdbB: + The filepath of the second protein. + type: + The kind of file you want. Experiment with these! Defaults to _all_atm, + which shows alpha helices and beta sheets. Valid options include: + "", "_all", "_all_atm", "_all_atm_lig", "_atm", + ".pml", "_all.pml", "_all_atm.pml", "all_atm_lig.pml", "_atm.pml" + """ + dir_name = protein_A + "-" + protein_B + full_path = f"{TMALIGN_LOCATION}/{dir_name}" + out_file = full_path + "/output" + desired_file = out_file + type + + # If the directory already exists, then we've already run TM align for this protein pair. We can just return the file. + if os.path.exists(full_path): + log.warn(f"Path {full_path} already exists. Do not need to run TM align.") + + # If the directory doesn't exist, then we need to run TM align and generate the files. + else: + log.warn(f"Path {full_path} does not exist. Creating directory and returning.") + cmd = f"{TMALIGN_EXECUTABLE} {pdbA} {pdbB} -o {out_file}" + try: + bash_cmd(f"mkdir {full_path}") + log.warn(f"Attempting to align now with cmd {cmd}") + stdout = bash_cmd(cmd) + log.warn(stdout) + + except Exception as e: + log.warn(e) + raise e + + return desired_file diff --git a/frontend/src/Router.svelte b/frontend/src/Router.svelte index c22c1d5b..3c0016ad 100644 --- a/frontend/src/Router.svelte +++ b/frontend/src/Router.svelte @@ -10,6 +10,7 @@ import Edit from "./routes/Edit.svelte"; import Tutorials from "./routes/Tutorials.svelte"; import ForceUploadThumbnails from "./routes/ForceUploadThumbnails.svelte"; + import Compare from "./routes/Compare.svelte"; @@ -27,6 +28,7 @@ > + diff --git a/frontend/src/lib/SimilarProteins.svelte b/frontend/src/lib/SimilarProteins.svelte index a7d797f7..b5e24f74 100644 --- a/frontend/src/lib/SimilarProteins.svelte +++ b/frontend/src/lib/SimilarProteins.svelte @@ -28,6 +28,7 @@ Probability Match E-Value Description + Compare {#each similarProteins as protein} @@ -41,6 +42,13 @@ {protein.prob} {protein.evalue} {protein.description} + + Compare + {/each} diff --git a/frontend/src/routes/Compare.svelte b/frontend/src/routes/Compare.svelte new file mode 100644 index 00000000..b1866b05 --- /dev/null +++ b/frontend/src/routes/Compare.svelte @@ -0,0 +1,90 @@ + + + + Venome Protein {entry ? entry.name : ""} + + +
+ {#if entry} +
+ +

+ Comparing Proteins +

+ +
+ {proteinA} and {proteinB} +
+ { + // upload the protein thumbnail if it doesn't exist + if (entry !== null && entry.thumbnail === null) { + const b64 = await screenshot(); + const res = await Backend.uploadProteinPng({ + proteinName: entry.name, + base64Encoding: b64, + }); + } + }} + /> +
+ {:else if !error} + +

+ {:else if error} + +

Error

+

Could not find a protein with the id {urlId}

+ {/if} +
+ + diff --git a/run.sh b/run.sh index 89ce2c4f..c49310da 100755 --- a/run.sh +++ b/run.sh @@ -115,6 +115,20 @@ function remove_foldseek() { docker exec -it venome-backend rm -fr foldseek/ } +function add_tmalign() { + docker exec -it venome-backend wget https://seq2fun.dcmb.med.umich.edu//TM-align/TMalign_cpp.gz + docker exec -it venome-backend mkdir tmalign + docker exec -it venome-backend gzip -d TMalign_cpp.gz + docker exec -it venome-backend mv TMalign_cpp tmalign/tmalign + docker exec -it venome-backend chmod a+x tmalign/tmalign + docker exec -it venome-backend rm -f TMalign_cpp.gz +} + +function remove_tmalign() { + docker exec -it venome-backend rm -f TMalign_cpp.gz* + docker exec -it venome-backend rm -fr tmalign/ +} + function scrape_func_names() { functions=($(grep -oE 'function[[:space:]]+[a-zA-Z_][a-zA-Z_0-9]*' ./run.sh | sed 's/function[[:space:]]*//')) }