Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Protein Comparison via Model Overlaying using TM Align #192

Merged
merged 16 commits into from
Mar 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion backend/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
__pycache__/
.venv
foldseek/
foldseek/
tmalign/
17 changes: 17 additions & 0 deletions backend/src/api/protein.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
from Bio.PDB import PDBParser
from Bio.SeqUtils import molecular_weight, seq1
from ..db import Database, bytea_to_str, str_to_bytea
from fastapi.exceptions import HTTPException

from ..api_types import ProteinEntry, UploadBody, UploadError, EditBody, CamelModel
from ..auth import requiresAuthentication
from ..tmalign import tm_align
from io import BytesIO
from fastapi import APIRouter
from fastapi.responses import FileResponse, StreamingResponse
Expand Down Expand Up @@ -335,3 +337,18 @@ def edit_protein_entry(body: EditBody, req: Request):

except Exception:
return UploadError.WRITE_ERROR


# GET /protein/pdb/{proteinA}/{proteinB}: serves the single PDB file that
# tm_align produces for the two named proteins.
@router.get("/protein/pdb/{proteinA:str}/{proteinB:str}")
def search_proteins(proteinA: str, proteinB: str):
    """Return the TM-align output for two proteins as a downloadable PDB file.

    Params:
        proteinA:
            Name of the first protein, taken from the URL path.
        proteinB:
            Name of the second protein, taken from the URL path.

    Returns:
        A FileResponse for the path returned by tm_align, offered for download
        as "<proteinA>_<proteinB>.pdb".

    Raises:
        HTTPException: status 500 carrying the message of any exception raised
            while resolving the stored files or running the alignment.
    """
    try:
        aligned_path = tm_align(
            proteinA,
            stored_pdb_file_name(proteinA),
            proteinB,
            stored_pdb_file_name(proteinB),
        )
        download_name = f"{proteinA}_{proteinB}.pdb"
        return FileResponse(aligned_path, filename=download_name)
    except Exception as e:
        # Log the failure, then surface it to the client as a 500.
        log.error(e)
        raise HTTPException(status_code=500, detail=str(e))
9 changes: 9 additions & 0 deletions backend/src/api_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,12 @@ class LoginBody(CamelModel):
class LoginResponse(CamelModel):
token: str
error: str


class CompareBody(CamelModel):
    """Names of the two proteins to compare.

    NOTE(review): presumably intended as a request body; the comparison endpoint
    visible in this change takes both names from the URL path instead -- confirm
    whether this model is still needed.
    """

    # Name of the first protein.
    proteinA: str
    # Name of the second protein.
    proteinB: str


class CompareResponse(CamelModel):
    """Response model carrying a file as a list of strings."""

    # NOTE(review): presumably the lines of the aligned PDB file -- confirm
    # against whichever endpoint ends up returning this model.
    file: list[str]
75 changes: 75 additions & 0 deletions backend/src/tmalign.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import subprocess
import logging as log
import os


def bash_cmd(cmd: str | list[str]) -> str:
return subprocess.check_output(cmd, shell=True).decode()


# Directory holding the TM-align executable; tm_align also creates one
# "<proteinA>-<proteinB>" output sub-directory per compared pair underneath it.
TMALIGN_LOCATION = "/app/tmalign"
# Full path of the TM-align executable itself.
TMALIGN_EXECUTABLE = f"{TMALIGN_LOCATION}/tmalign"


def assert_tmalign_installed():
    """Verify that the TM-align executable exists at TMALIGN_EXECUTABLE.

    Raises:
        ImportError: if nothing exists at that path.
    """
    if not os.path.exists(TMALIGN_EXECUTABLE):
        raise ImportError(
            "tm align executable not installed. Please install manually - Automatic install TODO."
        )


def parse_pdb(filepath: str) -> list[str]:
    """Read the text file at *filepath* and return its lines.

    Each returned line keeps its trailing newline, exactly as read.
    """
    with open(filepath, "r") as pdb_file:
        return pdb_file.readlines()


def tm_align(
    protein_A: str, pdbA: str, protein_B: str, pdbB: str, type: str = "_all_atm"
):
    """
    Description:
        Runs TM-align on two PDB files and returns the path of one of the files it
        writes. Results are kept in a per-pair directory under TMALIGN_LOCATION, so
        a pair that has already been aligned is not aligned again.

    Params:
        protein_A:
            The name of the first protein.
        pdbA:
            The filepath of the first protein.
        protein_B:
            The name of the second protein.
        pdbB:
            The filepath of the second protein.
        type:
            Suffix selecting which TM-align output file to return. Defaults to
            "_all_atm". According to the original author's notes the options are:
                "", "_all", "_all_atm", "_all_atm_lig", "_atm",
                ".pml", "_all.pml", "_all_atm.pml", "_all_atm_lig.pml", "_atm.pml"
            (the suffix-less variants appear to be PDB files; the exact set
            presumably depends on the installed TM-align version -- confirm).

    Returns:
        The path "<TMALIGN_LOCATION>/<protein_A>-<protein_B>/output<type>". The
        path is returned even if TM-align did not produce that particular file.

    Raises:
        subprocess.CalledProcessError: if TM-align exits with a non-zero status.
        OSError: if the output directory cannot be created or the executable
            cannot be started.
    """
    # NOTE(review): joining the names with "-" means the pairs ("a-b", "c") and
    # ("a", "b-c") share one cache directory -- confirm protein names cannot
    # contain "-" or switch to an unambiguous key.
    dir_name = protein_A + "-" + protein_B
    full_path = f"{TMALIGN_LOCATION}/{dir_name}"
    out_file = full_path + "/output"
    desired_file = out_file + type

    # Key the cache on the requested file rather than on the directory: a run that
    # failed after the directory was created must not be mistaken for a cached
    # result on later calls.
    if os.path.exists(desired_file):
        log.warning(
            f"Path {desired_file} already exists. Do not need to run TM align."
        )
        return desired_file

    log.warning(f"Path {desired_file} does not exist. Running TM align.")
    # Argument vector, no shell: the names and paths originate from the request
    # URL and must never be interpreted by a shell.
    cmd = [TMALIGN_EXECUTABLE, pdbA, pdbB, "-o", out_file]
    try:
        os.makedirs(full_path, exist_ok=True)
        log.warning(f"Attempting to align now with cmd {cmd}")
        stdout = subprocess.check_output(cmd).decode()
        log.warning(stdout)
    except Exception as e:
        log.warning(e)
        raise

    return desired_file
2 changes: 2 additions & 0 deletions frontend/src/Router.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import Edit from "./routes/Edit.svelte";
import Tutorials from "./routes/Tutorials.svelte";
import ForceUploadThumbnails from "./routes/ForceUploadThumbnails.svelte";
import Compare from "./routes/Compare.svelte";
</script>

<Router>
Expand All @@ -27,6 +28,7 @@
>
<Route path="/edit/:id" let:params><Edit urlId={params.id} /></Route>
<Route path="/force-upload-thumbnails"><ForceUploadThumbnails /></Route>
<Route path="/compare/:a/:b" let:params><Compare proteinA={params.a} proteinB={params.b}/></Route>
<Route path="/*"><Error /></Route>
</main>
</Router>
8 changes: 8 additions & 0 deletions frontend/src/lib/SimilarProteins.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
<th> Probability Match</th>
<th> E-Value </th>
<th> Description </th>
<th> Compare </th>
</tr>
{#each similarProteins as protein}
<tr class="pdb-row">
Expand All @@ -41,6 +42,13 @@
<td>{protein.prob}</td>
<td>{protein.evalue}</td>
<td class="pdb-desc">{protein.description}</td>
<td>
<a
use:link
href="/compare/{queryProteinName}/{protein.name}"
><LinkOutline size="sm" />Compare</a
>
</td>
</tr>
{/each}
</table>
Expand Down
90 changes: 90 additions & 0 deletions frontend/src/routes/Compare.svelte
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
<script lang="ts">
  import { onMount } from "svelte";
  import { Backend, BACKEND_URL, type ProteinEntry } from "../lib/backend";
  import ProteinVis from "../lib/ProteinVis.svelte";
  import { Button, Dropdown, DropdownItem } from "flowbite-svelte";
  import Markdown from "../lib/Markdown.svelte";
  import { numberWithCommas, undoFormatProteinName } from "../lib/format";
  import { navigate } from "svelte-routing";
  import References from "../lib/References.svelte";
  import { ChevronDownSolid, PenOutline } from "flowbite-svelte-icons";
  import EntryCard from "../lib/EntryCard.svelte";
  import SimilarProteins from "../lib/SimilarProteins.svelte";
  import DelayedSpinner from "../lib/DelayedSpinner.svelte";
  import { user } from "../lib/stores/user";

  const fileDownloadDropdown = ["pdb", "fasta"];

  // Names of the two proteins to overlay, supplied by the /compare/:a/:b route.
  export let proteinA: string;
  export let proteinB: string;

  // "<A>/<B>" is handed to ProteinVis as its protein name.
  // NOTE(review): presumably ProteinVis appends this to the backend's
  // /protein/pdb/ route, which would hit the two-protein endpoint -- confirm.
  // Reactive so the viewer follows the props if they change.
  $: combined = proteinA + "/" + proteinB;

  // Set once both proteins have been found in the backend.
  let ready = false;
  // Set when either protein is missing; missingId names the one we could not find.
  let error = false;
  let missingId = "";

  // When this component mounts, check that both requested proteins exist
  // before asking the viewer to render the overlay.
  onMount(async () => {
    console.log("Requesting", proteinA, "and", proteinB, "info from backend");

    const [entryA, entryB] = await Promise.all([
      Backend.getProteinEntry(proteinA),
      Backend.getProteinEntry(proteinB),
    ]);

    // A null entry means the backend does not know that protein name.
    if (entryA == null) {
      missingId = proteinA;
      error = true;
    } else if (entryB == null) {
      missingId = proteinB;
      error = true;
    } else {
      ready = true;
    }
  });
</script>

<svelte:head>
  <title>Venome Compare {proteinA} / {proteinB}</title>
</svelte:head>

<section class="flex gap-10 p-5">
  {#if ready}
    <div id="left-side">
      <!-- TITLE AND DESCRIPTION -->
      <h1 id="title">Comparing Proteins</h1>

      <div id="description">
        {proteinA} and {proteinB}
      </div>
      <!-- No thumbnail upload here: a screenshot of this viewer shows the
           overlay of two proteins, not a single protein's structure. -->
      <ProteinVis
        format="pdb"
        proteinName={combined}
        width={750}
        height={500}
      />
    </div>
  {:else if !error}
    <!-- Otherwise, tell the user we are still loading -->
    <h1><DelayedSpinner text="Loading Protein Entry" /></h1>
  {:else}
    <!-- If the lookup failed, tell the user which id could not be found -->
    <h1>Error</h1>
    <p>Could not find a protein with the id <code>{missingId}</code></p>
  {/if}
</section>

<style>
  #left-side {
    width: 100%;
  }
  #title {
    font-size: 2.45rem;
    font-weight: 500;
    color: var(--darkblue);
  }
</style>
14 changes: 14 additions & 0 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,20 @@ function remove_foldseek() {
docker exec -it venome-backend rm -fr foldseek/
}

function add_tmalign() {
    # Download the TM-align binary inside the venome-backend container and
    # install it as an executable at tmalign/tmalign (relative to the
    # container's working directory).
    local container="venome-backend"

    # Fetch the gzip-compressed binary.
    docker exec -it "$container" wget https://seq2fun.dcmb.med.umich.edu//TM-align/TMalign_cpp.gz
    # Create the install directory, unpack, move into place, mark executable.
    docker exec -it "$container" mkdir tmalign
    docker exec -it "$container" gzip -d TMalign_cpp.gz
    docker exec -it "$container" mv TMalign_cpp tmalign/tmalign
    docker exec -it "$container" chmod a+x tmalign/tmalign
    # Remove the archive if it is still present.
    docker exec -it "$container" rm -f TMalign_cpp.gz
}

function remove_tmalign() {
    # Remove any downloaded TM-align archives and the tmalign/ directory from
    # the venome-backend container.
    #
    # The glob has to be expanded inside the container. `docker exec` runs the
    # command without a shell, so an unquoted pattern would be matched against
    # the host's files (or passed to rm literally) and never reach leftover
    # downloads such as TMalign_cpp.gz.1. Wrapping the command in `sh -c` with
    # the pattern quoted on the host lets the container's shell expand it.
    docker exec -it venome-backend sh -c 'rm -f TMalign_cpp.gz*'
    docker exec -it venome-backend rm -fr tmalign/
}

function scrape_func_names() {
    # Collect the name of every `function NAME` definition found in ./run.sh into
    # the array `functions` (assigned without `local`, so it is visible to the
    # caller): grep -o prints each "function NAME" match, and sed strips the
    # leading keyword and whitespace so only NAME remains.
    functions=($(grep -oE 'function[[:space:]]+[a-zA-Z_][a-zA-Z_0-9]*' ./run.sh | sed 's/function[[:space:]]*//'))
}
Expand Down
Loading