Skip to content

Commit

Permalink
Protein Comparison via Model Overlaying using TM Align (#192)
Browse files Browse the repository at this point in the history
* Feat: Compare endpoint which returns a TM-Aligned PDB file.

* Fix: Removed useless overlay test route

* Created incomplete compare page.

* Fix: Changed protein compare from post to get to play nicer with Molstar

* Changed /compare to /pdb to play nicer with existing code

* Created rudimentary overlap page

* Made viewport larger for this demo

* Added ability to download tm align to docker via shell command

* Added links on similarity search to redirect user to comparison page

* Linting pass with Ruff

* Format: Removed unused local variables

* Format: Removed unused imports

* Format: Commented out stdout debug print

* Format: Ruff pass on protein.py

* fix: uncomment run tmalign

* fix: add use:link to a tag

---------

Co-authored-by: xnought <bertuccd@oregonstate.edu>
  • Loading branch information
ansengarvin and xnought authored Mar 10, 2024
1 parent ef4b431 commit ac36b17
Show file tree
Hide file tree
Showing 8 changed files with 217 additions and 1 deletion.
3 changes: 2 additions & 1 deletion backend/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
__pycache__/
.venv
foldseek/
foldseek/
tmalign/
17 changes: 17 additions & 0 deletions backend/src/api/protein.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
from Bio.PDB import PDBParser
from Bio.SeqUtils import molecular_weight, seq1
from ..db import Database, bytea_to_str, str_to_bytea
from fastapi.exceptions import HTTPException

from ..api_types import ProteinEntry, UploadBody, UploadError, EditBody, CamelModel
from ..auth import requiresAuthentication
from ..tmalign import tm_align
from io import BytesIO
from fastapi import APIRouter
from fastapi.responses import FileResponse, StreamingResponse
Expand Down Expand Up @@ -331,3 +333,18 @@ def edit_protein_entry(body: EditBody, req: Request):

except Exception:
return UploadError.WRITE_ERROR


# GET /protein/pdb/<A>/<B>: given two protein names, respond with a single PDB
# file in which both structures are superimposed by TM-align.
@router.get("/protein/pdb/{proteinA:str}/{proteinB:str}")
def search_proteins(proteinA: str, proteinB: str):
    """Serve the TM-align overlay of two proteins as a downloadable PDB file.

    Args:
        proteinA: Name of the first protein (first path segment).
        proteinB: Name of the second protein (second path segment).

    Returns:
        A FileResponse for the file path produced by ``tm_align``, offered to
        the client under the name ``<proteinA>_<proteinB>.pdb``.

    Raises:
        HTTPException: With status 500 and the original error text as detail
            whenever any step fails; the error is logged first.
    """
    try:
        # NOTE(review): stored_pdb_file_name presumably maps a protein name to
        # the path of its stored PDB file -- confirm against its definition.
        path_a = stored_pdb_file_name(proteinA)
        path_b = stored_pdb_file_name(proteinB)

        aligned_path = tm_align(proteinA, path_a, proteinB, path_b)
        download_name = f"{proteinA}_{proteinB}.pdb"

        return FileResponse(aligned_path, filename=download_name)
    except Exception as err:
        log.error(err)
        raise HTTPException(status_code=500, detail=str(err))
9 changes: 9 additions & 0 deletions backend/src/api_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,12 @@ class LoginBody(CamelModel):
# Response model with two string fields, `token` and `error`.
# NOTE(review): presumably returned by the login endpoint, with `token` filled
# on success and `error` filled on failure -- confirm against the auth route.
class LoginResponse(CamelModel):
token: str
error: str


# Request model carrying the names of the two proteins to compare.
# NOTE(review): the compare route visible in this commit is a GET that takes
# both names as path parameters, so this body model may be unused -- confirm.
class CompareBody(CamelModel):
# Name of the first protein.
proteinA: str
# Name of the second protein.
proteinB: str


# Response model wrapping a list of strings under `file`.
# NOTE(review): presumably the lines of the aligned PDB file (cf. a line-based
# PDB reader returning list[str]) -- confirm; the visible compare route
# returns a FileResponse instead of this model.
class CompareResponse(CamelModel):
file: list[str]
75 changes: 75 additions & 0 deletions backend/src/tmalign.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import subprocess
import logging as log
import os


def bash_cmd(cmd: str | list[str]) -> str:
return subprocess.check_output(cmd, shell=True).decode()


# Directory (inside the backend container) that holds the TM-align binary and
# the per-pair output directories created by tm_align().
TMALIGN_LOCATION = "/app/tmalign"
# Full path of the TM-align binary itself.
TMALIGN_EXECUTABLE = TMALIGN_LOCATION + "/tmalign"


def assert_tmalign_installed():
    """Check that the TM-align executable exists at TMALIGN_EXECUTABLE.

    Returns:
        None when the executable is present.

    Raises:
        ImportError: When nothing exists at TMALIGN_EXECUTABLE.
    """
    if not os.path.exists(TMALIGN_EXECUTABLE):
        raise ImportError(
            "tm align executable not installed. Please install manually - Automatic install TODO."
        )


def parse_pdb(filepath: str) -> list[str]:
    """Read a PDB file from disk and return it as a list of lines.

    Args:
        filepath: Path of the file to read (opened in text mode).

    Returns:
        Every line of the file in order, each keeping its trailing newline.
    """
    with open(filepath, "r") as handle:
        return list(handle)


def tm_align(
    protein_A: str, pdbA: str, protein_B: str, pdbB: str, type: str = "_all_atm"
):
    """
    Description:
        Returns the path of a single PDB file containing two overlaid, aligned,
        and colored PDB structures, running TM-align first if that file has not
        been generated yet.
        The output files without extensions appear to be PDB files.
    Params:
        protein_A:
            The name of the first protein.
        pdbA:
            The filepath of the first protein.
        protein_B:
            The name of the second protein.
        pdbB:
            The filepath of the second protein.
        type:
            The kind of file you want. Experiment with these! Defaults to _all_atm,
            which shows alpha helices and beta sheets. Valid options include:
                "", "_all", "_all_atm", "_all_atm_lig", "_atm",
                ".pml", "_all.pml", "_all_atm.pml", "_all_atm_lig.pml", "_atm.pml"
    Returns:
        The path "<TMALIGN_LOCATION>/<protein_A>-<protein_B>/output<type>".
    Raises:
        Any exception raised while creating the output directory or running
        TM-align (e.g. subprocess.CalledProcessError, FileNotFoundError) is
        logged and re-raised unchanged.
    """
    dir_name = protein_A + "-" + protein_B
    full_path = f"{TMALIGN_LOCATION}/{dir_name}"
    out_file = full_path + "/output"
    desired_file = out_file + type

    # Cache on the requested file, not on the directory: a run that created the
    # directory and then failed must not make later calls return a path to a
    # file that was never written.
    if os.path.exists(desired_file):
        log.warning("File %s already exists. Do not need to run TM align.", desired_file)
        return desired_file

    log.warning("File %s does not exist. Running TM align to generate it.", desired_file)
    # Protein names and paths can originate from the request URL, so the
    # command is passed as an argument list and never through a shell.
    cmd = [TMALIGN_EXECUTABLE, pdbA, pdbB, "-o", out_file]
    try:
        os.makedirs(full_path, exist_ok=True)
        log.warning("Attempting to align now with cmd %s", cmd)
        stdout = subprocess.check_output(cmd).decode()
        log.warning(stdout)
    except Exception as e:
        log.warning(e)
        raise

    return desired_file
2 changes: 2 additions & 0 deletions frontend/src/Router.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import Edit from "./routes/Edit.svelte";
import Tutorials from "./routes/Tutorials.svelte";
import ForceUploadThumbnails from "./routes/ForceUploadThumbnails.svelte";
import Compare from "./routes/Compare.svelte";
</script>

<Router>
Expand All @@ -27,6 +28,7 @@
>
<Route path="/edit/:id" let:params><Edit urlId={params.id} /></Route>
<Route path="/force-upload-thumbnails"><ForceUploadThumbnails /></Route>
<Route path="/compare/:a/:b" let:params><Compare proteinA={params.a} proteinB={params.b}/></Route>
<Route path="/*"><Error /></Route>
</main>
</Router>
8 changes: 8 additions & 0 deletions frontend/src/lib/SimilarProteins.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
<th> Probability Match</th>
<th> E-Value </th>
<th> Description </th>
<th> Compare </th>
</tr>
{#each similarProteins as protein}
<tr class="pdb-row">
Expand All @@ -41,6 +42,13 @@
<td>{protein.prob}</td>
<td>{protein.evalue}</td>
<td class="pdb-desc">{protein.description}</td>
<td>
<a
use:link
href="/compare/{queryProteinName}/{protein.name}"
><LinkOutline size="sm" />Compare</a
>
</td>
</tr>
{/each}
</table>
Expand Down
90 changes: 90 additions & 0 deletions frontend/src/routes/Compare.svelte
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
<script lang="ts">
    import { onMount } from "svelte";
    import { Backend } from "../lib/backend";
    import ProteinVis from "../lib/ProteinVis.svelte";
    import DelayedSpinner from "../lib/DelayedSpinner.svelte";

    // Names of the two proteins to overlay (supplied by the /compare/:a/:b route).
    export let proteinA: string;
    export let proteinB: string;

    // Handed to ProteinVis as its proteinName, unchanged from before.
    // NOTE(review): presumably ProteinVis appends this to the backend's
    // /protein/pdb/ URL, which then hits the two-protein endpoint -- confirm.
    let combined = proteinA + "/" + proteinB;

    let loading = true;
    // Names the backend could not find; non-empty means we show an error.
    let missing: string[] = [];
    // Set when the lookup itself threw (e.g. network or backend failure).
    let lookupFailed = false;

    // When this component mounts, check that BOTH requested proteins exist
    // before rendering the overlay, instead of gating on an unrelated,
    // hard-coded protein id.
    onMount(async () => {
        console.log("Requesting", proteinA, "and", proteinB, "info from backend");
        try {
            const names = [proteinA, proteinB];
            const entries = await Promise.all(
                names.map((name) => Backend.getProteinEntry(name))
            );
            // a null entry means the backend does not know that name
            missing = names.filter((_, i) => entries[i] == null);
        } catch (e) {
            console.error(e);
            lookupFailed = true;
        }
        loading = false;
    });
</script>

<svelte:head>
    <title>Venome Protein Comparison: {proteinA} and {proteinB}</title>
</svelte:head>

<section class="flex gap-10 p-5">
    {#if loading}
        <!-- tell the user we are loading -->
        <h1><DelayedSpinner text="Loading Protein Entry" /></h1>
    {:else if lookupFailed}
        <h1>Error</h1>
        <p>Could not load the proteins from the backend.</p>
    {:else if missing.length > 0}
        <!-- name exactly which of the two ids could not be found -->
        <h1>Error</h1>
        {#each missing as name}
            <p>Could not find a protein with the id <code>{name}</code></p>
        {/each}
    {:else}
        <div id="left-side">
            <!-- TITLE AND DESCRIPTION -->
            <h1 id="title">Comparing Proteins</h1>

            <div id="description">
                {proteinA} and {proteinB}
            </div>
            <!-- No thumbnail upload here: a screenshot of the overlay is not a
                 valid thumbnail for either single protein. -->
            <ProteinVis
                format="pdb"
                proteinName={combined}
                width={750}
                height={500}
            />
        </div>
    {/if}
</section>

<style>
    #left-side {
        width: 100%;
    }
    #title {
        font-size: 2.45rem;
        font-weight: 500;
        color: var(--darkblue);
    }
</style>
14 changes: 14 additions & 0 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,20 @@ function remove_foldseek() {
docker exec -it venome-backend rm -fr foldseek/
}

# Download the TM-align binary into the venome-backend container and install
# it as tmalign/tmalign (relative to the container's working directory).
# Steps are chained with && so a failed download or unpack stops the install
# instead of continuing with missing files; the function returns the status of
# the first failing step. Safe to re-run: wget overwrites the archive rather
# than creating ".1" copies, mkdir -p tolerates an existing directory, and
# gzip -f overwrites a leftover unpacked file.
function add_tmalign() {
    docker exec -it venome-backend wget -O TMalign_cpp.gz https://seq2fun.dcmb.med.umich.edu//TM-align/TMalign_cpp.gz &&
    docker exec -it venome-backend mkdir -p tmalign &&
    docker exec -it venome-backend gzip -df TMalign_cpp.gz &&
    docker exec -it venome-backend mv TMalign_cpp tmalign/tmalign &&
    docker exec -it venome-backend chmod a+x tmalign/tmalign &&
    docker exec -it venome-backend rm -f TMalign_cpp.gz
}

# Remove the TM-align install (and any leftover download archives) from the
# venome-backend container.
function remove_tmalign() {
    # docker exec does not run a shell, so an unquoted glob would be expanded
    # on the HOST (or passed through literally) and never match the files in
    # the container. Run the rm through sh -c so the glob expands in there.
    docker exec -it venome-backend sh -c 'rm -f TMalign_cpp.gz*'
    docker exec -it venome-backend rm -fr tmalign/
}

# Collect the name of every function declared with the `function` keyword in
# ./run.sh (path is relative to the current directory) into the global array
# `functions`.
function scrape_func_names() {
# grep -o prints each "function <name>" match on its own line; sed strips the
# keyword and the whitespace after it; the unquoted $(...) is then word-split
# into the array elements.
functions=($(grep -oE 'function[[:space:]]+[a-zA-Z_][a-zA-Z_0-9]*' ./run.sh | sed 's/function[[:space:]]*//'))
}
Expand Down

0 comments on commit ac36b17

Please sign in to comment.