From de8a6a07f348d8584ab675348e7ffc7b40d5f1cb Mon Sep 17 00:00:00 2001 From: ansengarvin <45224464+ansengarvin@users.noreply.github.com> Date: Thu, 18 Apr 2024 19:41:04 -0700 Subject: [PATCH 1/8] Added /protein/alignment endpoint, which re-runs TM align and grabs what it prints to the console. --- backend/src/api/protein.py | 21 +++++++++++++++++++++ backend/src/tmalign.py | 6 +++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/backend/src/api/protein.py b/backend/src/api/protein.py index 4e55a169..bedb0369 100644 --- a/backend/src/api/protein.py +++ b/backend/src/api/protein.py @@ -397,3 +397,24 @@ def align_proteins(proteinA: str, proteinB: str): except Exception as e: log.error(e) raise HTTPException(status_code=500, detail=str(e)) + +# Returns the alignment string info from TM Align's console log. +@router.get("/protein/alignment/{proteinA:str}/{proteinB:str}", response_model=list[str]) +def align_proteins(proteinA: str, proteinB: str): + if not protein_name_found(proteinA) or not protein_name_found(proteinB): + raise HTTPException( + status_code=404, detail="One of the proteins provided is not found in DB" + ) + try: + filepath_pdbA = stored_pdb_file_name(proteinA) + filepath_pdbB = stored_pdb_file_name(proteinB) + tmalign_output = tm_align_return(filepath_pdbA, filepath_pdbB, 1) + + log.warn("TM Align Output follows:") + tmalign_output_list = tmalign_output.splitlines() + tmalign_curated = tmalign_output_list[18:21] + log.warn(tmalign_curated) + return tmalign_curated + except Exception as e: + log.error(e) + raise HTTPException(status_code=500, detail=str(e)) diff --git a/backend/src/tmalign.py b/backend/src/tmalign.py index ad0f727d..7c2bef38 100644 --- a/backend/src/tmalign.py +++ b/backend/src/tmalign.py @@ -101,7 +101,7 @@ def tm_align( return desired_file -def tm_align_return(pdbA: str, pdbB: str) -> str: +def tm_align_return(pdbA: str, pdbB: str, consoleOutput = False) -> str: """ Description: Returns two overlaid, aligned, and colored PDB structures in a single PDB file. @@ -121,10 +121,10 @@ def tm_align_return(pdbA: str, pdbB: str) -> str: with UniqueTempDir(base_path=TMALIGN_LOCATION) as temp_dir_path: try: output_location = os.path.join(temp_dir_path, "output") - cmd = f"{TMALIGN_EXECUTABLE} {pdbA} {pdbB} -o {output_location}" + cmd = f"{TMALIGN_EXECUTABLE} {pdbA} {pdbB} -o {output_location} > {output_location}.txt" bash_cmd(cmd) - tmalign_pdb_path = f"{output_location}_all_atm" + tmalign_pdb_path = f"{output_location}_all_atm" if not consoleOutput else f"{output_location}.txt" with open(tmalign_pdb_path, "r") as tmalign_pdb_file: tmalign_pdb_file_str = tmalign_pdb_file.read() From ab9c5ba44824e8f894a810bf2d9541b1de3de48c Mon Sep 17 00:00:00 2001 From: ansengarvin <45224464+ansengarvin@users.noreply.github.com> Date: Thu, 18 Apr 2024 19:46:44 -0700 Subject: [PATCH 2/8] Added explanatory code comment --- backend/src/api/protein.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backend/src/api/protein.py b/backend/src/api/protein.py index bedb0369..15cb1613 100644 --- a/backend/src/api/protein.py +++ b/backend/src/api/protein.py @@ -380,7 +380,13 @@ def edit_protein_entry(body: EditBody, req: Request): except Exception: raise HTTPException(500, "Edit failed, git gud") +""" +Q: Why not combine /protein/pdb and /protein/alignment into one endpoint which returns the same value, since + it looks like they do the same thing? +A: In order to display a PDB in Mol*, we need to pass it an HTTP URL. That's the primary purpose of /protein/pdb - + to give it that URL. As such, we need a different endpoint to get the alignment logs from TMalign. +""" # /pdb with two attributes returns both PDBs, superimposed and with different colors. @router.get("/protein/pdb/{proteinA:str}/{proteinB:str}", response_model=str) def align_proteins(proteinA: str, proteinB: str): @@ -398,6 +404,7 @@ def align_proteins(proteinA: str, proteinB: str): log.error(e) raise HTTPException(status_code=500, detail=str(e)) + # Returns the alignment string info from TM Align's console log. @router.get("/protein/alignment/{proteinA:str}/{proteinB:str}", response_model=list[str]) def align_proteins(proteinA: str, proteinB: str): From b05839e87656214cc3bac7f37c875196ccc40ba4 Mon Sep 17 00:00:00 2001 From: ansengarvin <45224464+ansengarvin@users.noreply.github.com> Date: Thu, 2 May 2024 19:08:48 -0700 Subject: [PATCH 3/8] Added additional TM Align info added via API endpoint --- backend/src/api/protein.py | 50 ++++++++++++++----- frontend/src/lib/openapi/index.ts | 1 + .../src/lib/openapi/models/TMAlignInfo.ts | 13 +++++ .../lib/openapi/services/DefaultService.ts | 24 +++++++++ frontend/src/routes/Align.svelte | 1 + 5 files changed, 77 insertions(+), 12 deletions(-) create mode 100644 frontend/src/lib/openapi/models/TMAlignInfo.ts diff --git a/backend/src/api/protein.py b/backend/src/api/protein.py index 15cb1613..75624d69 100644 --- a/backend/src/api/protein.py +++ b/backend/src/api/protein.py @@ -14,6 +14,7 @@ from fastapi import APIRouter from fastapi.responses import FileResponse, StreamingResponse from fastapi.requests import Request +import re router = APIRouter() @@ -380,13 +381,6 @@ def edit_protein_entry(body: EditBody, req: Request): except Exception: raise HTTPException(500, "Edit failed, git gud") -""" -Q: Why not combine /protein/pdb and /protein/alignment into one endpoint which returns the same value, since - it looks like they do the same thing? - -A: In order to display a PDB in Mol*, we need to pass it an HTTP URL. That's the primary purpose of /protein/pdb - - to give it that URL. As such, we need a different endpoint to get the alignment logs from TMalign. -""" # /pdb with two attributes returns both PDBs, superimposed and with different colors. @router.get("/protein/pdb/{proteinA:str}/{proteinB:str}", response_model=str) def align_proteins(proteinA: str, proteinB: str): @@ -404,10 +398,17 @@ def align_proteins(proteinA: str, proteinB: str): log.error(e) raise HTTPException(status_code=500, detail=str(e)) +class TMAlignInfo(CamelModel): + aligned_length: str | None + rmsd: str | None + seq_id: str | None + chain1_tmscore: str | None + chain2_tmscore: str | None + alignment_string: list[str] # Returns the alignment string info from TM Align's console log. -@router.get("/protein/alignment/{proteinA:str}/{proteinB:str}", response_model=list[str]) -def align_proteins(proteinA: str, proteinB: str): +@router.get("/protein/tmalign/{proteinA:str}/{proteinB:str}", response_model=TMAlignInfo) +def tm_info(proteinA: str, proteinB: str): if not protein_name_found(proteinA) or not protein_name_found(proteinB): raise HTTPException( status_code=404, detail="One of the proteins provided is not found in DB" @@ -418,10 +419,35 @@ def align_proteins(proteinA: str, proteinB: str): tmalign_output = tm_align_return(filepath_pdbA, filepath_pdbB, 1) log.warn("TM Align Output follows:") + # Split TMAlign data into an array format tmalign_output_list = tmalign_output.splitlines() - tmalign_curated = tmalign_output_list[18:21] - log.warn(tmalign_curated) - return tmalign_curated + log.warn(tmalign_output) + + # Grab aligned length, RMSD, and + tmalign_tri = tmalign_output_list[12].split(', ') + + log.warn("Aligned Length:") + log.warn(tmalign_tri[0]) + # Note: \d+?.\d* means "match 1 or more numbers, 0 or 1 decimal points, and 0 or more numbers" in regex + aligned_length = re.search("\d+?.\d*", tmalign_tri[0]).group() + rmsd = re.search("\d+.?\d*", tmalign_tri[1]).group() + seq_id = re.search("\d+.?\d*", tmalign_tri[2]).group() + + tmalign_string= tmalign_output_list[18:21] + + # NOTE: This is ONLY grabbing the TM-Score from the file. It's leaving out the LN and d0 stats. + chain1_normalized_tm_score = re.search("\d+.?\d*", tmalign_output_list[13]).group() + chain2_normalized_tm_score = re.search("\d+.?\d*", tmalign_output_list[14]).group() + + + return TMAlignInfo( + aligned_length = aligned_length, + rmsd = rmsd, + seq_id = seq_id, + chain1_tmscore = chain1_normalized_tm_score, + chain2_tmscore = chain2_normalized_tm_score, + alignment_string = tmalign_string + ) except Exception as e: log.error(e) raise HTTPException(status_code=500, detail=str(e)) diff --git a/frontend/src/lib/openapi/index.ts b/frontend/src/lib/openapi/index.ts index f94854ae..c2a924f0 100644 --- a/frontend/src/lib/openapi/index.ts +++ b/frontend/src/lib/openapi/index.ts @@ -29,6 +29,7 @@ export type { RangeFilter } from './models/RangeFilter'; export type { SearchProteinsBody } from './models/SearchProteinsBody'; export type { SearchProteinsResults } from './models/SearchProteinsResults'; export type { SimilarProtein } from './models/SimilarProtein'; +export type { TMAlignInfo } from './models/TMAlignInfo'; export type { UploadArticleImageComponent } from './models/UploadArticleImageComponent'; export type { UploadArticleProteinComponent } from './models/UploadArticleProteinComponent'; export type { UploadArticleTextComponent } from './models/UploadArticleTextComponent'; diff --git a/frontend/src/lib/openapi/models/TMAlignInfo.ts b/frontend/src/lib/openapi/models/TMAlignInfo.ts new file mode 100644 index 00000000..a72087c5 --- /dev/null +++ b/frontend/src/lib/openapi/models/TMAlignInfo.ts @@ -0,0 +1,13 @@ +/* generated using openapi-typescript-codegen -- do not edit */ +/* istanbul ignore file */ +/* tslint:disable */ +/* eslint-disable */ +export type TMAlignInfo = { + alignedLength: (string | null); + rmsd: (string | null); + seqId: (string | null); + chain1Tmscore: (string | null); + chain2Tmscore: (string | null); + alignmentString: Array; +}; + diff --git a/frontend/src/lib/openapi/services/DefaultService.ts b/frontend/src/lib/openapi/services/DefaultService.ts index d125304c..daab1902 100644 --- a/frontend/src/lib/openapi/services/DefaultService.ts +++ b/frontend/src/lib/openapi/services/DefaultService.ts @@ -20,6 +20,7 @@ import type { RangeFilter } from '../models/RangeFilter'; import type { SearchProteinsBody } from '../models/SearchProteinsBody'; import type { SearchProteinsResults } from '../models/SearchProteinsResults'; import type { SimilarProtein } from '../models/SimilarProtein'; +import type { TMAlignInfo } from '../models/TMAlignInfo'; import type { UploadArticleImageComponent } from '../models/UploadArticleImageComponent'; import type { UploadArticleProteinComponent } from '../models/UploadArticleProteinComponent'; import type { UploadArticleTextComponent } from '../models/UploadArticleTextComponent'; @@ -329,6 +330,29 @@ export class DefaultService { }, }); } + /** + * Tm Info + * @param proteinA + * @param proteinB + * @returns TMAlignInfo Successful Response + * @throws ApiError + */ + public static tmInfo( + proteinA: string, + proteinB: string, + ): CancelablePromise { + return __request(OpenAPI, { + method: 'GET', + url: '/protein/tmalign/{proteinA}/{proteinB}', + path: { + 'proteinA': proteinA, + 'proteinB': proteinB, + }, + errors: { + 422: `Validation Error`, + }, + }); + } /** * Get Article * get_article diff --git a/frontend/src/routes/Align.svelte b/frontend/src/routes/Align.svelte index 20e28915..ecd6a6a6 100644 --- a/frontend/src/routes/Align.svelte +++ b/frontend/src/routes/Align.svelte @@ -10,6 +10,7 @@ import * as d3 from "d3"; import { undoFormatProteinName } from "../lib/format"; import AlignBlock from "../lib/AlignBlock.svelte"; + import { AccordionItem, Accordion } from "flowbite-svelte"; export let proteinA: string; export let proteinB: string; From 606c593bb0cb82ed5090008200cca453581f1da9 Mon Sep 17 00:00:00 2001 From: ansengarvin <45224464+ansengarvin@users.noreply.github.com> Date: Thu, 2 May 2024 19:31:52 -0700 Subject: [PATCH 4/8] Added TM-Align Data under accordion in compare page --- backend/src/api/protein.py | 10 ++-- .../src/lib/openapi/models/TMAlignInfo.ts | 4 +- .../lib/openapi/services/DefaultService.ts | 4 +- frontend/src/routes/Align.svelte | 47 ++++++++++++++++++- 4 files changed, 54 insertions(+), 11 deletions(-) diff --git a/backend/src/api/protein.py b/backend/src/api/protein.py index 75624d69..11c4f007 100644 --- a/backend/src/api/protein.py +++ b/backend/src/api/protein.py @@ -402,13 +402,13 @@ class TMAlignInfo(CamelModel): aligned_length: str | None rmsd: str | None seq_id: str | None - chain1_tmscore: str | None - chain2_tmscore: str | None + chain1_tm_score: str | None + chain2_tm_score: str | None alignment_string: list[str] # Returns the alignment string info from TM Align's console log. @router.get("/protein/tmalign/{proteinA:str}/{proteinB:str}", response_model=TMAlignInfo) -def tm_info(proteinA: str, proteinB: str): +def get_tm_info(proteinA: str, proteinB: str): if not protein_name_found(proteinA) or not protein_name_found(proteinB): raise HTTPException( status_code=404, detail="One of the proteins provided is not found in DB" @@ -444,8 +444,8 @@ def tm_info(proteinA: str, proteinB: str): aligned_length = aligned_length, rmsd = rmsd, seq_id = seq_id, - chain1_tmscore = chain1_normalized_tm_score, - chain2_tmscore = chain2_normalized_tm_score, + chain1_tm_score = chain1_normalized_tm_score, + chain2_tm_score = chain2_normalized_tm_score, alignment_string = tmalign_string ) except Exception as e: diff --git a/frontend/src/lib/openapi/models/TMAlignInfo.ts b/frontend/src/lib/openapi/models/TMAlignInfo.ts index a72087c5..6e673ab9 100644 --- a/frontend/src/lib/openapi/models/TMAlignInfo.ts +++ b/frontend/src/lib/openapi/models/TMAlignInfo.ts @@ -6,8 +6,8 @@ export type TMAlignInfo = { alignedLength: (string | null); rmsd: (string | null); seqId: (string | null); - chain1Tmscore: (string | null); - chain2Tmscore: (string | null); + chain1TmScore: (string | null); + chain2TmScore: (string | null); alignmentString: Array; }; diff --git a/frontend/src/lib/openapi/services/DefaultService.ts b/frontend/src/lib/openapi/services/DefaultService.ts index daab1902..21ffde6b 100644 --- a/frontend/src/lib/openapi/services/DefaultService.ts +++ b/frontend/src/lib/openapi/services/DefaultService.ts @@ -331,13 +331,13 @@ export class DefaultService { }); } /** - * Tm Info + * Get Tm Info * @param proteinA * @param proteinB * @returns TMAlignInfo Successful Response * @throws ApiError */ - public static tmInfo( + public static getTmInfo( proteinA: string, proteinB: string, ): CancelablePromise { diff --git a/frontend/src/routes/Align.svelte b/frontend/src/routes/Align.svelte index ecd6a6a6..8987ce98 100644 --- a/frontend/src/routes/Align.svelte +++ b/frontend/src/routes/Align.svelte @@ -2,7 +2,7 @@ import TMAlignEntry from "../lib/ProteinLinkCard.svelte"; import { onMount } from "svelte"; - import { Backend, BACKEND_URL, type SimilarProtein, type ProteinEntry } from "../lib/backend"; + import { Backend, BACKEND_URL, type SimilarProtein, type ProteinEntry, type TMAlignInfo } from "../lib/backend"; import Molstar from "../lib/Molstar.svelte"; import DelayedSpinner from "../lib/DelayedSpinner.svelte"; import { DownloadOutline } from "flowbite-svelte-icons"; @@ -20,6 +20,8 @@ let foldseekData: SimilarProtein; let foldseekError = false; let error = false; + let tmData: TMAlignInfo; + let tmDataError = false; const dark2green = d3.schemeDark2[0]; const dark2orange = d3.schemeDark2[1]; @@ -47,8 +49,16 @@ ); foldseekError = true; } - + try { + tmData = await Backend.getTmInfo(proteinA, proteinB) + } catch (e) { + console.error(e); + console.error("NEED TO DOWMLOAD T M ALIGN IN THE SERVER. SEE THE SERVER ERROR MESSAGE.") + tmDataError = true; + } + + // if we could not find the entry, the id is garbo if (entryA == null || entryB == null) error = true; console.log(entryA, entryB); @@ -73,6 +83,39 @@ + + + TM-Align Data + {#if tmData === undefined && ! tmDataError} + + {:else if tmData !== undefined} +
+ Aligned Length: {tmData.alignedLength} +
+
+ RMSD: {tmData.rmsd} +
+
+ Seq_ID: {tmData.seqId} +
+
+ TM Score (Chain 1-Normalized): {tmData.chain1TmScore} +
+
+ TM Score (Chain 2-Normalized): {tmData.chain2TmScore} +
+ {/if} +
+

Foldseek Data

From d4ca3f4597c7c43ccf1eb8f417f2f8a04fbdee07 Mon Sep 17 00:00:00 2001 From: ansengarvin <45224464+ansengarvin@users.noreply.github.com> Date: Thu, 2 May 2024 19:36:07 -0700 Subject: [PATCH 5/8] Moved foldseek data on compare page to accordion object --- frontend/src/routes/Align.svelte | 51 ++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/frontend/src/routes/Align.svelte b/frontend/src/routes/Align.svelte index 8987ce98..b44a185c 100644 --- a/frontend/src/routes/Align.svelte +++ b/frontend/src/routes/Align.svelte @@ -116,33 +116,40 @@ {/if} -

- Foldseek Data -

- {#if foldseekData === undefined && !foldseekError} + + + Foldseek Data + {#if foldseekData === undefined && !foldseekError} - {:else if foldseekData !== undefined} -
- Prob. Match: {foldseekData.prob} -
-
- E-Value: {foldseekData.evalue} -
-
- Region of Similarity - -
- {/if} + {:else if foldseekData !== undefined} +
+ Prob. Match: {foldseekData.prob} +
+
+ E-Value: {foldseekData.evalue} +
+
+ Region of Similarity + +
+ {/if} +
+