Skip to content

Commit

Permalink
feat: upload and store into db
Browse files Browse the repository at this point in the history
  • Loading branch information
xnought committed Nov 17, 2023
1 parent ae1f6a4 commit 15906ae
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 85 deletions.
15 changes: 13 additions & 2 deletions backend/src/api_types.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pydantic import BaseModel, ConfigDict
import enum


# https://github.com/zeno-ml/zeno-hub/blob/9d2f8b5841d99aeba9ec405b0bc6a5b1272b276f/backend/zeno_backend/classes/base.py#L20
Expand All @@ -25,7 +26,6 @@ class CamelModel(BaseModel):
class ProteinEntry(CamelModel):
    """A single protein record as exposed by the API.

    The server code in this change builds instances from rows of the
    ``proteins`` table.
    """

    # Protein name (the ``name`` column).
    name: str
    # Row id; the server converts it with ``str(...)`` before building the model.
    id: str
    # NOTE(review): presumably the name of the stored AlphaFold PDB file;
    # the queries in this change no longer select this column -- confirm
    # whether this field is still meant to exist.
    filePDBAlphaFold: str
    # Value of the ``length`` column (written from ``PDB.num_amino_acids``).
    length: int
    # Value of the ``mass`` column (written from ``PDB.mass_daltons``).
    mass: float

Expand All @@ -35,5 +35,16 @@ class AllEntries(CamelModel):


class UploadBody(CamelModel):
    """Request body accepted by the ``/protein-upload`` endpoint."""

    # Requested protein name; the upload handler replaces spaces with
    # underscores before using it.
    name: str
    # Base64-encoded PDB file contents, prefixed by a data-URL style header
    # (e.g. ``data:application/octet-stream;base64,``).
    pdb_file_base64: str
    # NOTE(review): presumably the client-side file name of the upload; the
    # new upload handler does not read it -- confirm whether it is still needed.
    pdb_file_name: str


@enum.unique
class UploadError(str, enum.Enum):
    """Failure codes the protein upload endpoint can report.

    The ``str`` mixin makes every member compare equal to (and serialize as)
    its plain string value.
    """

    # The requested protein name already exists in the database.
    NAME_NOT_UNIQUE = "NAME_NOT_UNIQUE"
    # The uploaded PDB contents could not be decoded or parsed.
    PARSE_ERROR = "PARSE_ERROR"
    # Writing the PDB file to disk or inserting the database row failed.
    WRITE_ERROR = "WRITE_ERROR"
    # NOTE(review): presumably for database query failures; not returned by
    # any code visible in this change.
    QUERY_ERROR = "QUERY_ERROR"


class UploadStatus(CamelModel):
    """Model wrapping a single ``UploadError`` code.

    NOTE(review): the upload endpoint visible in this change returns
    ``UploadError`` (or ``None``) directly rather than this model -- confirm
    where this wrapper is meant to be used.
    """

    # The error code describing why an upload failed.
    status: UploadError
49 changes: 0 additions & 49 deletions backend/src/file.py

This file was deleted.

102 changes: 102 additions & 0 deletions backend/src/protein.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import logging as log
import os
from base64 import b64decode
from io import StringIO

from Bio.PDB import PDBParser
from Bio.SeqUtils import molecular_weight, seq1

from .db import Database


def decode_base64(b64_header_and_data: str) -> str:
    """Decode a base64 payload into UTF-8 text.

    The input may carry a data-URL style header such as
    ``data:application/octet-stream;base64,``; everything up to and including
    the first comma is ignored. Input without a header is decoded as-is.

    Args:
        b64_header_and_data: Base64 text, optionally prefixed by a header that
            ends with a comma.

    Returns:
        The decoded payload as a ``str``.

    Raises:
        binascii.Error: If the payload is not valid base64.
        UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
    """
    # A comma is never part of the base64 alphabet, so the first one (if any)
    # can only be the header separator. ``partition`` does not raise when the
    # separator is missing, unlike ``str.index``.
    _header, separator, payload = b64_header_and_data.partition(",")

    # Decode only what follows the comma; previously the comma itself was
    # passed to ``b64decode`` and only worked because non-alphabet characters
    # are discarded by default.
    b64_data_only = payload if separator else b64_header_and_data
    return b64decode(b64_data_only).decode("utf-8")


class PDB:
    """A PDB structure file held in memory and parsed with Biopython.

    Attributes:
        name: Identifier for the structure; also used to build the file name
            returned by ``pdb_file_name``.
        file_contents: The raw text of the PDB file.
        parser: The ``Bio.PDB.PDBParser`` used to parse ``file_contents``.
        structure: The structure object returned by the parser.
    """

    # Directory (relative to the working directory) where PDB files are stored.
    DATA_DIR = "src/data/pdbAlphaFold"

    def __init__(self, file_contents: str, name: str = ""):
        """Parse ``file_contents`` as a PDB file.

        Args:
            file_contents: Text of the PDB file.
            name: Identifier passed to the parser as the structure id.

        Raises:
            Exception: Whatever the Biopython parser raises is propagated
                unchanged to the caller.
        """
        self.name = name
        self.file_contents = file_contents

        # No try/except here: catching an exception only to re-raise it adds
        # nothing, so parser errors simply propagate to the caller.
        self.parser = PDBParser()
        self.structure = self.parser.get_structure(
            id=name, file=StringIO(file_contents)
        )

    @property
    def pdb_file_name(self) -> str:
        """Path of the ``.pdb`` file for this structure inside ``DATA_DIR``."""
        return f"{os.path.join(self.DATA_DIR, self.name)}.pdb"

    @property
    def num_amino_acids(self) -> int:
        """Number of residues found in the parsed structure."""
        return len(self.amino_acids())

    @property
    def mass_daltons(self):
        """Molecular weight of the one-letter residue sequence.

        Computed by ``Bio.SeqUtils.molecular_weight`` with
        ``seq_type="protein"``.
        """
        sequence = "".join(self.amino_acids())
        return molecular_weight(seq=sequence, seq_type="protein")

    def amino_acids(self, one_letter_code=True):
        """Return the residue names of the structure, in parser order.

        Args:
            one_letter_code: When true, each residue name is converted with
                ``Bio.SeqUtils.seq1``; otherwise the residue names are
                returned exactly as stored on the residues.

        Returns:
            A list with one string per residue.
        """
        residues = self.structure.get_residues()
        if one_letter_code:
            return [seq1(residue.resname) for residue in residues]
        return [residue.resname for residue in residues]


class Protein:
    """Static helpers for parsing uploaded PDB files and persisting proteins."""

    @staticmethod
    def parse_pdb(name: str, file_contents: str, encoding="str"):
        """Build a ``PDB`` object from uploaded file contents.

        Args:
            name: Name given to the resulting ``PDB``.
            file_contents: The PDB text, either plain or base64 encoded.
            encoding: ``"str"`` when ``file_contents`` is plain text,
                ``"b64"`` when it must first go through ``decode_base64``.

        Returns:
            The parsed ``PDB`` instance.

        Raises:
            ValueError: If ``encoding`` is neither ``"str"`` nor ``"b64"``.
        """
        if encoding == "str":
            return PDB(file_contents, name)
        if encoding == "b64":
            return PDB(decode_base64(file_contents), name)
        raise ValueError(f"Invalid encoding: {encoding}")

    @staticmethod
    def name_taken(name: str):
        """Check whether a protein name already exists in the database.

        Args:
            name: Protein name to look up.

        Returns:
            True if at least one row with that name exists, False otherwise.
            A failed query is logged and also reported as False, so existing
            callers keep working.
        """
        with Database() as db:
            try:
                entry_sql = db.execute_return(
                    """SELECT id FROM proteins
                    WHERE name = %s""",
                    [name],
                )

                # if we got a result back
                return entry_sql is not None and len(entry_sql) != 0

            except Exception:
                # Still best-effort, but no longer silent: record why the
                # lookup failed instead of hiding the database error.
                log.exception("could not check whether protein name is taken")
                return False

    @staticmethod
    def save(pdb: "PDB"):
        """Write the PDB file to disk and insert its row into ``proteins``.

        Args:
            pdb: Parsed PDB whose contents, name, length and mass are stored.

        Raises:
            ValueError: If ``pdb.name`` contains a directory component, which
                would place the file outside the data directory.
            Exception: With message ``"Could not save pdb file"`` if writing
                the file fails (the original error is chained as the cause).
            Exception: Whatever the database layer raises if the insert fails;
                the file written just before is removed again in that case.
        """
        file_path = pdb.pdb_file_name
        log.warning(file_path)

        # The name originates from an upload request. Reject anything with a
        # directory component so the write cannot escape the data directory.
        if os.path.basename(pdb.name) != pdb.name:
            log.warning("could not save")
            raise ValueError(f"Invalid protein name: {pdb.name!r}")

        try:
            # Explicit encoding: do not depend on the platform locale.
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(pdb.file_contents)
        except Exception as e:
            log.warning("could not save")
            raise Exception("Could not save pdb file") from e

        with Database() as db:
            try:
                db.execute(
                    """INSERT INTO proteins (name, length, mass) VALUES (%s, %s, %s);""",
                    [
                        pdb.name,
                        pdb.num_amino_acids,
                        pdb.mass_daltons,
                    ],
                )
            except Exception:
                # Keep disk and database consistent: without a row, the file
                # written above would be an orphan.
                try:
                    os.remove(file_path)
                except OSError:
                    log.warning("could not remove %s after failed insert", file_path)
                raise
61 changes: 27 additions & 34 deletions backend/src/server.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .setup import init_fastapi_app, disable_cors
from .api_types import ProteinEntry, UploadBody
from .api_types import ProteinEntry, UploadBody, UploadStatus, UploadError
from .db import Database
from .file import decode_base64, PDB
from .protein import Protein
import logging as log
from fastapi.staticfiles import StaticFiles

Expand All @@ -21,7 +21,7 @@ def get_all_entries():
with Database() as db:
try:
entries_sql = db.execute_return(
"""SELECT id, name, filePDBAlphaFold, length, mass FROM proteins"""
"""SELECT id, name, length, mass FROM proteins"""
)
log.warn(entries_sql)

Expand All @@ -31,9 +31,8 @@ def get_all_entries():
ProteinEntry(
id=str(entry[0]),
name=entry[1],
filePDBAlphaFold=entry[2],
length=entry[3],
mass=entry[4],
length=entry[2],
mass=entry[3],
)
for entry in entries_sql
]
Expand All @@ -49,7 +48,7 @@ def get_protein_entry(protein_id: str):
with Database() as db:
try:
entry_sql = db.execute_return(
"""SELECT id, name, filePDBAlphaFold, length, mass FROM proteins
"""SELECT id, name, length, mass FROM proteins
WHERE id = %s""",
[protein_id],
)
Expand All @@ -61,40 +60,34 @@ def get_protein_entry(protein_id: str):
return ProteinEntry(
id=str(entry_sql[0][0]),
name=entry_sql[0][1],
filePDBAlphaFold=entry_sql[0][2],
length=entry_sql[0][3],
mass=entry_sql[0][4],
length=entry_sql[0][2],
mass=entry_sql[0][3],
)

except Exception as e:
log.error(e)


# None return means success
@app.post("/protein-upload", response_model=UploadError | None)
def upload_protein_entry(body: UploadBody):
    """Store an uploaded PDB file and register the protein in the database.

    Args:
        body: Upload request carrying the protein name and the base64-encoded
            PDB file contents.

    Returns:
        ``None`` on success, otherwise the ``UploadError`` describing which
        step failed.
    """
    # Spaces are replaced so the name can double as a file name. Work on a
    # local copy instead of mutating the request model.
    name = body.name.replace(" ", "_")

    # check that the name is not already taken in the DB
    if Protein.name_taken(name):
        return UploadError.NAME_NOT_UNIQUE

    # if name is unique, decode and parse the uploaded pdb file
    try:
        pdb = Protein.parse_pdb(name, body.pdb_file_base64, encoding="b64")
    except Exception:
        # Endpoint boundary: report an error code to the client, but keep the
        # traceback in the server log.
        log.exception("could not parse uploaded pdb file")
        return UploadError.PARSE_ERROR

    # Save to data/ folder and db
    try:
        Protein.save(pdb)
    except Exception:
        log.exception("could not save uploaded protein")
        return UploadError.WRITE_ERROR

    return None


def export_app_for_docker():
Expand Down

0 comments on commit 15906ae

Please sign in to comment.