Skip to content

Commit

Permalink
feat: upload entire 400 proteins script (#102)
Browse files Browse the repository at this point in the history
* feat: upload all script

* feat: delete script too

* feat: delete too

* docs: add command to docs for upload

* fix: right alpha fold link
  • Loading branch information
xnought authored Dec 10, 2023
1 parent 4935364 commit f938fbd
Show file tree
Hide file tree
Showing 10 changed files with 76 additions and 4,492 deletions.
50 changes: 0 additions & 50 deletions backend/init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -46,33 +46,6 @@ CREATE TABLE species (
PRIMARY KEY (species_id, protein_id)
);

/*
 * Seeds the proteins table with three example rows.
 * Each row supplies a name only: length and mass are zero placeholders,
 * and content and refs are left NULL.
 */
INSERT INTO proteins (name, length, mass, content, refs)
VALUES ('Gh_comp271_c0_seq1', 0, 0.0, NULL, NULL);

INSERT INTO proteins (name, length, mass, content, refs)
VALUES ('Lb17_comp535_c2_seq1', 0, 0.0, NULL, NULL);

INSERT INTO proteins (name, length, mass, content, refs)
VALUES ('Lh14_comp2336_c0_seq1', 0, 0.0, NULL, NULL);

/*
* Inserts example species into species table
*/
Expand All @@ -81,27 +54,4 @@ INSERT INTO species(name) VALUES ('leptopilina boulardi');
INSERT INTO species(name) VALUES ('leptopilina heterotoma');
INSERT INTO species(name) VALUES ('unknown');

/*
 * Inserts connections between species and proteins, one row per example
 * protein. The ids are hard-coded literals; the trailing comments name the
 * species and protein each id is meant to refer to.
 * NOTE(review): presumably these ids match the insertion order of the
 * example rows -- confirm against the table definitions.
 */
INSERT INTO species_proteins (species_id, protein_id)
VALUES (
    1, -- 'ganaspis hookeri'
    1  -- 'Gh_comp271_c0_seq1'
);

INSERT INTO species_proteins (species_id, protein_id)
VALUES (
    2, -- 'leptopilina boulardi'
    2  -- 'Lb17_comp535_c2_seq1'
);

INSERT INTO species_proteins (species_id, protein_id)
VALUES (
    3, -- 'leptopilina heterotoma'
    3  -- 'Lh14_comp2336_c0_seq1'
);

1 change: 1 addition & 0 deletions backend/src/data/pdbAlphaFold/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.pdb
1,087 changes: 0 additions & 1,087 deletions backend/src/data/pdbAlphaFold/Gh_comp271_c0_seq1.pdb

This file was deleted.

1,646 changes: 0 additions & 1,646 deletions backend/src/data/pdbAlphaFold/Lb17_comp535_c2_seq1.pdb

This file was deleted.

1,709 changes: 0 additions & 1,709 deletions backend/src/data/pdbAlphaFold/Lh14_comp2336_c0_seq1.pdb

This file was deleted.

2 changes: 2 additions & 0 deletions docs/run.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ sh run.sh <cmd>
| `test` | Runs all unit tests |
| `test_backend` | Runs only backend unit tests |
| `test_frontend` | Runs only frontend unit tests |
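| `upload_all` | 👉 Uploads all the PDB files bundled in `galaxy/master_venom_galaxy.zip` to the system via POST requests (the backend must already be running on `localhost:8000`) |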
| `delete_all` | Deletes all protein entries and restarts the server from scratch |

There are actually many more functions, so please check out [`run.sh`](../run.sh).

Expand Down
13 changes: 13 additions & 0 deletions galaxy/delete_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import requests
import os

DIR = "../backend/src/data/pdbAlphaFold"


def delete_protein_files():
    """Reset the local PDB directory to an empty, git-ignored state.

    Removes ``DIR`` and everything beneath it, recreates the directory, and
    writes a ``.gitignore`` containing the single pattern ``*.pdb``.

    Implemented with the standard library instead of shell commands: the
    previous ``echo *.pdb > .gitignore`` left the glob unquoted, so a shell
    would expand it to real file names whenever the working directory held
    ``.pdb`` files, and the exit codes of ``rm``/``mkdir`` were ignored.
    """
    # Function-scope import so the file's top-level import block is untouched.
    import shutil

    # ignore_errors mirrors `rm -f`: a missing directory is not an error.
    shutil.rmtree(DIR, ignore_errors=True)
    os.makedirs(DIR, exist_ok=True)
    with open(os.path.join(DIR, ".gitignore"), "w") as gitignore:
        gitignore.write("*.pdb\n")


# Only run when executed as a script (e.g. `python3 delete_all.py`), so that
# importing this module never deletes anything as a side effect.
if __name__ == "__main__":
    delete_protein_files()
Binary file added galaxy/master_venom_galaxy.zip
Binary file not shown.
52 changes: 52 additions & 0 deletions galaxy/upload_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import requests
import os

CONTENT = "From the [Venom Biochemistry & Molecular Biology Laboratory](https://venombiochemistrylab.weebly.com/) and predicted using [AlphaFold](https://github.com/google-deepmind/alphafold)."
REFS = ""
DIR = "./master_venom_galaxy"


def unzip_box():
    """Extract the archive ``{DIR}.zip`` into the current working directory.

    Uses the standard-library ``zipfile`` module rather than the external
    ``unzip`` binary: it needs no system tool, overwrites leftovers from a
    previous run without prompting, and raises (``FileNotFoundError`` /
    ``zipfile.BadZipFile``) instead of silently continuing when the archive
    is missing or corrupt.
    """
    # Function-scope import so the file's top-level import block is untouched.
    import zipfile

    with zipfile.ZipFile(f"{DIR}.zip") as archive:
        archive.extractall()


def remove_box():
    """Delete the directory ``DIR`` and its contents via a shell ``rm -rf``."""
    cleanup_command = f"rm -rf {DIR}"
    os.system(cleanup_command)


def upload_protein_file(path, name, species_name, content="", refs=""):
    """Upload a single PDB file to the local backend.

    Reads the file at ``path`` as text and POSTs it, together with the given
    metadata, as a JSON body to ``http://localhost:8000/protein-upload``.

    Args:
        path: Location of the PDB file to read.
        name: Value sent as the ``name`` field.
        species_name: Value sent as the ``species_name`` field.
        content: Value sent as the ``content`` field (default empty).
        refs: Value sent as the ``refs`` field (default empty).

    Returns:
        The response object returned by ``requests.post``.
    """
    with open(path, "r") as pdb_handle:
        pdb_text = pdb_handle.read()

    return requests.post(
        "http://localhost:8000/protein-upload",
        json=dict(
            name=name,
            species_name=species_name,
            content=content,
            refs=refs,
            pdb_file_str=pdb_text,
        ),
    )


def upload_all():
    """Extract the bundled archive and upload every ``.pdb`` file inside it.

    For each ``*.pdb`` file in ``DIR`` the protein name is the file name
    without its extension, with underscores replaced by spaces, and the
    species is chosen from the first two characters of the file name.
    Unrecognised prefixes fall back to the ``"*"`` entry (``"unknown"``)
    instead of raising ``KeyError``.

    The extracted directory is removed afterwards even if an upload raises,
    so a failed run does not leave stale files behind. Uploads the server
    rejects are reported instead of being printed as successful.
    """
    unzip_box()
    available_species = {
        "Gh": "ganaspis hookeri",
        "Lb": "leptopilina boulardi",
        "Lh": "leptopilina heterotoma",
        "*": "unknown",
    }
    try:
        for fn in os.listdir(DIR):
            if not fn.endswith(".pdb"):
                continue
            full_path = os.path.join(DIR, fn)
            # splitext strips only the final extension, so names that
            # themselves contain dots are not truncated.
            name = os.path.splitext(fn)[0].replace("_", " ")
            species_name = available_species.get(fn[:2], available_species["*"])
            response = upload_protein_file(
                full_path, name, species_name, content=CONTENT, refs=REFS
            )
            if response.ok:
                print("uploaded", full_path, name, species_name)
            else:
                print(
                    "failed", response.status_code, full_path, name, species_name
                )
    finally:
        remove_box()


# Only run when executed as a script (e.g. `python3 upload_all.py`), so that
# importing this module never triggers uploads as a side effect.
if __name__ == "__main__":
    upload_all()
8 changes: 8 additions & 0 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ function hard_restart() {
reload_init_sql
}

function upload_all() {
    # Runs galaxy/upload_all.py from inside galaxy/ (the script uses paths
    # relative to that directory). The subshell keeps the directory change
    # from leaking into the rest of this script.
    (cd galaxy && python3 upload_all.py)
}

function delete_all() {
    # Runs galaxy/delete_all.py from inside galaxy/, then soft_restart if the
    # script succeeded. The subshell confines the directory change, so
    # soft_restart runs from the caller's working directory rather than from
    # galaxy/.
    (cd galaxy && python3 delete_all.py) && soft_restart
}

function scrape_func_names() {
    # Fills the global array `functions` with every name that ./run.sh
    # declares using the `function` keyword.
    # NOTE: grep scans the whole file, comments included, so any text where
    # that keyword is directly followed by whitespace and a word is scraped
    # as a name too.
    local decl_pattern='function[[:space:]]+[a-zA-Z_][a-zA-Z_0-9]*'
    local strip_keyword='s/function[[:space:]]*//'
    functions=($(grep -oE "$decl_pattern" ./run.sh | sed "$strip_keyword"))
}
Expand Down

0 comments on commit f938fbd

Please sign in to comment.