Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: upload entire 400 proteins script #102

Merged
merged 5 commits into from
Dec 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 0 additions & 50 deletions backend/init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -46,33 +46,6 @@ CREATE TABLE species (
PRIMARY KEY (species_id, protein_id)
);

/*
* Inserts example proteins into proteins table
*/
INSERT INTO proteins (name, length, mass, content, refs) VALUES (
'Gh_comp271_c0_seq1',
0,
0.0,
null,
null
);

INSERT INTO proteins (name, length, mass, content, refs) VALUES (
'Lb17_comp535_c2_seq1',
0,
0.0,
null,
null
);

INSERT INTO proteins (name, length, mass, content, refs) VALUES (
'Lh14_comp2336_c0_seq1',
0,
0.0,
null,
null
);

/*
* Inserts example species into species table
*/
Expand All @@ -81,27 +54,4 @@ INSERT INTO species(name) VALUES ('leptopilina boulardi');
INSERT INTO species(name) VALUES ('leptopilina heterotoma');
INSERT INTO species(name) VALUES ('unknown');

/*
* Inserts connections between species and proteins
*/
INSERT INTO species_proteins(species_id, protein_id) VALUES (
1, -- 'ganaspis hookeri',
1 -- 'Gh_comp271_c0_seq1'
);

/*
* Inserts connections between species and proteins
*/
INSERT INTO species_proteins(species_id, protein_id) VALUES (
2, -- 'leptopilina boulardi',
2 -- 'Lb17_comp535_c2_seq1'
);

/*
* Inserts connections between species and proteins
*/
INSERT INTO species_proteins(species_id, protein_id) VALUES (
3, -- 'leptopilina heterotoma',
3 -- 'Lh14_comp2336_c0_seq1'
);

1 change: 1 addition & 0 deletions backend/src/data/pdbAlphaFold/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.pdb
1,087 changes: 0 additions & 1,087 deletions backend/src/data/pdbAlphaFold/Gh_comp271_c0_seq1.pdb

This file was deleted.

1,646 changes: 0 additions & 1,646 deletions backend/src/data/pdbAlphaFold/Lb17_comp535_c2_seq1.pdb

This file was deleted.

1,709 changes: 0 additions & 1,709 deletions backend/src/data/pdbAlphaFold/Lh14_comp2336_c0_seq1.pdb

This file was deleted.

2 changes: 2 additions & 0 deletions docs/run.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ sh run.sh <cmd>
| `test` | Runs all unit tests |
| `test_backend` | Runs only backend unit tests |
| `test_frontend` | Runs only frontend unit tests |
| `upload_all` | 👉 Uploads all the PDB files from `galaxy/master_venom_galaxy.zip` to the system via POST requests |
| `delete_all` | Deletes all protein entries and restarts the server from scratch |

There are actually many more functions, so please check out [`run.sh`](../run.sh).

Expand Down
13 changes: 13 additions & 0 deletions galaxy/delete_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import requests
import os

# Directory that delete_protein_files() removes and recreates. The path is
# relative to the current working directory, so the script has to be started
# from inside galaxy/.
DIR = "../backend/src/data/pdbAlphaFold"


def delete_protein_files(directory=None):
    """Remove every stored protein file and recreate an empty data directory.

    The directory is deleted recursively, created again, and seeded with a
    ``.gitignore`` containing the single pattern ``*.pdb``.

    Args:
        directory: Directory to reset. Defaults to the module-level ``DIR``.
    """
    import shutil

    if directory is None:
        directory = DIR

    # ignore_errors mirrors ``rm -f``: a directory that is already gone is fine.
    shutil.rmtree(directory, ignore_errors=True)
    os.makedirs(directory, exist_ok=True)

    # Write the pattern from Python: the previous ``echo *.pdb > ...`` let the
    # shell expand the glob whenever a .pdb file existed in the working
    # directory, which put file names instead of the pattern into .gitignore.
    with open(os.path.join(directory, ".gitignore"), "w", encoding="utf-8") as f:
        f.write("*.pdb\n")


# Only wipe the data directory when executed as a script (as run.sh does via
# ``python3 delete_all.py``), never as a side effect of importing this module.
if __name__ == "__main__":
    delete_protein_files()
Binary file added galaxy/master_venom_galaxy.zip
Binary file not shown.
52 changes: 52 additions & 0 deletions galaxy/upload_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import requests
import os

# Markdown text sent as the "content" field of every uploaded protein.
CONTENT = "From the [Venom Biochemistry & Molecular Biology Laboratory](https://venombiochemistrylab.weebly.com/) and predicted using [AlphaFold](https://github.com/google-deepmind/alphafold)."
# Value sent as the "refs" field of every uploaded protein (currently empty).
REFS = ""
# Directory the .pdb files are read from; it is extracted from f"{DIR}.zip"
# before the upload and deleted again afterwards. Relative to the current
# working directory.
DIR = "./master_venom_galaxy"


def unzip_box(archive=None, dest=None):
    """Extract the protein archive.

    Uses the standard-library ``zipfile`` module instead of shelling out to
    ``unzip``: no external binary is required, existing files are overwritten
    rather than triggering an interactive prompt, and a missing or corrupt
    archive raises instead of being silently ignored.

    Args:
        archive: Path of the zip file. Defaults to ``f"{DIR}.zip"``.
        dest: Directory to extract into. Defaults to the current working
            directory, which is where ``unzip`` extracted to.

    Raises:
        FileNotFoundError: If the archive does not exist.
        zipfile.BadZipFile: If the archive is not a valid zip file.
    """
    import zipfile

    if archive is None:
        archive = f"{DIR}.zip"

    with zipfile.ZipFile(archive) as box:
        box.extractall(dest)


def remove_box(path=None):
    """Delete the extracted protein directory and everything inside it.

    Args:
        path: Directory to remove. Defaults to the module-level ``DIR``.
    """
    import shutil

    if path is None:
        path = DIR

    # ignore_errors keeps the ``rm -rf`` semantics: nothing to delete is not
    # an error.
    shutil.rmtree(path, ignore_errors=True)


def upload_protein_file(
    path,
    name,
    species_name,
    content="",
    refs="",
    *,
    url="http://localhost:8000/protein-upload",
    timeout=120,
):
    """Upload a single PDB file through the protein-upload endpoint.

    Args:
        path: Path of the .pdb file to read.
        name: Value for the "name" field of the request.
        species_name: Value for the "species_name" field of the request.
        content: Value for the "content" field of the request.
        refs: Value for the "refs" field of the request.
        url: Endpoint that receives the POST request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled backend would block the bulk upload forever.

    Returns:
        The ``requests.Response`` of the POST request. HTTP error statuses are
        not raised here; callers decide how to react to them.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    # Explicit encoding so the result does not depend on the locale default.
    with open(path, "r", encoding="utf-8") as f:
        pdb_file_str = f.read()

    payload = {
        "name": name,
        "species_name": species_name,
        "content": content,
        "refs": refs,
        "pdb_file_str": pdb_file_str,
    }
    return requests.post(url, json=payload, timeout=timeout)


def upload_all():
    """Extract the protein archive and upload every .pdb file it contains.

    The protein name is the file name without its extension, with underscores
    replaced by spaces. The species is chosen from the first two characters of
    the file name; prefixes that are not listed fall back to "unknown".
    The extracted directory is removed afterwards, even if an upload raises.
    """
    available_species = {
        "Gh": "ganaspis hookeri",
        "Lb": "leptopilina boulardi",
        "Lh": "leptopilina heterotoma",
        "*": "unknown",
    }
    try:
        unzip_box()
        # Sorted so that the upload order is reproducible between runs.
        for fn in sorted(os.listdir(DIR)):
            if not fn.endswith(".pdb"):
                continue
            full_path = os.path.join(DIR, fn)
            # splitext drops only the final extension, so a dot inside the
            # protein name no longer truncates it.
            name = os.path.splitext(fn)[0].replace("_", " ")
            # Indexing with an unlisted prefix used to raise KeyError; use the
            # "*" entry as the fallback it was meant to be.
            species_name = available_species.get(fn[:2], available_species["*"])
            response = upload_protein_file(
                full_path, name, species_name, content=CONTENT, refs=REFS
            )
            if response.ok:
                print("uploaded", full_path, name, species_name)
            else:
                # Report rejected uploads instead of claiming success.
                print("failed", response.status_code, full_path, name, species_name)
    finally:
        # Always clean up so a failed run does not leave a stale directory.
        remove_box()


# Only start the bulk upload when executed as a script (as run.sh does via
# ``python3 upload_all.py``), never as a side effect of importing this module.
if __name__ == "__main__":
    upload_all()
8 changes: 8 additions & 0 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,14 @@ function hard_restart() {
reload_init_sql
}

# Runs galaxy/upload_all.py from inside galaxy/ (the script uses paths
# relative to that directory).
function upload_all() {
    cd galaxy || return
    python3 upload_all.py
}

# Runs galaxy/delete_all.py from inside galaxy/, then restarts the services
# if the script succeeded.
function delete_all() {
    # The subshell keeps the directory change local, so soft_restart runs from
    # the directory run.sh was started in instead of from galaxy/.
    # NOTE(review): soft_restart is defined elsewhere in run.sh; confirm it
    # expects the repository root like the other helpers do.
    (cd galaxy && python3 delete_all.py) && soft_restart
}

function scrape_func_names() {
functions=($(grep -oE 'function[[:space:]]+[a-zA-Z_][a-zA-Z_0-9]*' ./run.sh | sed 's/function[[:space:]]*//'))
}
Expand Down