diff --git a/.gitignore b/.gitignore index d62f599..5f37bf6 100644 --- a/.gitignore +++ b/.gitignore @@ -11,11 +11,15 @@ __pycache__ # pytest coverage plugin files htmlcov -# virtualenv +# virtualenvs .env +formatlint/* +unittest/* # data *.tsv* +depo_ss_validated/* +.hypothesis/* # *.txt* *.csv* *.db @@ -32,5 +36,3 @@ tox.ini # build build/* data - -/formatlint/* \ No newline at end of file diff --git a/README.md b/README.md index 1a54ddf..8b30e45 100644 --- a/README.md +++ b/README.md @@ -33,9 +33,13 @@ This handles the uploaded summary statistics files, validates them, reports erro ### Run the tests -- Run this, to setup up a RabbitMQ server, run the tests, and tear it all down. -- `tox` - +- Make sure that the installation is complete. +- Start locally or `docker-compose up`. +- To setup up a RabbitMQ server, run the tests, and tear it all down: + ```bash + rm -rf .tox + tox + ``` ### Run as a flask app diff --git a/depo_data/ABC1234/test_invalid.tsv b/depo_data/ABC1234/test_invalid.tsv new file mode 100644 index 0000000..f09365f --- /dev/null +++ b/depo_data/ABC1234/test_invalid.tsv @@ -0,0 +1,10 @@ +chromosome base_pair_location effect_allele other_allele beta standard_error effect_allele_frequency p_value rsid ci_upper ci_lower test nmiss stat odds_ratio +1 768448 A GT #NA 0.05618 0.1 0.2665 rs12562034 1.188 0.9534 ADD 9339 1.111 1.064 +1 1005806 T 0.2 0.04895 0.001 0.7089 rs3934834 1.081 0.8921 ADD 9326 -0.3734 0.9819 +1 1018704 A LONG_STRING 0.3 0.03484 0.1 0.2141 rs9442372 1.118 0.9753 ADD 9333 1.242 1.044 +2 1021415 A C 0.4 0.03737 0.4 0.1572 #NA 1.134 0.9798 ADD 9337 1.415 1.054 +1 1030565 T AAA 0.5 0.04809 0.1 0.9591 rs6687776 1.102 0.9123 ADD 9349 0.05122 1.002 +1 1062638 C G 0.6 0.03471 0.9 0.1 rs9442373 0.1 0.9364 ADD 9349 0.06799 1.002 +1 1064979 T TT 0.7 0.04997 0.8 0.99 rs2298217 1.113 0.9147 ADD 9346 0.1765 1.009 +30 1087683 T TG 0.06883 0.89 0.4408 10E-200 rs9442380 1.207 0.9214 ADD 9347 0.7708 1.054 +1 1099342 A AAAA 0.1 0.07066 0.7 0.2309 rs9660710 1.25 0.9476 ADD 9346 1.198 1.088 \ No newline at end of file diff --git a/depo_data/ABC1234/test_sumstats_file.tsv b/depo_data/ABC1234/test_sumstats_file.tsv new file mode 100644 index 0000000..b570ccc --- /dev/null +++ b/depo_data/ABC1234/test_sumstats_file.tsv @@ -0,0 +1,10 @@ +chromosome base_pair_location effect_allele other_allele beta standard_error effect_allele_frequency p_value rsid ci_upper ci_lower test nmiss stat odds_ratio +1 768448 A GT 0.1 0.05618 0.1 0.2665 rs12562034 1.188 0.9534 ADD 9339 1.111 1.064 +1 1005806 T G 0.2 0.04895 0.001 0.7089 rs3934834 1.081 0.8921 ADD 9326 -0.3734 0.9819 +1 1018704 A LONG_STRING 0.3 0.03484 0.1 0.2141 rs9442372 1.118 0.9753 ADD 9333 1.242 1.044 +2 1021415 A C 0.4 0.03737 0.4 0.1572 #NA 1.134 0.9798 ADD 9337 1.415 1.054 +1 1030565 T AAA 0.5 0.04809 0.1 0.9591 rs6687776 1.102 0.9123 ADD 9349 0.05122 1.002 +1 1062638 C G 0.6 0.03471 0.9 0.1 rs9442373 0.1 0.9364 ADD 9349 0.06799 1.002 +1 1064979 T TT 0.7 0.04997 0.8 0.99 rs2298217 1.113 0.9147 ADD 9346 0.1765 1.009 +25 1087683 T TG 0.06883 0.89 0.4408 10E-200 rs9442380 1.207 0.9214 ADD 9347 0.7708 1.054 +1 1099342 A AAAA 0.1 0.07066 0.7 0.2309 rs9660710 1.25 0.9476 ADD 9346 1.198 1.088 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c0a5bfc..8be8740 100644 --- a/requirements.txt +++ b/requirements.txt @@ -41,7 +41,7 @@ globus-cli==3.12.0 globus-sdk==3.17.0 greenlet==2.0.2 gunicorn==19.9.0 -gwas-sumstats-tools==1.0.20 +gwas-sumstats-tools==1.0.22 humanize==4.6.0 hypothesis==6.68.2 idna==3.4 diff --git a/sumstats_service/config.py b/sumstats_service/config.py index 418f114..c20b9b5 100644 --- a/sumstats_service/config.py +++ b/sumstats_service/config.py @@ -153,6 +153,10 @@ def _env_variable_else(env_var_name, default): "errorText": "There is a problem on our side, please contact gwas-subs@ebi.ac.uk for further advice.", }, {"id": 11, "errorText": "The raw sumstats file can not be found"}, + { + "id": 12, + "errorText": "Analysis software must be provided in the metadata template for summary statistics containing 0 p-values.", + }, ] VALID_ASSEMBLIES = ["GRCh38", "GRCh37", "NCBI36", "NCBI35", "NCBI34", "NR"] diff --git a/sumstats_service/resources/file_handler.py b/sumstats_service/resources/file_handler.py index adcaa31..214c5e3 100644 --- a/sumstats_service/resources/file_handler.py +++ b/sumstats_service/resources/file_handler.py @@ -1,7 +1,6 @@ import csv import gzip import hashlib -import io import logging import os import pathlib @@ -187,6 +186,7 @@ def validate_file(self): "minrows": 9, "data": 3, "field order": 7, + "p_val": 12, } self.validation_error = error_to_code_dict.get(validator.primary_error_type) if validator.errors_table: @@ -246,19 +246,23 @@ def move_file_to_staging(self): """ TODO: move raw ss if needed """ + + source_dir = os.path.join(config.STORAGE_PATH, self.callback_id) + source_file_without_ext = os.path.join(source_dir, self.study_id) + source_file = add_ext_to_file_without_ext(source_file_without_ext) + dest_dir = os.path.join(config.STAGING_PATH, self.staging_dir_name) + ext = get_ext_for_file(file_path=source_file) + dest_file = os.path.join(dest_dir, self.staging_file_name + ext) + try: - source_dir = os.path.join(config.STORAGE_PATH, self.callback_id) - source_file_without_ext = os.path.join(source_dir, self.study_id) - source_file = add_ext_to_file_without_ext(source_file_without_ext) - dest_dir = os.path.join(config.STAGING_PATH, self.staging_dir_name) - ext = get_ext_for_file(file_path=source_file) - dest_file = os.path.join(dest_dir, self.staging_file_name + ext) + pathlib.Path(dest_dir).mkdir(parents=True, exist_ok=True) shutil.move(source_file, dest_file) except (IndexError, FileNotFoundError, OSError, ValueError) as e: logger.error( - "Error: {}\nCould not move file {} to staging,\ " - "callback ID: {}".format(e, self.staging_file_name, self.callback_id) + "Error: {}\nCould not move file {} to staging, callback ID: {}".format( + e, self.staging_file_name, self.callback_id + ) ) # Attempt to clean up by removing the directory if it's empty try: @@ -267,7 +271,7 @@ def move_file_to_staging(self): # Directory not empty or other issue, cannot remove pass - raise + raise return True @@ -326,7 +330,7 @@ def get_source_file_from_id(source_dir, source): source_with_ext = None ext = None filter_files = [ - f for f in [f for f in files if not ".README" in f] if not ".log" in f + f for f in [f for f in files if ".README" not in f] if ".log" not in f ] if filter_files: for f in filter_files: @@ -344,7 +348,7 @@ def mv_file_with_globus(dest_dir, source, dest): # create the new dir try: globus.mkdir(unique_id=dest_dir) - except: + except Exception: pass status = globus.rename_file(dest_dir, source, dest) return status diff --git a/tests/ABC1234/test_sumstats_file_zero_p_values.tsv b/tests/ABC1234/test_sumstats_file_zero_p_values.tsv new file mode 100644 index 0000000..18cac68 --- /dev/null +++ b/tests/ABC1234/test_sumstats_file_zero_p_values.tsv @@ -0,0 +1,10 @@ +chromosome base_pair_location effect_allele other_allele beta standard_error effect_allele_frequency p_value rsid ci_upper ci_lower test nmiss stat odds_ratio +1 768448 A GT 0.1 0.05618 0.1 0 rs12562034 1.188 0.9534 ADD 9339 1.111 1.064 +1 1005806 T G 0.2 0.04895 0.001 0 rs3934834 1.081 0.8921 ADD 9326 -0.3734 0.9819 +1 1018704 A LONG_STRING 0.3 0.03484 0.1 0.2141 rs9442372 1.118 0.9753 ADD 9333 1.242 1.044 +2 1021415 A C 0.4 0.03737 0.4 0.1572 #NA 1.134 0.9798 ADD 9337 1.415 1.054 +1 1030565 T AAA 0.5 0.04809 0.1 0.9591 rs6687776 1.102 0.9123 ADD 9349 0.05122 1.002 +1 1062638 C G 0.6 0.03471 0.9 0.1 rs9442373 0.1 0.9364 ADD 9349 0.06799 1.002 +1 1064979 T TT 0.7 0.04997 0.8 0.99 rs2298217 1.113 0.9147 ADD 9346 0.1765 1.009 +25 1087683 T TG 0.06883 0.89 0.4408 10E-200 rs9442380 1.207 0.9214 ADD 9347 0.7708 1.054 +1 1099342 A AAAA 0.1 0.07066 0.7 0.2309 rs9660710 1.25 0.9476 ADD 9346 1.198 1.088 \ No newline at end of file diff --git a/tests/test_api_utils.py b/tests/test_api_utils.py index 32a8409..f7f5955 100644 --- a/tests/test_api_utils.py +++ b/tests/test_api_utils.py @@ -6,7 +6,7 @@ import sumstats_service.resources.api_utils as au from sumstats_service import config -from tests.test_constants import * +from tests.test_constants import VALID_POST class TestAPIUtils(unittest.TestCase): @@ -35,8 +35,13 @@ def setUp(self): } def tearDown(self): - client = MongoClient(config.MONGO_URI) - client.drop_database(config.MONGO_DB) + mongo_uri = os.getenv("MONGO_URI", config.MONGO_URI) + mongo_user = os.getenv("MONGO_USER", None) + mongo_password = os.getenv("MONGO_PASSWORD", None) + mongo_db = os.getenv("MONGO_DB", config.MONGO_DB) + + client = MongoClient(mongo_uri, username=mongo_user, password=mongo_password) + client.drop_database(mongo_db) def test_json_payload_to_db(self): result = au.json_payload_to_db(VALID_POST) diff --git a/tests/test_app.py b/tests/test_app.py index ca05e38..1653d60 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -4,7 +4,6 @@ from sumstats_service import config from sumstats_service.app import app, celery -from tests.test_constants import * class TestAPP: @@ -17,8 +16,13 @@ def setup_method(self, method): ) def teardown_method(self, method): - client = MongoClient(config.MONGO_URI) - client.drop_database(config.MONGO_DB) + mongo_uri = os.getenv("MONGO_URI", config.MONGO_URI) + mongo_user = os.getenv("MONGO_USER", None) + mongo_password = os.getenv("MONGO_PASSWORD", None) + mongo_db = os.getenv("MONGO_DB", config.MONGO_DB) + + client = MongoClient(mongo_uri, username=mongo_user, password=mongo_password) + client.drop_database(mongo_db) def test_index(self): tester = app.test_client(self) @@ -113,4 +117,4 @@ def test_bad_callback_id(self): if __name__ == "__main__": - unittest.main() + unittest.main() # noqa diff --git a/tests/test_payload.py b/tests/test_payload.py index 00d08dc..042f9d1 100644 --- a/tests/test_payload.py +++ b/tests/test_payload.py @@ -5,8 +5,7 @@ import sumstats_service.resources.payload as pl from sumstats_service import config -from sumstats_service.resources.error_classes import * -from tests.test_constants import * +from tests.test_constants import VALID_POST class TestPayload(unittest.TestCase): @@ -17,8 +16,13 @@ def setUp(self): config.BROKER_HOST = "localhost" def tearDown(self): - client = MongoClient(config.MONGO_URI) - client.drop_database(config.MONGO_DB) + mongo_uri = os.getenv("MONGO_URI", config.MONGO_URI) + mongo_user = os.getenv("MONGO_USER", None) + mongo_password = os.getenv("MONGO_PASSWORD", None) + mongo_db = os.getenv("MONGO_DB", config.MONGO_DB) + + client = MongoClient(mongo_uri, username=mongo_user, password=mongo_password) + client.drop_database(mongo_db) def test_generate_callback_id(self): payload = pl.Payload() diff --git a/tests/test_study_service.py b/tests/test_study_service.py index 08e9c28..6706da2 100644 --- a/tests/test_study_service.py +++ b/tests/test_study_service.py @@ -23,6 +23,8 @@ def setUp(self): self.valid_file_md5 = "9b5f307016408b70cde2c9342648aa9b" self.assembly = "GRCh38" self.valid_file = "test_sumstats_file.tsv" + self.file_zero_p_values = "test_sumstats_file_zero_p_values.tsv" + self.md5_file_zero_p_values = "912032fda7691a6e811f54bc66168f98" self.test_validate_path = os.path.join(config.VALIDATED_PATH, self.callback_id) os.makedirs(config.STORAGE_PATH, exist_ok=True) os.makedirs(self.test_validate_path, exist_ok=True) @@ -30,8 +32,14 @@ def setUp(self): def tearDown(self): shutil.rmtree(self.test_storepath) shutil.rmtree(self.test_validate_path) - client = MongoClient(config.MONGO_URI) - client.drop_database(config.MONGO_DB) + + mongo_uri = os.getenv("MONGO_URI", config.MONGO_URI) + mongo_user = os.getenv("MONGO_USER", None) + mongo_password = os.getenv("MONGO_PASSWORD", None) + mongo_db = os.getenv("MONGO_DB", config.MONGO_DB) + + client = MongoClient(mongo_uri, username=mongo_user, password=mongo_password) + client.drop_database(mongo_db) def test_valid_study_id(self): study = st.Study( @@ -260,6 +268,21 @@ def test_validate_study_not_enough_rows(self): study.retrieve_study_file() study.validate_study(minrows=100) self.assertEqual(study.data_valid, 0) + self.assertEqual(study.error_code, 9) + + def test_validate_invalid_study_zero_p_values(self): + study = st.Study( + study_id=self.study_id, + file_path=self.file_zero_p_values, + md5=self.md5_file_zero_p_values, + assembly=self.assembly, + callback_id=self.callback_id, + entryUUID=self.entryUUID, + ) + study.retrieve_study_file() + study.validate_study(minrows=2, zero_p_values=False) + self.assertEqual(study.data_valid, 0) + self.assertEqual(study.error_code, 12) if __name__ == "__main__": diff --git a/tox.ini b/tox.ini index e0251cd..c3da080 100644 --- a/tox.ini +++ b/tox.ini @@ -2,8 +2,26 @@ envlist = py39 [testenv] -deps=pifpaf - pika +deps = + pifpaf + pika + pytest==7.4.4 + pytest-cov==2.7.1 + python-magic + pymongo==4.3.3 + globus-cli==3.12.0 + globus-sdk==3.17.0 + simplejson==3.16.0 + shortuuid==0.5.0 + celery==5.4.0 + numpy==1.25.0 + pandas==1.5.3 + pandera==0.13.4 + gwas-sumstats-tools==1.0.22 + ; local gwas-sumstats-tools, e.g., + ; /Users/karatugo/Documents/GitHub/gwas-sumstats-tools/dist/gwas_sumstats_tools-1.0.20.tar.gz + ; or + ; gwas-sumstats-tools==1.0.20 setenv = CELERY_PROTOCOL = amqp CELERY_USER = pifpaf @@ -12,6 +30,7 @@ setenv = QUEUE_PORT = 5682 MONGO_DB = mongotest MONGO_URI = mongodb://127.0.0.1:27017 - -commands= - pifpaf run rabbitmq --port 5682 -- pytest --cov-report html --cov sumstats_service --verbose + MONGO_USER=myuser + MONGO_PASSWORD=mypassword +commands = + pifpaf run rabbitmq --port 5682 -- pytest -s --cov-report html --cov sumstats_service --verbose