Skip to content

Commit

Permalink
feat: New error message for zero p-values (#353)
Browse files Browse the repository at this point in the history
* feat: New error message for new error type

* test: Fix tests and add more for new error type

* docs: How to run tests

* chore: Exclude venvs and data files

* chore: Update gwas-sumstats-tools to v1.0.22
  • Loading branch information
karatugo authored Jun 28, 2024
1 parent a4f56c8 commit c8cfad7
Show file tree
Hide file tree
Showing 13 changed files with 136 additions and 37 deletions.
8 changes: 5 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,15 @@ __pycache__
# pytest coverage plugin files
htmlcov

# virtualenv
# virtualenvs
.env
formatlint/*
unittest/*

# data
*.tsv*
depo_ss_validated/*
.hypothesis/*
# *.txt*
*.csv*
*.db
Expand All @@ -32,5 +36,3 @@ tox.ini
# build
build/*
data

/formatlint/*
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,13 @@ This handles the uploaded summary statistics files, validates them, reports erro

### Run the tests

- Run this, to setup up a RabbitMQ server, run the tests, and tear it all down.
- `tox`

- Make sure that the installation is complete.
- Start the service locally, or run `docker-compose up`.
- To set up a RabbitMQ server, run the tests, and tear it all down:
```bash
rm -rf .tox
tox
```

### Run as a flask app

Expand Down
10 changes: 10 additions & 0 deletions depo_data/ABC1234/test_invalid.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
chromosome base_pair_location effect_allele other_allele beta standard_error effect_allele_frequency p_value rsid ci_upper ci_lower test nmiss stat odds_ratio
1 768448 A GT #NA 0.05618 0.1 0.2665 rs12562034 1.188 0.9534 ADD 9339 1.111 1.064
1 1005806 T 0.2 0.04895 0.001 0.7089 rs3934834 1.081 0.8921 ADD 9326 -0.3734 0.9819
1 1018704 A LONG_STRING 0.3 0.03484 0.1 0.2141 rs9442372 1.118 0.9753 ADD 9333 1.242 1.044
2 1021415 A C 0.4 0.03737 0.4 0.1572 #NA 1.134 0.9798 ADD 9337 1.415 1.054
1 1030565 T AAA 0.5 0.04809 0.1 0.9591 rs6687776 1.102 0.9123 ADD 9349 0.05122 1.002
1 1062638 C G 0.6 0.03471 0.9 0.1 rs9442373 0.1 0.9364 ADD 9349 0.06799 1.002
1 1064979 T TT 0.7 0.04997 0.8 0.99 rs2298217 1.113 0.9147 ADD 9346 0.1765 1.009
30 1087683 T TG 0.06883 0.89 0.4408 10E-200 rs9442380 1.207 0.9214 ADD 9347 0.7708 1.054
1 1099342 A AAAA 0.1 0.07066 0.7 0.2309 rs9660710 1.25 0.9476 ADD 9346 1.198 1.088
10 changes: 10 additions & 0 deletions depo_data/ABC1234/test_sumstats_file.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
chromosome base_pair_location effect_allele other_allele beta standard_error effect_allele_frequency p_value rsid ci_upper ci_lower test nmiss stat odds_ratio
1 768448 A GT 0.1 0.05618 0.1 0.2665 rs12562034 1.188 0.9534 ADD 9339 1.111 1.064
1 1005806 T G 0.2 0.04895 0.001 0.7089 rs3934834 1.081 0.8921 ADD 9326 -0.3734 0.9819
1 1018704 A LONG_STRING 0.3 0.03484 0.1 0.2141 rs9442372 1.118 0.9753 ADD 9333 1.242 1.044
2 1021415 A C 0.4 0.03737 0.4 0.1572 #NA 1.134 0.9798 ADD 9337 1.415 1.054
1 1030565 T AAA 0.5 0.04809 0.1 0.9591 rs6687776 1.102 0.9123 ADD 9349 0.05122 1.002
1 1062638 C G 0.6 0.03471 0.9 0.1 rs9442373 0.1 0.9364 ADD 9349 0.06799 1.002
1 1064979 T TT 0.7 0.04997 0.8 0.99 rs2298217 1.113 0.9147 ADD 9346 0.1765 1.009
25 1087683 T TG 0.06883 0.89 0.4408 10E-200 rs9442380 1.207 0.9214 ADD 9347 0.7708 1.054
1 1099342 A AAAA 0.1 0.07066 0.7 0.2309 rs9660710 1.25 0.9476 ADD 9346 1.198 1.088
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ globus-cli==3.12.0
globus-sdk==3.17.0
greenlet==2.0.2
gunicorn==19.9.0
gwas-sumstats-tools==1.0.20
gwas-sumstats-tools==1.0.22
humanize==4.6.0
hypothesis==6.68.2
idna==3.4
Expand Down
4 changes: 4 additions & 0 deletions sumstats_service/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ def _env_variable_else(env_var_name, default):
"errorText": "There is a problem on our side, please contact gwas-subs@ebi.ac.uk for further advice.",
},
{"id": 11, "errorText": "The raw sumstats file can not be found"},
{
"id": 12,
"errorText": "Analysis software must be provided in the metadata template for summary statistics containing 0 p-values.",
},
]

VALID_ASSEMBLIES = ["GRCh38", "GRCh37", "NCBI36", "NCBI35", "NCBI34", "NR"]
Expand Down
28 changes: 16 additions & 12 deletions sumstats_service/resources/file_handler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import csv
import gzip
import hashlib
import io
import logging
import os
import pathlib
Expand Down Expand Up @@ -187,6 +186,7 @@ def validate_file(self):
"minrows": 9,
"data": 3,
"field order": 7,
"p_val": 12,
}
self.validation_error = error_to_code_dict.get(validator.primary_error_type)
if validator.errors_table:
Expand Down Expand Up @@ -246,19 +246,23 @@ def move_file_to_staging(self):
"""
TODO: move raw ss if needed
"""

source_dir = os.path.join(config.STORAGE_PATH, self.callback_id)
source_file_without_ext = os.path.join(source_dir, self.study_id)
source_file = add_ext_to_file_without_ext(source_file_without_ext)
dest_dir = os.path.join(config.STAGING_PATH, self.staging_dir_name)
ext = get_ext_for_file(file_path=source_file)
dest_file = os.path.join(dest_dir, self.staging_file_name + ext)

try:
source_dir = os.path.join(config.STORAGE_PATH, self.callback_id)
source_file_without_ext = os.path.join(source_dir, self.study_id)
source_file = add_ext_to_file_without_ext(source_file_without_ext)
dest_dir = os.path.join(config.STAGING_PATH, self.staging_dir_name)
ext = get_ext_for_file(file_path=source_file)
dest_file = os.path.join(dest_dir, self.staging_file_name + ext)

pathlib.Path(dest_dir).mkdir(parents=True, exist_ok=True)
shutil.move(source_file, dest_file)
except (IndexError, FileNotFoundError, OSError, ValueError) as e:
logger.error(
"Error: {}\nCould not move file {} to staging,\ "
"callback ID: {}".format(e, self.staging_file_name, self.callback_id)
"Error: {}\nCould not move file {} to staging, callback ID: {}".format(
e, self.staging_file_name, self.callback_id
)
)
# Attempt to clean up by removing the directory if it's empty
try:
Expand All @@ -267,7 +271,7 @@ def move_file_to_staging(self):
# Directory not empty or other issue, cannot remove
pass

raise
raise

return True

Expand Down Expand Up @@ -326,7 +330,7 @@ def get_source_file_from_id(source_dir, source):
source_with_ext = None
ext = None
filter_files = [
f for f in [f for f in files if not ".README" in f] if not ".log" in f
f for f in [f for f in files if ".README" not in f] if ".log" not in f
]
if filter_files:
for f in filter_files:
Expand All @@ -344,7 +348,7 @@ def mv_file_with_globus(dest_dir, source, dest):
# create the new dir
try:
globus.mkdir(unique_id=dest_dir)
except:
except Exception:
pass
status = globus.rename_file(dest_dir, source, dest)
return status
Expand Down
10 changes: 10 additions & 0 deletions tests/ABC1234/test_sumstats_file_zero_p_values.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
chromosome base_pair_location effect_allele other_allele beta standard_error effect_allele_frequency p_value rsid ci_upper ci_lower test nmiss stat odds_ratio
1 768448 A GT 0.1 0.05618 0.1 0 rs12562034 1.188 0.9534 ADD 9339 1.111 1.064
1 1005806 T G 0.2 0.04895 0.001 0 rs3934834 1.081 0.8921 ADD 9326 -0.3734 0.9819
1 1018704 A LONG_STRING 0.3 0.03484 0.1 0.2141 rs9442372 1.118 0.9753 ADD 9333 1.242 1.044
2 1021415 A C 0.4 0.03737 0.4 0.1572 #NA 1.134 0.9798 ADD 9337 1.415 1.054
1 1030565 T AAA 0.5 0.04809 0.1 0.9591 rs6687776 1.102 0.9123 ADD 9349 0.05122 1.002
1 1062638 C G 0.6 0.03471 0.9 0.1 rs9442373 0.1 0.9364 ADD 9349 0.06799 1.002
1 1064979 T TT 0.7 0.04997 0.8 0.99 rs2298217 1.113 0.9147 ADD 9346 0.1765 1.009
25 1087683 T TG 0.06883 0.89 0.4408 10E-200 rs9442380 1.207 0.9214 ADD 9347 0.7708 1.054
1 1099342 A AAAA 0.1 0.07066 0.7 0.2309 rs9660710 1.25 0.9476 ADD 9346 1.198 1.088
11 changes: 8 additions & 3 deletions tests/test_api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import sumstats_service.resources.api_utils as au
from sumstats_service import config
from tests.test_constants import *
from tests.test_constants import VALID_POST


class TestAPIUtils(unittest.TestCase):
Expand Down Expand Up @@ -35,8 +35,13 @@ def setUp(self):
}

def tearDown(self):
client = MongoClient(config.MONGO_URI)
client.drop_database(config.MONGO_DB)
mongo_uri = os.getenv("MONGO_URI", config.MONGO_URI)
mongo_user = os.getenv("MONGO_USER", None)
mongo_password = os.getenv("MONGO_PASSWORD", None)
mongo_db = os.getenv("MONGO_DB", config.MONGO_DB)

client = MongoClient(mongo_uri, username=mongo_user, password=mongo_password)
client.drop_database(mongo_db)

def test_json_payload_to_db(self):
result = au.json_payload_to_db(VALID_POST)
Expand Down
12 changes: 8 additions & 4 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from sumstats_service import config
from sumstats_service.app import app, celery
from tests.test_constants import *


class TestAPP:
Expand All @@ -17,8 +16,13 @@ def setup_method(self, method):
)

def teardown_method(self, method):
client = MongoClient(config.MONGO_URI)
client.drop_database(config.MONGO_DB)
mongo_uri = os.getenv("MONGO_URI", config.MONGO_URI)
mongo_user = os.getenv("MONGO_USER", None)
mongo_password = os.getenv("MONGO_PASSWORD", None)
mongo_db = os.getenv("MONGO_DB", config.MONGO_DB)

client = MongoClient(mongo_uri, username=mongo_user, password=mongo_password)
client.drop_database(mongo_db)

def test_index(self):
tester = app.test_client(self)
Expand Down Expand Up @@ -113,4 +117,4 @@ def test_bad_callback_id(self):


if __name__ == "__main__":
unittest.main()
unittest.main() # noqa
12 changes: 8 additions & 4 deletions tests/test_payload.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@

import sumstats_service.resources.payload as pl
from sumstats_service import config
from sumstats_service.resources.error_classes import *
from tests.test_constants import *
from tests.test_constants import VALID_POST


class TestPayload(unittest.TestCase):
Expand All @@ -17,8 +16,13 @@ def setUp(self):
config.BROKER_HOST = "localhost"

def tearDown(self):
client = MongoClient(config.MONGO_URI)
client.drop_database(config.MONGO_DB)
mongo_uri = os.getenv("MONGO_URI", config.MONGO_URI)
mongo_user = os.getenv("MONGO_USER", None)
mongo_password = os.getenv("MONGO_PASSWORD", None)
mongo_db = os.getenv("MONGO_DB", config.MONGO_DB)

client = MongoClient(mongo_uri, username=mongo_user, password=mongo_password)
client.drop_database(mongo_db)

def test_generate_callback_id(self):
payload = pl.Payload()
Expand Down
27 changes: 25 additions & 2 deletions tests/test_study_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,23 @@ def setUp(self):
self.valid_file_md5 = "9b5f307016408b70cde2c9342648aa9b"
self.assembly = "GRCh38"
self.valid_file = "test_sumstats_file.tsv"
self.file_zero_p_values = "test_sumstats_file_zero_p_values.tsv"
self.md5_file_zero_p_values = "912032fda7691a6e811f54bc66168f98"
self.test_validate_path = os.path.join(config.VALIDATED_PATH, self.callback_id)
os.makedirs(config.STORAGE_PATH, exist_ok=True)
os.makedirs(self.test_validate_path, exist_ok=True)

def tearDown(self):
shutil.rmtree(self.test_storepath)
shutil.rmtree(self.test_validate_path)
client = MongoClient(config.MONGO_URI)
client.drop_database(config.MONGO_DB)

mongo_uri = os.getenv("MONGO_URI", config.MONGO_URI)
mongo_user = os.getenv("MONGO_USER", None)
mongo_password = os.getenv("MONGO_PASSWORD", None)
mongo_db = os.getenv("MONGO_DB", config.MONGO_DB)

client = MongoClient(mongo_uri, username=mongo_user, password=mongo_password)
client.drop_database(mongo_db)

def test_valid_study_id(self):
study = st.Study(
Expand Down Expand Up @@ -260,6 +268,21 @@ def test_validate_study_not_enough_rows(self):
study.retrieve_study_file()
study.validate_study(minrows=100)
self.assertEqual(study.data_valid, 0)
self.assertEqual(study.error_code, 9)

def test_validate_invalid_study_zero_p_values(self):
study = st.Study(
study_id=self.study_id,
file_path=self.file_zero_p_values,
md5=self.md5_file_zero_p_values,
assembly=self.assembly,
callback_id=self.callback_id,
entryUUID=self.entryUUID,
)
study.retrieve_study_file()
study.validate_study(minrows=2, zero_p_values=False)
self.assertEqual(study.data_valid, 0)
self.assertEqual(study.error_code, 12)


if __name__ == "__main__":
Expand Down
29 changes: 24 additions & 5 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,26 @@
envlist = py39

[testenv]
deps=pifpaf
pika
deps =
pifpaf
pika
pytest==7.4.4
pytest-cov==2.7.1
python-magic
pymongo==4.3.3
globus-cli==3.12.0
globus-sdk==3.17.0
simplejson==3.16.0
shortuuid==0.5.0
celery==5.4.0
numpy==1.25.0
pandas==1.5.3
pandera==0.13.4
gwas-sumstats-tools==1.0.22
; local gwas-sumstats-tools, e.g.,
; /Users/karatugo/Documents/GitHub/gwas-sumstats-tools/dist/gwas_sumstats_tools-1.0.20.tar.gz
; or
; gwas-sumstats-tools==1.0.20
setenv =
CELERY_PROTOCOL = amqp
CELERY_USER = pifpaf
Expand All @@ -12,6 +30,7 @@ setenv =
QUEUE_PORT = 5682
MONGO_DB = mongotest
MONGO_URI = mongodb://127.0.0.1:27017

commands=
pifpaf run rabbitmq --port 5682 -- pytest --cov-report html --cov sumstats_service --verbose
MONGO_USER=myuser
MONGO_PASSWORD=mypassword
commands =
pifpaf run rabbitmq --port 5682 -- pytest -s --cov-report html --cov sumstats_service --verbose

0 comments on commit c8cfad7

Please sign in to comment.