Skip to content

Commit

Permalink
wait, double quotes
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Oct 26, 2023
1 parent dc4684f commit 9340df9
Show file tree
Hide file tree
Showing 41 changed files with 2,792 additions and 2,767 deletions.
3 changes: 0 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,6 @@ fixable = ["I", "F401"]
# *ignored for compatibility with formatter
ignore = ["D203", "D205", "D206", "D213", "D400", "D415", "ANN101", "ANN003", "E501", "Q", "W191"]

[tool.ruff.format]
quote-style = "single"

[tool.ruff.per-file-ignores]
# ANN001 - missing-type-function-argument
# ANN2 - missing-return-type
Expand Down
68 changes: 34 additions & 34 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,56 +17,56 @@ def pytest_collection_modifyitems(items):
When creating new test modules, be sure to add them here.
"""
MODULE_ORDER = [ # noqa: N806
'test_chembl',
'test_chemidplus',
'test_drugbank',
'test_drugsatfda',
'test_guidetopharmacology',
'test_hemonc',
'test_ncit',
'test_rxnorm',
'test_wikidata',
'test_merge',
'test_database',
'test_query',
'test_emit_warnings',
'test_disease_indication',
"test_chembl",
"test_chemidplus",
"test_drugbank",
"test_drugsatfda",
"test_guidetopharmacology",
"test_hemonc",
"test_ncit",
"test_rxnorm",
"test_wikidata",
"test_merge",
"test_database",
"test_query",
"test_emit_warnings",
"test_disease_indication",
]
items.sort(key=lambda i: MODULE_ORDER.index(i.module.__name__))


TEST_ROOT = Path(__file__).resolve().parents[1]
TEST_DATA_DIRECTORY = TEST_ROOT / 'tests' / 'data'
TEST_DATA_DIRECTORY = TEST_ROOT / "tests" / "data"


@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def test_data():
"""Provide test data location to test modules"""
return TEST_DATA_DIRECTORY


@pytest.fixture(scope='session', autouse=True)
@pytest.fixture(scope="session", autouse=True)
def db():
"""Provide a database instance to be used by tests."""
database = Database()
if os.environ.get('THERAPY_TEST', '').lower() == 'true':
if os.environ.get("THERAPY_TEST", "").lower() == "true":
if os.environ.get(AWS_ENV_VAR_NAME):
assert False, f'Cannot have both THERAPY_TEST and {AWS_ENV_VAR_NAME} set.'
existing_tables = database.dynamodb_client.list_tables()['TableNames']
if 'therapy_concepts' in existing_tables:
database.dynamodb_client.delete_table(TableName='therapy_concepts')
if 'therapy_metadata' in existing_tables:
database.dynamodb_client.delete_table(TableName='therapy_metadata')
existing_tables = database.dynamodb_client.list_tables()['TableNames']
assert False, f"Cannot have both THERAPY_TEST and {AWS_ENV_VAR_NAME} set."
existing_tables = database.dynamodb_client.list_tables()["TableNames"]
if "therapy_concepts" in existing_tables:
database.dynamodb_client.delete_table(TableName="therapy_concepts")
if "therapy_metadata" in existing_tables:
database.dynamodb_client.delete_table(TableName="therapy_metadata")
existing_tables = database.dynamodb_client.list_tables()["TableNames"]
database.create_therapies_table(existing_tables)
database.create_meta_data_table(existing_tables)
return database


@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def disease_normalizer():
"""Provide mock disease normalizer."""
with open(TEST_DATA_DIRECTORY / 'disease_normalization.json', 'r') as f:
with open(TEST_DATA_DIRECTORY / "disease_normalization.json", "r") as f:
disease_data = json.load(f)

def _normalize_disease(query: str):
Expand All @@ -75,7 +75,7 @@ def _normalize_disease(query: str):
return _normalize_disease


@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def test_source(db: Database, test_data: Path, disease_normalizer: Callable):
"""Provide query endpoint for testing sources. If THERAPY_TEST is set, will try to
load DB from test data.
Expand All @@ -84,7 +84,7 @@ def test_source(db: Database, test_data: Path, disease_normalizer: Callable):
"""

def test_source_factory(EtlClass: Base): # noqa: N803
if os.environ.get('THERAPY_TEST', '').lower() == 'true':
if os.environ.get("THERAPY_TEST", "").lower() == "true":
test_class = EtlClass(db, test_data) # type: ignore
test_class._normalize_disease = disease_normalizer # type: ignore
test_class.perform_etl(use_existing=True)
Expand Down Expand Up @@ -146,7 +146,7 @@ def _compare_records(actual: Drug, fixt: Drug):
assert actual_inds[i] == fixture_inds[i]


@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def compare_records():
"""Provide record comparison function"""
return _compare_records
Expand All @@ -171,11 +171,11 @@ def _compare_response(
fixture_list otherwise)
"""
if fixture and fixture_list:
raise Exception('Args provided for both `fixture` and `fixture_list`')
raise Exception("Args provided for both `fixture` and `fixture_list`")
elif not fixture and not fixture_list:
raise Exception('Must pass 1 of {fixture, fixture_list}')
raise Exception("Must pass 1 of {fixture, fixture_list}")
if fixture and num_records:
raise Exception('`num_records` should only be given with ' '`fixture_list`.')
raise Exception("`num_records` should only be given with " "`fixture_list`.")

assert response.match_type == match_type
if fixture:
Expand All @@ -195,7 +195,7 @@ def _compare_response(
assert False # test fixture not found in response


@pytest.fixture(scope='session')
@pytest.fixture(scope="session")
def compare_response():
"""Provide response comparison function"""
return _compare_response
2 changes: 1 addition & 1 deletion tests/scripts/build_chembl_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
ch = ChEMBL(Database())
ch._extract_data()

TEST_DATA_DIR = Path(__file__).resolve().parents[1] / 'data' / 'chembl'
TEST_DATA_DIR = Path(__file__).resolve().parents[1] / "data" / "chembl"
out_db_path = TEST_DATA_DIR / ch._src_file.name

try:
Expand Down
42 changes: 21 additions & 21 deletions tests/scripts/build_chemidplus_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,25 @@
from therapy.etl import ChemIDplus

TEST_IDS = [
'87-08-1',
'152459-95-5',
'220127-57-1',
'15663-27-1',
'50-06-6',
'51186-83-5',
'8025-81-8',
'112901-68-5',
'20537-88-6',
"87-08-1",
"152459-95-5",
"220127-57-1",
"15663-27-1",
"50-06-6",
"51186-83-5",
"8025-81-8",
"112901-68-5",
"20537-88-6",
]

ch = ChemIDplus(Database())
ch._extract_data()
TEST_DATA_DIR = Path(__file__).resolve().parents[1] / 'data' / 'chemidplus'
TEST_DATA_DIR = Path(__file__).resolve().parents[1] / "data" / "chemidplus"
outfile_path = TEST_DATA_DIR / ch._src_file.name

root = ElementTree.Element('file')
root.set('name', ch._src_file.name)
root.set('date', ch._src_file.stem.split('chemidplus_')[1])
root = ElementTree.Element("file")
root.set("name", ch._src_file.name)
root.set("date", ch._src_file.stem.split("chemidplus_")[1])


def parse_xml(path: Path, tag: str = "Chemical") -> Generator:
    """Lazily yield every element with the requested tag from an XML file.

    :param Path path: location of the XML file to parse
    :param str tag: tag name of the elements to yield (defaults to ``Chemical``)
    :return: generator yielding each matching element once it is fully parsed
    """
    context = iter(ElementTree.iterparse(path, events=("start", "end")))
    # The first event delivered is the "start" of the document's root element.
    _, root = next(context)
    for event, elem in context:
        # An "end" event means the element and its whole subtree have been read.
        if event == "end" and elem.tag == tag:
            yield elem
            # When the consumer resumes the generator, detach what has been
            # parsed so far from the root so the in-memory tree does not grow.
            root.clear()


# Copy into the output tree only the parsed records whose CAS registry number
# is one of TEST_IDS.
parser = parse_xml(ch._src_file)
for chemical in parser:
    number_list = chemical.find("NumberList")
    regno = None if number_list is None else number_list.find("CASRegistryNumber")
    # Compare against None explicitly: an Element with no child elements is
    # falsy, so `if not regno` would also skip registry numbers that only
    # carry text.
    if regno is None:
        continue

    if regno.text in TEST_IDS:
        root.append(chemical)

with open(outfile_path, 'w') as f:
ElementTree.ElementTree(root).write(f, encoding='unicode')
with open(outfile_path, "w") as f:
ElementTree.ElementTree(root).write(f, encoding="unicode")

pi = ElementTree.ProcessingInstruction(
target='xml version="1.0" encoding="UTF-8" standalone="yes"'
)
pi_string = ElementTree.tostring(pi).decode('UTF8')
with open(outfile_path, 'r+') as f:
pi_string = ElementTree.tostring(pi).decode("UTF8")
with open(outfile_path, "r+") as f:
content = f.read()
f.seek(0, 0)
f.write(pi_string.rstrip('\r\n') + '\n' + content)
f.write(pi_string.rstrip("\r\n") + "\n" + content)
8 changes: 4 additions & 4 deletions tests/scripts/build_disease_normalizer_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@
from therapy.etl import ChEMBL, HemOnc

TEST_ROOT = Path(__file__).resolve().parents[1]
TEST_DATA_DIRECTORY = TEST_ROOT / 'data'
TEST_DATA_DIRECTORY = TEST_ROOT / "data"


class ReadOnlyDatabase(Database):
"""Provide read-only instance of database for security's sake"""

def add_record(self, record: Dict, record_type: str = 'identity') -> None:
def add_record(self, record: Dict, record_type: str = "identity") -> None:
"""Add new record to database"""
pass

Expand All @@ -31,7 +31,7 @@ def update_record(
concept_id: str,
field: str,
new_value: Any, # noqa
item_type: str = 'identity',
item_type: str = "identity",
) -> None:
"""Update an individual record"""
pass
Expand Down Expand Up @@ -66,5 +66,5 @@ def normalize(self, query: str) -> DiseaseNormalizationService:
h.perform_etl(use_existing=True)


with open(TEST_DATA_DIRECTORY / 'disease_normalization.json', 'w') as f:
with open(TEST_DATA_DIRECTORY / "disease_normalization.json", "w") as f:
json.dump(disease_normalizer_table, f)
50 changes: 25 additions & 25 deletions tests/scripts/build_drugsatfda_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,39 +6,39 @@
from therapy.etl import DrugsAtFDA

# Application numbers of the records to copy into the test fixture (matched
# against each record's "application_number"). Each ID is listed once.
TEST_IDS = [
    "NDA020221",
    "NDA022334",
    "NDA050682",
    "ANDA074656",
    "ANDA075036",
    "ANDA074735",
    "ANDA206774",
    "ANDA207323",
    "NDA018057",
    "ANDA072267",
    "NDA017604",
    "NDA210595",
    "NDA202450",
    "NDA091141",
    "NDA022007",
    "ANDA214475",
]

daf = DrugsAtFDA(Database())
daf._extract_data()
TEST_DATA_DIR = Path(__file__).resolve().parents[1] / 'data' / 'drugsatfda'
TEST_DATA_DIR = Path(__file__).resolve().parents[1] / "data" / "drugsatfda"
outfile_path = TEST_DATA_DIR / daf._src_file.name

with open(daf._src_file, 'r') as f:
with open(daf._src_file, "r") as f:
data = json.load(f)

# Carry over the source file's metadata unchanged and keep only the records
# whose application number is listed in TEST_IDS.
out_data = {
    "meta": data["meta"],
    "results": [
        entry for entry in data["results"] if entry["application_number"] in TEST_IDS
    ],
}

with open(outfile_path, 'w') as f:
with open(outfile_path, "w") as f:
json.dump(out_data, f)
20 changes: 10 additions & 10 deletions tests/scripts/build_gtop_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,38 +5,38 @@
from therapy.database import Database
from therapy.etl import GuideToPHARMACOLOGY

TEST_IDS = {'5343', '2169', '2804', '240', '3303', '5260'}
TEST_IDS = {"5343", "2169", "2804", "240", "3303", "5260"}

gtop = GuideToPHARMACOLOGY(Database())
gtop._extract_data()
TEST_DATA_DIR = Path(__file__).resolve().parents[1] / 'data' / 'guidetopharmacology'
TEST_DATA_DIR = Path(__file__).resolve().parents[1] / "data" / "guidetopharmacology"
ligands_file_path = TEST_DATA_DIR / gtop._ligands_file.name
mapping_file_path = TEST_DATA_DIR / gtop._mapping_file.name

def _extract_test_rows(src_path: Path, dest_path: Path) -> list:
    """Copy the leading rows and the TEST_IDS rows of a tab-delimited file.

    :param Path src_path: tab-delimited source file to read
    :param Path dest_path: tab-delimited file to write, with every field quoted
    :return: the rows that were written to ``dest_path``
    """
    with open(src_path, "r") as infile:
        reader = csv.reader(infile, delimiter="\t")
        # The first two rows are always kept (presumably header lines; verify
        # against the source files).
        rows = [next(reader), next(reader)]
        # After that, keep only rows whose first column is a test ID.
        rows.extend(row for row in reader if row[0] in TEST_IDS)

    with open(dest_path, "w") as outfile:
        csv.writer(outfile, delimiter="\t", quoting=csv.QUOTE_ALL).writerows(rows)
    return rows


ligands_rows = _extract_test_rows(gtop._ligands_file, ligands_file_path)
map_rows = _extract_test_rows(gtop._mapping_file, mapping_file_path)
Loading

0 comments on commit 9340df9

Please sign in to comment.