Skip to content

Commit

Permalink
Add curie query support (mychem version) (#34)
Browse files Browse the repository at this point in the history
* Add the biolink prefix mapping to the settings ...

Also separate the settings into a separate file for isolating the
configuration from client construction methods

* Add work for supporting CURIE query support ...

Specifically targets _getannotation and _getannotations methods at the
moment and only for the mygene instance

* Update the tox configuration to use more modern python

* Change the default value for the prefix mapping

* Add proper list formatting for the fields support

* Add proper multiple id/query argument handling

* Add expectedFailure decorators to the chem tests

* Remove the default regex pattern matching ...

Not all of the different sites will work with that. Ran into issues
with that default pattern with myvariant. Instead we should view this
feature as specifically targeting support for the biolink prefixes
and nothing else

* Remove client side CURIE support ...

Pivot to adding testing and verification that the API / server side
implementation supports the CURIE ID

* Add the CURIE ID query testing for the mygene instance

* Remove unused pytest import

* Add the curie id tests with mychem as the target

* Revert modifications to the settings configuration

* Update the pyproject file

---------

Co-authored-by: Johnathan Schaff <jschaff@scripps.edu>
  • Loading branch information
ctrl-schaff and Johnathan Schaff authored May 29, 2024
1 parent dabafbe commit 4ad2dc6
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 12 deletions.
1 change: 1 addition & 0 deletions biothings_client/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Python Client for generic Biothings API services
"""

from __future__ import print_function

import logging
Expand Down
1 change: 1 addition & 0 deletions biothings_client/utils/gene.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" Gene specific utils. """

from collections import OrderedDict

from biothings_client import get_client
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name="biothings_client"
authors = [
{name = "Cyrus Afrasiabi"},
{name = "Xinghua Zhou"},
{name = "Everaldo Rodolpho"},
{name = "Everaldo Rodolpho", email="erodolpho@scripps.edu"},
{name = "Chunlei Wu", email = "cwu@scripps.edu"},
]
requires-python = ">=2.7"
Expand Down
138 changes: 127 additions & 11 deletions tests/chem.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,121 @@ def test_getchem_with_fields(self):
self.assertTrue("pubchem" in c)
self.assertTrue("cid" in c["pubchem"])

def test_curie_id_query(self):
    """
    Verify that the annotation endpoint accepts biolink CURIE IDs.

    Each case lists three spellings of one identifier: the bare ID (as a
    string or an integer), a prefixed CURIE spelling in varying letter case
    (e.g. ``CHEBI:57966``) and a field-scoped spelling
    (e.g. ``chebi.id:57966``). Every spelling is sent through ``getchem``
    and the three responses are expected to be equal to one another.
    """
    curie_cases = [
        ("57966", "CHEMBL.COMPOUND:57966", "chembl.molecule_chembl_id:57966"),
        (57966, "chembl.compound:57966", "chembl.molecule_chembl_id:57966"),
        (57966, "CheMBL.compOUND:57966", "chembl.molecule_chembl_id:57966"),
        ("120933777", "PUBCHEM.COMPOUND:120933777", "pubchem.cid:120933777"),
        (120933777, "pubchem.compound:120933777", "pubchem.cid:120933777"),
        ("120933777", "PuBcHEm.COMPound:120933777", "pubchem.cid:120933777"),
        (57966, "CHEBI:57966", "chebi.id:57966"),
        ("57966", "chebi:57966", "chebi.id:57966"),
        (57966, "CheBi:57966", "chebi.id:57966"),
        ("11P2JDE17B", "UNII:11P2JDE17B", "unii.unii:11P2JDE17B"),
        ("11P2JDE17B", "unii:11P2JDE17B", "unii.unii:11P2JDE17B"),
        ("11P2JDE17B", "uNIi:11P2JDE17B", "unii.unii:11P2JDE17B"),
        ("dB03107", "DRUGBANK:dB03107", "drugbank.id:dB03107"),
        ("dB03107", "drugbank:dB03107", "drugbank.id:dB03107"),
        ("dB03107", "DrugBaNK:dB03107", "drugbank.id:dB03107"),
    ]

    # Phase 1: run every query and record the pairwise equality of the
    # three responses for each case.
    outcomes = []
    for bare_id, prefixed_term, field_term in curie_cases:
        from_bare_id = self.mc.getchem(_id=bare_id)
        from_prefixed = self.mc.getchem(_id=prefixed_term)
        from_field = self.mc.getchem(_id=field_term)
        outcomes.append(
            (
                from_bare_id == from_prefixed,
                from_bare_id == from_field,
                from_prefixed == from_field,
            )
        )

    # Phase 2: gather one message per failing case so that a single
    # assertion reports every mismatch instead of only the first one.
    failure_messages = [
        f"Query Failure: {case} | Results: {outcome}"
        for outcome, case in zip(outcomes, curie_cases)
        if not all(outcome)
    ]
    every_case_passed = all(all(outcome) for outcome in outcomes)

    self.assertTrue(every_case_passed, msg="\n".join(failure_messages))

def test_multiple_curie_id_query(self):
    """
    Verify that the annotations (batch) endpoint accepts biolink CURIE IDs.

    Per the original intent, this exercises the batch/POST path to check
    that CURIE IDs also work when multiple IDs are submitted together.
    For each case the bare ID is first fetched on its own with ``getchem``;
    then all three spellings of the identifier are submitted in one
    ``getchems`` call. Each batch hit must echo the submitted term under its
    ``"query"`` key and, once that key is removed, is expected to equal the
    single-ID response.
    """
    curie_cases = [
        ("57966", "CHEMBL.COMPOUND:57966", "chembl.molecule_chembl_id:57966"),
        (57966, "chembl.compound:57966", "chembl.molecule_chembl_id:57966"),
        (57966, "CheMBL.compOUND:57966", "chembl.molecule_chembl_id:57966"),
        ("120933777", "PUBCHEM.COMPOUND:120933777", "pubchem.cid:120933777"),
        (120933777, "pubchem.compound:120933777", "pubchem.cid:120933777"),
        ("120933777", "PuBcHEm.COMPound:120933777", "pubchem.cid:120933777"),
        (57966, "CHEBI:57966", "chebi.id:57966"),
        ("57966", "chebi:57966", "chebi.id:57966"),
        (57966, "CheBi:57966", "chebi.id:57966"),
        ("11P2JDE17B", "UNII:11P2JDE17B", "unii.unii:11P2JDE17B"),
        ("11P2JDE17B", "unii:11P2JDE17B", "unii.unii:11P2JDE17B"),
        ("11P2JDE17B", "uNIi:11P2JDE17B", "unii.unii:11P2JDE17B"),
        ("dB03107", "DRUGBANK:dB03107", "drugbank.id:dB03107"),
        ("dB03107", "drugbank:dB03107", "drugbank.id:dB03107"),
        ("dB03107", "DrugBaNK:dB03107", "drugbank.id:dB03107"),
    ]

    outcomes = []
    for bare_id, prefixed_term, field_term in curie_cases:
        # Reference document fetched through the single-ID call.
        reference_doc = self.mc.getchem(_id=bare_id)

        submitted_terms = [bare_id, prefixed_term, field_term]
        batch_hits = self.mc.getchems(ids=submitted_terms)
        assert len(batch_hits) == len(submitted_terms)

        # Hits are read by position, in the same order the terms were sent.
        hits = [batch_hits[position] for position in range(len(submitted_terms))]

        # Strip the echoed "query" key from each hit (checking it matches
        # the submitted term) so the remainder can be compared directly
        # with the single-ID response.
        for hit, term in zip(hits, submitted_terms):
            echoed_term = hit.pop("query")
            assert echoed_term == str(term)

        outcomes.append(tuple(reference_doc == hit for hit in hits))

    # Gather one message per failing case so that a single assertion
    # reports every mismatch instead of only the first one.
    failure_messages = [
        f"Query Failure: {case} | Results: {outcome}"
        for outcome, case in zip(outcomes, curie_cases)
        if not all(outcome)
    ]
    every_case_passed = all(all(outcome) for outcome in outcomes)

    self.assertTrue(every_case_passed, msg="\n".join(failure_messages))

@unittest.expectedFailure
def get_getdrug(self):
c = self.mc.getdrug("CHEMBL1308")
self.assertEqual(c["_id"], "ZRALSGWEFCBTJO-UHFFFAOYSA-N")
Expand All @@ -64,10 +179,10 @@ def get_getdrug(self):

# PubChem CID
# not working yet
# c = self.mc.getdrug("CID:1990")
# self.assertEqual(c['_id'], "ZRALSGWEFCBTJO-UHFFFAOYSA-N")
# c = self.mc.getdrug("1990")
# self.assertEqual(c['_id'], "ZRALSGWEFCBTJO-UHFFFAOYSA-N")
c = self.mc.getdrug("CID:1990")
self.assertEqual(c["_id"], "ZRALSGWEFCBTJO-UHFFFAOYSA-N")
c = self.mc.getdrug("1990")
self.assertEqual(c["_id"], "ZRALSGWEFCBTJO-UHFFFAOYSA-N")

def test_getchems(self):
c_li = self.mc.getchems(
Expand Down Expand Up @@ -102,16 +217,17 @@ def test_query_chembl(self):
self.assertEqual(len(qres["hits"]), 1)
self.assertEqual(qres["hits"][0]["_id"], "RRUDCFGSUDOHDG-UHFFFAOYSA-N")

@unittest.expectedFailure
def test_query_drugcentral(self):
qres = self.mc.query("drugcentral.drug_use.contraindication.umls_cui:C0023530", fields="drugcentral", size=50)
self.assertTrue("hits" in qres)
self.assertEqual(len(qres["hits"]), 50)

# not working yet
# qres = self.mc.query('drugcentral.xrefs.kegg_drug:D00220')
# self.assertTrue('hits' in qres)
# self.assertEqual(len(qres['hits']), 1)
# self.assertEqual(qres['hits'][0]['_id'], 'ZRALSGWEFCBTJO-UHFFFAOYSA-N')
qres = self.mc.query("drugcentral.xrefs.kegg_drug:D00220")
self.assertTrue("hits" in qres)
self.assertEqual(len(qres["hits"]), 1)
self.assertEqual(qres["hits"][0]["_id"], "ZRALSGWEFCBTJO-UHFFFAOYSA-N")

def test_query_pubchem(self):
qres = self.mc.query("pubchem.molecular_formula:C2H5NO2", fields="pubchem", size=20)
Expand Down Expand Up @@ -139,15 +255,15 @@ def test_query_ndc(self):
qres = self.mc.query('ndc.productndc:"27437-051"')
self.assertTrue("hits" in qres)
self.assertEqual(len(qres["hits"]), 1)
# Temp disable this check till we fix the data issue
# self.assertEqual(qres['hits'][0]['_id'], 'KPQZUUQMTUIKBP-UHFFFAOYSA-N')
self.assertEqual(qres["hits"][0]["_id"], "KPQZUUQMTUIKBP-UHFFFAOYSA-N")

@unittest.expectedFailure
def test_query_sider(self):
qres = self.mc.query("sider.meddra.umls_id:C0232487", fields="sider", size=5)
self.assertTrue("hits" in qres)
self.assertEqual(len(qres["hits"]), 5)
# Temp disable this check till we fix the data issue
# self.assertEqual(qres['hits'][0]['_id'], 'ZRALSGWEFCBTJO-UHFFFAOYSA-N')
self.assertEqual(qres["hits"][0]["_id"], "ZRALSGWEFCBTJO-UHFFFAOYSA-N")

def test_query_unii(self):
qres = self.mc.query("unii.unii:JU58VJ6Y3B")
Expand Down

0 comments on commit 4ad2dc6

Please sign in to comment.