Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: use latest GtoP data structure #438

Merged
merged 4 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@ etl = [
"wikibaseintegrator>=0.12.0",
"wags-tails",
"tqdm",
"rich"
"rich",
"pyyaml"
]
test = ["pytest", "pytest-cov", "pytest-mock"]
test = ["pytest", "pytest-cov", "pytest-mock", "isodate"]
dev = ["pre-commit>=3.7.1", "ruff==0.5.0", "lxml", "xmlformatter", "types-pyyaml"]

[project.urls]
Expand Down
15 changes: 8 additions & 7 deletions src/therapy/etl/guidetopharmacology.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def _transform_ligands(self, data: dict) -> None:
"PubChem CID",
"UniProt ID",
"Ensembl ID",
"ChEMBL ID",
"Ligand Subunit IDs",
"Ligand Subunit Name",
"Ligand Subunit UniProt IDs",
Expand Down Expand Up @@ -114,14 +115,14 @@ def _transform_ligands(self, data: dict) -> None:
)
if row[10]:
associated_with.append(f"{NamespacePrefix.UNIPROT.value}:{row[10]}")
if row[16]:
aliases.append(self._process_name(row[16])) # IUPAC
if row[17]:
# International Non-proprietary Name assigned by the WHO
aliases.append(self._process_name(row[17]))
aliases.append(self._process_name(row[17])) # IUPAC
if row[18]:
# International Non-proprietary Name assigned by the WHO
aliases.append(self._process_name(row[18]))
if row[19]:
# synonyms
synonyms = row[18].split("|")
synonyms = row[19].split("|")
for s in synonyms:
if "&" in s and ";" in s:
name_code = s[s.index("&") : s.index(";") + 1]
Expand All @@ -130,9 +131,9 @@ def _transform_ligands(self, data: dict) -> None:
s = s.replace(name_code, "")
s = html.unescape(s)
aliases.append(self._process_name(s))
if row[20]:
if row[21]:
associated_with.append(
f"{NamespacePrefix.INCHIKEY.value}:{row[20]}"
f"{NamespacePrefix.INCHIKEY.value}:{row[21]}"
)

if associated_with:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"# GtoPdb Version: 2023.2 - published: 2023-08-07"
"# GtoPdb Version: 2024.3 - published: 2024-10-03"
"Ligand id" "Name" "Species" "Type" "PubChem SID" "PubChem CID" "ChEMBl ID" "Chebi ID" "UniProt id" "Ensembl ID" "IUPAC name" "INN" "CAS" "DrugBank ID" "Drug Central ID"
"2169" "arginine vasotocin" "" "Peptide" "135652004" "68649" "" "" "" "" "L-cysteinyl-L-tyrosyl-(3S)-DL-isoleucyl-L-glutaminyl-L-asparagyl-L-cysteinyl-DL-prolyl-L-arginyl-glycinamide (1->6)-disulfide" "argiprestocin" "113-80-4" "" ""
"240" "cisapride" "" "Synthetic organic" "135650104" "2769" "CHEMBL1729" "CHEBI:151790" "" "" "4-amino-5-chloro-N-[1-[3-(4-fluorophenoxy)propyl]-3-methoxypiperidin-4-yl]-2-methoxybenzamide" "cisapride" "81098-60-4" "DB00604" "660"
Expand Down
8 changes: 0 additions & 8 deletions tests/data/guidetopharmacology/gtop_ligands_2023.2.tsv

This file was deleted.

8 changes: 8 additions & 0 deletions tests/data/guidetopharmacology/gtop_ligands_2024.3.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"# GtoPdb Version: 2024.3 - published: 2024-10-03"
"Ligand ID" "Name" "Species" "Type" "Approved" "Withdrawn" "Labelled" "Radioactive" "PubChem SID" "PubChem CID" "UniProt ID" "Ensembl ID" "ChEMBL ID" "Ligand Subunit IDs" "Ligand Subunit Name" "Ligand Subunit UniProt IDs" "Ligand Subunit Ensembl IDs" "IUPAC name" "INN" "Synonyms" "SMILES" "InChIKey" "InChI" "GtoImmuPdb" "GtoMPdb" "Antibacterial"
"2169" "arginine vasotocin" "" "Peptide" "" "" "" "" "135652004" "68649" "" "" "" "" "" "" "" "L-cysteinyl-L-tyrosyl-(3S)-DL-isoleucyl-L-glutaminyl-L-asparagyl-L-cysteinyl-DL-prolyl-L-arginyl-glycinamide (1->6)-disulfide" "argiprestocin" "[Arg<sup>8</sup>]vasotocin|AVT" "CC[C@@H](C1NC(=O)[C@H](Cc2ccc(cc2)O)NC(=O)[C@@H](N)CSSC[C@H](NC(=O)[C@@H](NC(=O)[C@@H](NC1=O)CCC(=O)N)CC(=O)N)C(=O)N1CCCC1C(=O)N[C@H](C(=O)NCC(=O)N)CCCN=C(N)N)C" "OXDZADMCOWPSOC-ICBIOJHSSA-N" "InChI=1S/C43H67N15O12S2/c1-3-21(2)34-41(69)53-26(12-13-31(45)60)37(65)55-28(17-32(46)61)38(66)56-29(20-72-71-19-24(44)35(63)54-27(39(67)57-34)16-22-8-10-23(59)11-9-22)42(70)58-15-5-7-30(58)40(68)52-25(6-4-14-50-43(48)49)36(64)51-18-33(47)62/h8-11,21,24-30,34,59H,3-7,12-20,44H2,1-2H3,(H2,45,60)(H2,46,61)(H2,47,62)(H,51,64)(H,52,68)(H,53,69)(H,54,63)(H,55,65)(H,56,66)(H,57,67)(H4,48,49,50)/t21-,24-,25-,26-,27-,28-,29-,30?,34?/m0/s1" "" "" ""
"240" "cisapride" "" "Synthetic organic" "yes" "yes" "" "" "135650104" "2769" "" "" "CHEMBL1729" "" "" "" "" "4-amino-5-chloro-N-[1-[3-(4-fluorophenoxy)propyl]-3-methoxypiperidin-4-yl]-2-methoxybenzamide" "cisapride" "Prepulsid&reg;|Propulsid&reg;" "COC1CN(CCCOc2ccc(cc2)F)CCC1NC(=O)c1cc(Cl)c(cc1OC)N" "DCSUBABJRXZOMT-UHFFFAOYSA-N" "InChI=1S/C23H29ClFN3O4/c1-30-21-13-19(26)18(24)12-17(21)23(29)27-20-8-10-28(14-22(20)31-2)9-3-11-32-16-6-4-15(25)5-7-16/h4-7,12-13,20,22H,3,8-11,14,26H2,1-2H3,(H,27,29)" "" "" ""
"5343" "cisplatin" "" "Inorganic" "yes" "" "" "" "178102005" "441203" "" "" "CHEMBL11359" "" "" "" "" "" "cisplatin" "Platinol&reg;" "" "" "" "" "" ""
"3303" "L745870" "" "Synthetic organic" "" "" "" "" "178100340" "5311200" "" "" "CHEMBL267014" "" "" "" "" "3-[[4-(4-chlorophenyl)piperazin-1-yl]methyl]-1H-pyrrolo[2,3-b]pyridine" "" "L 745870|L-745,870" "Clc1ccc(cc1)N1CCN(CC1)Cc1c[nH]c2c1cccn2" "OGJGQVFWEPNYSB-UHFFFAOYSA-N" "InChI=1S/C18H19ClN4/c19-15-3-5-16(6-4-15)23-10-8-22(9-11-23)13-14-12-21-18-17(14)2-1-7-20-18/h1-7,12H,8-11,13H2,(H,20,21)" "" "" ""
"2804" "phenobarbital" "" "Synthetic organic" "yes" "" "" "" "135650817" "4763" "" "" "CHEMBL40" "" "" "" "" "5-ethyl-5-phenyl-1,3-diazinane-2,4,6-trione" "phenobarbital" "fenobarbital|Luminal&reg;|phenobarb|phenobarbital sodium|phenobarbitone|phenylethylbarbiturate" "CCC1(C(=O)NC(=O)NC1=O)c1ccccc1" "DDBREPKUVSBGFI-UHFFFAOYSA-N" "InChI=1S/C12H12N2O3/c1-2-12(8-6-4-3-5-7-8)9(15)13-11(17)14-10(12)16/h3-7H,2H2,1H3,(H2,13,14,15,16,17)" "" "" ""
"5260" "rolipram" "" "Synthetic organic" "" "" "" "" "178101944" "5092" "" "" "CHEMBL63" "" "" "" "" "4-[3-(cyclopentyloxy)-4-methoxyphenyl]pyrrolidin-2-one" "rolipram" "(&plusmn;)-rolipram|(R,S)-rolipram" "COc1ccc(cc1OC1CCCC1)C1CNC(=O)C1" "HJORMJIFDVBMOB-UHFFFAOYSA-N" "InChI=1S/C16H21NO3/c1-19-14-7-6-11(12-9-16(18)17-10-12)8-15(14)20-13-4-2-3-5-13/h6-8,12-13H,2-5,9-10H2,1H3,(H,17,18)" "yes" "" ""