From a788ffe8b797e66800c1dffaf6a9f216852221fe Mon Sep 17 00:00:00 2001 From: James Stevenson Date: Wed, 13 Nov 2024 12:26:00 -0500 Subject: [PATCH] fix: use latest GtoP data structure (#438) No real data changes, just need to alter some indices --- src/therapy/etl/guidetopharmacology.py | 15 ++++++++------- ...23.2.tsv => gtop_ligand_id_mapping_2024.3.tsv} | 2 +- .../guidetopharmacology/gtop_ligands_2023.2.tsv | 8 -------- .../guidetopharmacology/gtop_ligands_2024.3.tsv | 8 ++++++++ 4 files changed, 17 insertions(+), 16 deletions(-) rename tests/data/guidetopharmacology/{gtop_ligand_id_mapping_2023.2.tsv => gtop_ligand_id_mapping_2024.3.tsv} (95%) delete mode 100644 tests/data/guidetopharmacology/gtop_ligands_2023.2.tsv create mode 100644 tests/data/guidetopharmacology/gtop_ligands_2024.3.tsv diff --git a/src/therapy/etl/guidetopharmacology.py b/src/therapy/etl/guidetopharmacology.py index 44033330..d4aef516 100644 --- a/src/therapy/etl/guidetopharmacology.py +++ b/src/therapy/etl/guidetopharmacology.py @@ -74,6 +74,7 @@ def _transform_ligands(self, data: dict) -> None: "PubChem CID", "UniProt ID", "Ensembl ID", + "ChEMBL ID", "Ligand Subunit IDs", "Ligand Subunit Name", "Ligand Subunit UniProt IDs", @@ -114,14 +115,14 @@ def _transform_ligands(self, data: dict) -> None: ) if row[10]: associated_with.append(f"{NamespacePrefix.UNIPROT.value}:{row[10]}") - if row[16]: - aliases.append(self._process_name(row[16])) # IUPAC if row[17]: - # International Non-proprietary Name assigned by the WHO - aliases.append(self._process_name(row[17])) + aliases.append(self._process_name(row[17])) # IUPAC if row[18]: + # International Non-proprietary Name assigned by the WHO + aliases.append(self._process_name(row[18])) + if row[19]: # synonyms - synonyms = row[18].split("|") + synonyms = row[19].split("|") for s in synonyms: if "&" in s and ";" in s: name_code = s[s.index("&") : s.index(";") + 1] @@ -130,9 +131,9 @@ def _transform_ligands(self, data: dict) -> None: s = s.replace(name_code, "") s = html.unescape(s) aliases.append(self._process_name(s)) - if row[20]: + if row[21]: associated_with.append( - f"{NamespacePrefix.INCHIKEY.value}:{row[20]}" + f"{NamespacePrefix.INCHIKEY.value}:{row[21]}" ) if associated_with: diff --git a/tests/data/guidetopharmacology/gtop_ligand_id_mapping_2023.2.tsv b/tests/data/guidetopharmacology/gtop_ligand_id_mapping_2024.3.tsv similarity index 95% rename from tests/data/guidetopharmacology/gtop_ligand_id_mapping_2023.2.tsv rename to tests/data/guidetopharmacology/gtop_ligand_id_mapping_2024.3.tsv index fb52225d..0818d2fc 100644 --- a/tests/data/guidetopharmacology/gtop_ligand_id_mapping_2023.2.tsv +++ b/tests/data/guidetopharmacology/gtop_ligand_id_mapping_2024.3.tsv @@ -1,4 +1,4 @@ -"# GtoPdb Version: 2023.2 - published: 2023-08-07" +"# GtoPdb Version: 2024.3 - published: 2024-10-03" "Ligand id" "Name" "Species" "Type" "PubChem SID" "PubChem CID" "ChEMBl ID" "Chebi ID" "UniProt id" "Ensembl ID" "IUPAC name" "INN" "CAS" "DrugBank ID" "Drug Central ID" "2169" "arginine vasotocin" "" "Peptide" "135652004" "68649" "" "" "" "" "L-cysteinyl-L-tyrosyl-(3S)-DL-isoleucyl-L-glutaminyl-L-asparagyl-L-cysteinyl-DL-prolyl-L-arginyl-glycinamide (1->6)-disulfide" "argiprestocin" "113-80-4" "" "" "240" "cisapride" "" "Synthetic organic" "135650104" "2769" "CHEMBL1729" "CHEBI:151790" "" "" "4-amino-5-chloro-N-[1-[3-(4-fluorophenoxy)propyl]-3-methoxypiperidin-4-yl]-2-methoxybenzamide" "cisapride" "81098-60-4" "DB00604" "660" diff --git a/tests/data/guidetopharmacology/gtop_ligands_2023.2.tsv b/tests/data/guidetopharmacology/gtop_ligands_2023.2.tsv deleted file mode 100644 index 13cca743..00000000 --- a/tests/data/guidetopharmacology/gtop_ligands_2023.2.tsv +++ /dev/null @@ -1,8 +0,0 @@ -"# GtoPdb Version: 2023.2 - published: 2023-08-07" -"Ligand ID" "Name" "Species" "Type" "Approved" "Withdrawn" "Labelled" "Radioactive" "PubChem SID" "PubChem CID" "UniProt ID" "Ensembl ID" "Ligand Subunit IDs" "Ligand Subunit Name" "Ligand Subunit UniProt IDs" "Ligand Subunit Ensembl IDs" "IUPAC name" "INN" "Synonyms" "SMILES" "InChIKey" "InChI" "GtoImmuPdb" "GtoMPdb" "Antibacterial" -"2169" "arginine vasotocin" "" "Peptide" "" "" "" "" "135652004" "68649" "" "" "" "" "" "" "L-cysteinyl-L-tyrosyl-(3S)-DL-isoleucyl-L-glutaminyl-L-asparagyl-L-cysteinyl-DL-prolyl-L-arginyl-glycinamide (1->6)-disulfide" "argiprestocin" "[Arg8]vasotocin|AVT" "CC[C@@H](C1NC(=O)[C@H](Cc2ccc(cc2)O)NC(=O)[C@@H](N)CSSC[C@H](NC(=O)[C@@H](NC(=O)[C@@H](NC1=O)CCC(=O)N)CC(=O)N)C(=O)N1CCCC1C(=O)N[C@H](C(=O)NCC(=O)N)CCCN=C(N)N)C" "OXDZADMCOWPSOC-ICBIOJHSSA-N" "InChI=1S/C43H67N15O12S2/c1-3-21(2)34-41(69)53-26(12-13-31(45)60)37(65)55-28(17-32(46)61)38(66)56-29(20-72-71-19-24(44)35(63)54-27(39(67)57-34)16-22-8-10-23(59)11-9-22)42(70)58-15-5-7-30(58)40(68)52-25(6-4-14-50-43(48)49)36(64)51-18-33(47)62/h8-11,21,24-30,34,59H,3-7,12-20,44H2,1-2H3,(H2,45,60)(H2,46,61)(H2,47,62)(H,51,64)(H,52,68)(H,53,69)(H,54,63)(H,55,65)(H,56,66)(H,57,67)(H4,48,49,50)/t21-,24-,25-,26-,27-,28-,29-,30?,34?/m0/s1" "" "" "" -"240" "cisapride" "" "Synthetic organic" "yes" "yes" "" "" "135650104" "2769" "" "" "" "" "" "" "4-amino-5-chloro-N-[1-[3-(4-fluorophenoxy)propyl]-3-methoxypiperidin-4-yl]-2-methoxybenzamide" "cisapride" "Prepulsid®|Propulsid®" "COC1CN(CCCOc2ccc(cc2)F)CCC1NC(=O)c1cc(Cl)c(cc1OC)N" "DCSUBABJRXZOMT-UHFFFAOYSA-N" "InChI=1S/C23H29ClFN3O4/c1-30-21-13-19(26)18(24)12-17(21)23(29)27-20-8-10-28(14-22(20)31-2)9-3-11-32-16-6-4-15(25)5-7-16/h4-7,12-13,20,22H,3,8-11,14,26H2,1-2H3,(H,27,29)" "" "" "" -"5343" "cisplatin" "" "Inorganic" "yes" "" "" "" "178102005" "441203" "" "" "" "" "" "" "" "cisplatin" "Platinol®" "" "" "" "" "" "" -"3303" "L745870" "" "Synthetic organic" "" "" "" "" "178100340" "5311200" "" "" "" "" "" "" "3-[[4-(4-chlorophenyl)piperazin-1-yl]methyl]-1H-pyrrolo[2,3-b]pyridine" "" "L 745870|L-745,870" "Clc1ccc(cc1)N1CCN(CC1)Cc1c[nH]c2c1cccn2" "OGJGQVFWEPNYSB-UHFFFAOYSA-N" "InChI=1S/C18H19ClN4/c19-15-3-5-16(6-4-15)23-10-8-22(9-11-23)13-14-12-21-18-17(14)2-1-7-20-18/h1-7,12H,8-11,13H2,(H,20,21)" "" "" "" -"2804" "phenobarbital" "" "Synthetic organic" "yes" "" "" "" "135650817" "4763" "" "" "" "" "" "" "5-ethyl-5-phenyl-1,3-diazinane-2,4,6-trione" "phenobarbital" "fenobarbital|Luminal®|phenobarb|phenobarbital sodium|phenobarbitone|phenylethylbarbiturate" "CCC1(C(=O)NC(=O)NC1=O)c1ccccc1" "DDBREPKUVSBGFI-UHFFFAOYSA-N" "InChI=1S/C12H12N2O3/c1-2-12(8-6-4-3-5-7-8)9(15)13-11(17)14-10(12)16/h3-7H,2H2,1H3,(H2,13,14,15,16,17)" "" "" "" -"5260" "rolipram" "" "Synthetic organic" "" "" "" "" "178101944" "5092" "" "" "" "" "" "" "4-[3-(cyclopentyloxy)-4-methoxyphenyl]pyrrolidin-2-one" "rolipram" "(±)-rolipram|(R,S)-rolipram" "COc1ccc(cc1OC1CCCC1)C1CNC(=O)C1" "HJORMJIFDVBMOB-UHFFFAOYSA-N" "InChI=1S/C16H21NO3/c1-19-14-7-6-11(12-9-16(18)17-10-12)8-15(14)20-13-4-2-3-5-13/h6-8,12-13H,2-5,9-10H2,1H3,(H,17,18)" "yes" "" "" diff --git a/tests/data/guidetopharmacology/gtop_ligands_2024.3.tsv b/tests/data/guidetopharmacology/gtop_ligands_2024.3.tsv new file mode 100644 index 00000000..245cfc25 --- /dev/null +++ b/tests/data/guidetopharmacology/gtop_ligands_2024.3.tsv @@ -0,0 +1,8 @@ +"# GtoPdb Version: 2024.3 - published: 2024-10-03" +"Ligand ID" "Name" "Species" "Type" "Approved" "Withdrawn" "Labelled" "Radioactive" "PubChem SID" "PubChem CID" "UniProt ID" "Ensembl ID" "ChEMBL ID" "Ligand Subunit IDs" "Ligand Subunit Name" "Ligand Subunit UniProt IDs" "Ligand Subunit Ensembl IDs" "IUPAC name" "INN" "Synonyms" "SMILES" "InChIKey" "InChI" "GtoImmuPdb" "GtoMPdb" "Antibacterial" +"2169" "arginine vasotocin" "" "Peptide" "" "" "" "" "135652004" "68649" "" "" "" "" "" "" "" "L-cysteinyl-L-tyrosyl-(3S)-DL-isoleucyl-L-glutaminyl-L-asparagyl-L-cysteinyl-DL-prolyl-L-arginyl-glycinamide (1->6)-disulfide" "argiprestocin" "[Arg8]vasotocin|AVT" "CC[C@@H](C1NC(=O)[C@H](Cc2ccc(cc2)O)NC(=O)[C@@H](N)CSSC[C@H](NC(=O)[C@@H](NC(=O)[C@@H](NC1=O)CCC(=O)N)CC(=O)N)C(=O)N1CCCC1C(=O)N[C@H](C(=O)NCC(=O)N)CCCN=C(N)N)C" "OXDZADMCOWPSOC-ICBIOJHSSA-N" "InChI=1S/C43H67N15O12S2/c1-3-21(2)34-41(69)53-26(12-13-31(45)60)37(65)55-28(17-32(46)61)38(66)56-29(20-72-71-19-24(44)35(63)54-27(39(67)57-34)16-22-8-10-23(59)11-9-22)42(70)58-15-5-7-30(58)40(68)52-25(6-4-14-50-43(48)49)36(64)51-18-33(47)62/h8-11,21,24-30,34,59H,3-7,12-20,44H2,1-2H3,(H2,45,60)(H2,46,61)(H2,47,62)(H,51,64)(H,52,68)(H,53,69)(H,54,63)(H,55,65)(H,56,66)(H,57,67)(H4,48,49,50)/t21-,24-,25-,26-,27-,28-,29-,30?,34?/m0/s1" "" "" "" +"240" "cisapride" "" "Synthetic organic" "yes" "yes" "" "" "135650104" "2769" "" "" "CHEMBL1729" "" "" "" "" "4-amino-5-chloro-N-[1-[3-(4-fluorophenoxy)propyl]-3-methoxypiperidin-4-yl]-2-methoxybenzamide" "cisapride" "Prepulsid®|Propulsid®" "COC1CN(CCCOc2ccc(cc2)F)CCC1NC(=O)c1cc(Cl)c(cc1OC)N" "DCSUBABJRXZOMT-UHFFFAOYSA-N" "InChI=1S/C23H29ClFN3O4/c1-30-21-13-19(26)18(24)12-17(21)23(29)27-20-8-10-28(14-22(20)31-2)9-3-11-32-16-6-4-15(25)5-7-16/h4-7,12-13,20,22H,3,8-11,14,26H2,1-2H3,(H,27,29)" "" "" "" +"5343" "cisplatin" "" "Inorganic" "yes" "" "" "" "178102005" "441203" "" "" "CHEMBL11359" "" "" "" "" "" "cisplatin" "Platinol®" "" "" "" "" "" "" +"3303" "L745870" "" "Synthetic organic" "" "" "" "" "178100340" "5311200" "" "" "CHEMBL267014" "" "" "" "" "3-[[4-(4-chlorophenyl)piperazin-1-yl]methyl]-1H-pyrrolo[2,3-b]pyridine" "" "L 745870|L-745,870" "Clc1ccc(cc1)N1CCN(CC1)Cc1c[nH]c2c1cccn2" "OGJGQVFWEPNYSB-UHFFFAOYSA-N" "InChI=1S/C18H19ClN4/c19-15-3-5-16(6-4-15)23-10-8-22(9-11-23)13-14-12-21-18-17(14)2-1-7-20-18/h1-7,12H,8-11,13H2,(H,20,21)" "" "" "" +"2804" "phenobarbital" "" "Synthetic organic" "yes" "" "" "" "135650817" "4763" "" "" "CHEMBL40" "" "" "" "" "5-ethyl-5-phenyl-1,3-diazinane-2,4,6-trione" "phenobarbital" "fenobarbital|Luminal®|phenobarb|phenobarbital sodium|phenobarbitone|phenylethylbarbiturate" "CCC1(C(=O)NC(=O)NC1=O)c1ccccc1" "DDBREPKUVSBGFI-UHFFFAOYSA-N" "InChI=1S/C12H12N2O3/c1-2-12(8-6-4-3-5-7-8)9(15)13-11(17)14-10(12)16/h3-7H,2H2,1H3,(H2,13,14,15,16,17)" "" "" "" +"5260" "rolipram" "" "Synthetic organic" "" "" "" "" "178101944" "5092" "" "" "CHEMBL63" "" "" "" "" "4-[3-(cyclopentyloxy)-4-methoxyphenyl]pyrrolidin-2-one" "rolipram" "(±)-rolipram|(R,S)-rolipram" "COc1ccc(cc1OC1CCCC1)C1CNC(=O)C1" "HJORMJIFDVBMOB-UHFFFAOYSA-N" "InChI=1S/C16H21NO3/c1-19-14-7-6-11(12-9-16(18)17-10-12)8-15(14)20-13-4-2-3-5-13/h6-8,12-13H,2-5,9-10H2,1H3,(H,17,18)" "yes" "" ""