diff --git a/src/therapy/etl/guidetopharmacology.py b/src/therapy/etl/guidetopharmacology.py index 44033330..9e3191b8 100644 --- a/src/therapy/etl/guidetopharmacology.py +++ b/src/therapy/etl/guidetopharmacology.py @@ -74,6 +74,7 @@ def _transform_ligands(self, data: dict) -> None: "PubChem CID", "UniProt ID", "Ensembl ID", + "ChEMBL ID", "Ligand Subunit IDs", "Ligand Subunit Name", "Ligand Subunit UniProt IDs", @@ -114,14 +115,16 @@ def _transform_ligands(self, data: dict) -> None: ) if row[10]: associated_with.append(f"{NamespacePrefix.UNIPROT.value}:{row[10]}") - if row[16]: - aliases.append(self._process_name(row[16])) # IUPAC + if row[12]: + associated_with.append(f"{NamespacePrefix.CHEMBL.value}:{row[12]}") if row[17]: - # International Non-proprietary Name assigned by the WHO - aliases.append(self._process_name(row[17])) + aliases.append(self._process_name(row[17])) # IUPAC if row[18]: + # International Non-proprietary Name assigned by the WHO + aliases.append(self._process_name(row[18])) + if row[19]: # synonyms - synonyms = row[18].split("|") + synonyms = row[19].split("|") for s in synonyms: if "&" in s and ";" in s: name_code = s[s.index("&") : s.index(";") + 1] @@ -130,9 +133,9 @@ def _transform_ligands(self, data: dict) -> None: s = s.replace(name_code, "") s = html.unescape(s) aliases.append(self._process_name(s)) - if row[20]: + if row[21]: associated_with.append( - f"{NamespacePrefix.INCHIKEY.value}:{row[20]}" + f"{NamespacePrefix.INCHIKEY.value}:{row[21]}" ) if associated_with: diff --git a/tests/data/guidetopharmacology/gtop_ligand_id_mapping_2023.2.tsv b/tests/data/guidetopharmacology/gtop_ligand_id_mapping_2024.3.tsv similarity index 95% rename from tests/data/guidetopharmacology/gtop_ligand_id_mapping_2023.2.tsv rename to tests/data/guidetopharmacology/gtop_ligand_id_mapping_2024.3.tsv index fb52225d..0818d2fc 100644 --- a/tests/data/guidetopharmacology/gtop_ligand_id_mapping_2023.2.tsv +++ b/tests/data/guidetopharmacology/gtop_ligand_id_mapping_2024.3.tsv @@ -1,4 +1,4 @@ -"# GtoPdb Version: 2023.2 - published: 2023-08-07" +"# GtoPdb Version: 2024.3 - published: 2024-10-03" "Ligand id" "Name" "Species" "Type" "PubChem SID" "PubChem CID" "ChEMBl ID" "Chebi ID" "UniProt id" "Ensembl ID" "IUPAC name" "INN" "CAS" "DrugBank ID" "Drug Central ID" "2169" "arginine vasotocin" "" "Peptide" "135652004" "68649" "" "" "" "" "L-cysteinyl-L-tyrosyl-(3S)-DL-isoleucyl-L-glutaminyl-L-asparagyl-L-cysteinyl-DL-prolyl-L-arginyl-glycinamide (1->6)-disulfide" "argiprestocin" "113-80-4" "" "" "240" "cisapride" "" "Synthetic organic" "135650104" "2769" "CHEMBL1729" "CHEBI:151790" "" "" "4-amino-5-chloro-N-[1-[3-(4-fluorophenoxy)propyl]-3-methoxypiperidin-4-yl]-2-methoxybenzamide" "cisapride" "81098-60-4" "DB00604" "660" diff --git a/tests/data/guidetopharmacology/gtop_ligands_2023.2.tsv b/tests/data/guidetopharmacology/gtop_ligands_2023.2.tsv deleted file mode 100644 index 13cca743..00000000 --- a/tests/data/guidetopharmacology/gtop_ligands_2023.2.tsv +++ /dev/null @@ -1,8 +0,0 @@ -"# GtoPdb Version: 2023.2 - published: 2023-08-07" -"Ligand ID" "Name" "Species" "Type" "Approved" "Withdrawn" "Labelled" "Radioactive" "PubChem SID" "PubChem CID" "UniProt ID" "Ensembl ID" "Ligand Subunit IDs" "Ligand Subunit Name" "Ligand Subunit UniProt IDs" "Ligand Subunit Ensembl IDs" "IUPAC name" "INN" "Synonyms" "SMILES" "InChIKey" "InChI" "GtoImmuPdb" "GtoMPdb" "Antibacterial" -"2169" "arginine vasotocin" "" "Peptide" "" "" "" "" "135652004" "68649" "" "" "" "" "" "" "L-cysteinyl-L-tyrosyl-(3S)-DL-isoleucyl-L-glutaminyl-L-asparagyl-L-cysteinyl-DL-prolyl-L-arginyl-glycinamide (1->6)-disulfide" "argiprestocin" "[Arg8]vasotocin|AVT" "CC[C@@H](C1NC(=O)[C@H](Cc2ccc(cc2)O)NC(=O)[C@@H](N)CSSC[C@H](NC(=O)[C@@H](NC(=O)[C@@H](NC1=O)CCC(=O)N)CC(=O)N)C(=O)N1CCCC1C(=O)N[C@H](C(=O)NCC(=O)N)CCCN=C(N)N)C" "OXDZADMCOWPSOC-ICBIOJHSSA-N" "InChI=1S/C43H67N15O12S2/c1-3-21(2)34-41(69)53-26(12-13-31(45)60)37(65)55-28(17-32(46)61)38(66)56-29(20-72-71-19-24(44)35(63)54-27(39(67)57-34)16-22-8-10-23(59)11-9-22)42(70)58-15-5-7-30(58)40(68)52-25(6-4-14-50-43(48)49)36(64)51-18-33(47)62/h8-11,21,24-30,34,59H,3-7,12-20,44H2,1-2H3,(H2,45,60)(H2,46,61)(H2,47,62)(H,51,64)(H,52,68)(H,53,69)(H,54,63)(H,55,65)(H,56,66)(H,57,67)(H4,48,49,50)/t21-,24-,25-,26-,27-,28-,29-,30?,34?/m0/s1" "" "" "" -"240" "cisapride" "" "Synthetic organic" "yes" "yes" "" "" "135650104" "2769" "" "" "" "" "" "" "4-amino-5-chloro-N-[1-[3-(4-fluorophenoxy)propyl]-3-methoxypiperidin-4-yl]-2-methoxybenzamide" "cisapride" "Prepulsid®|Propulsid®" "COC1CN(CCCOc2ccc(cc2)F)CCC1NC(=O)c1cc(Cl)c(cc1OC)N" "DCSUBABJRXZOMT-UHFFFAOYSA-N" "InChI=1S/C23H29ClFN3O4/c1-30-21-13-19(26)18(24)12-17(21)23(29)27-20-8-10-28(14-22(20)31-2)9-3-11-32-16-6-4-15(25)5-7-16/h4-7,12-13,20,22H,3,8-11,14,26H2,1-2H3,(H,27,29)" "" "" "" -"5343" "cisplatin" "" "Inorganic" "yes" "" "" "" "178102005" "441203" "" "" "" "" "" "" "" "cisplatin" "Platinol®" "" "" "" "" "" "" -"3303" "L745870" "" "Synthetic organic" "" "" "" "" "178100340" "5311200" "" "" "" "" "" "" "3-[[4-(4-chlorophenyl)piperazin-1-yl]methyl]-1H-pyrrolo[2,3-b]pyridine" "" "L 745870|L-745,870" "Clc1ccc(cc1)N1CCN(CC1)Cc1c[nH]c2c1cccn2" "OGJGQVFWEPNYSB-UHFFFAOYSA-N" "InChI=1S/C18H19ClN4/c19-15-3-5-16(6-4-15)23-10-8-22(9-11-23)13-14-12-21-18-17(14)2-1-7-20-18/h1-7,12H,8-11,13H2,(H,20,21)" "" "" "" -"2804" "phenobarbital" "" "Synthetic organic" "yes" "" "" "" "135650817" "4763" "" "" "" "" "" "" "5-ethyl-5-phenyl-1,3-diazinane-2,4,6-trione" "phenobarbital" "fenobarbital|Luminal®|phenobarb|phenobarbital sodium|phenobarbitone|phenylethylbarbiturate" "CCC1(C(=O)NC(=O)NC1=O)c1ccccc1" "DDBREPKUVSBGFI-UHFFFAOYSA-N" "InChI=1S/C12H12N2O3/c1-2-12(8-6-4-3-5-7-8)9(15)13-11(17)14-10(12)16/h3-7H,2H2,1H3,(H2,13,14,15,16,17)" "" "" "" -"5260" "rolipram" "" "Synthetic organic" "" "" "" "" "178101944" "5092" "" "" "" "" "" "" "4-[3-(cyclopentyloxy)-4-methoxyphenyl]pyrrolidin-2-one" "rolipram" "(±)-rolipram|(R,S)-rolipram" "COc1ccc(cc1OC1CCCC1)C1CNC(=O)C1" "HJORMJIFDVBMOB-UHFFFAOYSA-N" "InChI=1S/C16H21NO3/c1-19-14-7-6-11(12-9-16(18)17-10-12)8-15(14)20-13-4-2-3-5-13/h6-8,12-13H,2-5,9-10H2,1H3,(H,17,18)" "yes" "" "" diff --git a/tests/data/guidetopharmacology/gtop_ligands_2024.3.tsv b/tests/data/guidetopharmacology/gtop_ligands_2024.3.tsv new file mode 100644 index 00000000..245cfc25 --- /dev/null +++ b/tests/data/guidetopharmacology/gtop_ligands_2024.3.tsv @@ -0,0 +1,8 @@ +"# GtoPdb Version: 2024.3 - published: 2024-10-03" +"Ligand ID" "Name" "Species" "Type" "Approved" "Withdrawn" "Labelled" "Radioactive" "PubChem SID" "PubChem CID" "UniProt ID" "Ensembl ID" "ChEMBL ID" "Ligand Subunit IDs" "Ligand Subunit Name" "Ligand Subunit UniProt IDs" "Ligand Subunit Ensembl IDs" "IUPAC name" "INN" "Synonyms" "SMILES" "InChIKey" "InChI" "GtoImmuPdb" "GtoMPdb" "Antibacterial" +"2169" "arginine vasotocin" "" "Peptide" "" "" "" "" "135652004" "68649" "" "" "" "" "" "" "" "L-cysteinyl-L-tyrosyl-(3S)-DL-isoleucyl-L-glutaminyl-L-asparagyl-L-cysteinyl-DL-prolyl-L-arginyl-glycinamide (1->6)-disulfide" "argiprestocin" "[Arg8]vasotocin|AVT" "CC[C@@H](C1NC(=O)[C@H](Cc2ccc(cc2)O)NC(=O)[C@@H](N)CSSC[C@H](NC(=O)[C@@H](NC(=O)[C@@H](NC1=O)CCC(=O)N)CC(=O)N)C(=O)N1CCCC1C(=O)N[C@H](C(=O)NCC(=O)N)CCCN=C(N)N)C" "OXDZADMCOWPSOC-ICBIOJHSSA-N" "InChI=1S/C43H67N15O12S2/c1-3-21(2)34-41(69)53-26(12-13-31(45)60)37(65)55-28(17-32(46)61)38(66)56-29(20-72-71-19-24(44)35(63)54-27(39(67)57-34)16-22-8-10-23(59)11-9-22)42(70)58-15-5-7-30(58)40(68)52-25(6-4-14-50-43(48)49)36(64)51-18-33(47)62/h8-11,21,24-30,34,59H,3-7,12-20,44H2,1-2H3,(H2,45,60)(H2,46,61)(H2,47,62)(H,51,64)(H,52,68)(H,53,69)(H,54,63)(H,55,65)(H,56,66)(H,57,67)(H4,48,49,50)/t21-,24-,25-,26-,27-,28-,29-,30?,34?/m0/s1" "" "" "" +"240" "cisapride" "" "Synthetic organic" "yes" "yes" "" "" "135650104" "2769" "" "" "CHEMBL1729" "" "" "" "" "4-amino-5-chloro-N-[1-[3-(4-fluorophenoxy)propyl]-3-methoxypiperidin-4-yl]-2-methoxybenzamide" "cisapride" "Prepulsid®|Propulsid®" "COC1CN(CCCOc2ccc(cc2)F)CCC1NC(=O)c1cc(Cl)c(cc1OC)N" "DCSUBABJRXZOMT-UHFFFAOYSA-N" "InChI=1S/C23H29ClFN3O4/c1-30-21-13-19(26)18(24)12-17(21)23(29)27-20-8-10-28(14-22(20)31-2)9-3-11-32-16-6-4-15(25)5-7-16/h4-7,12-13,20,22H,3,8-11,14,26H2,1-2H3,(H,27,29)" "" "" "" +"5343" "cisplatin" "" "Inorganic" "yes" "" "" "" "178102005" "441203" "" "" "CHEMBL11359" "" "" "" "" "" "cisplatin" "Platinol®" "" "" "" "" "" "" +"3303" "L745870" "" "Synthetic organic" "" "" "" "" "178100340" "5311200" "" "" "CHEMBL267014" "" "" "" "" "3-[[4-(4-chlorophenyl)piperazin-1-yl]methyl]-1H-pyrrolo[2,3-b]pyridine" "" "L 745870|L-745,870" "Clc1ccc(cc1)N1CCN(CC1)Cc1c[nH]c2c1cccn2" "OGJGQVFWEPNYSB-UHFFFAOYSA-N" "InChI=1S/C18H19ClN4/c19-15-3-5-16(6-4-15)23-10-8-22(9-11-23)13-14-12-21-18-17(14)2-1-7-20-18/h1-7,12H,8-11,13H2,(H,20,21)" "" "" "" +"2804" "phenobarbital" "" "Synthetic organic" "yes" "" "" "" "135650817" "4763" "" "" "CHEMBL40" "" "" "" "" "5-ethyl-5-phenyl-1,3-diazinane-2,4,6-trione" "phenobarbital" "fenobarbital|Luminal®|phenobarb|phenobarbital sodium|phenobarbitone|phenylethylbarbiturate" "CCC1(C(=O)NC(=O)NC1=O)c1ccccc1" "DDBREPKUVSBGFI-UHFFFAOYSA-N" "InChI=1S/C12H12N2O3/c1-2-12(8-6-4-3-5-7-8)9(15)13-11(17)14-10(12)16/h3-7H,2H2,1H3,(H2,13,14,15,16,17)" "" "" "" +"5260" "rolipram" "" "Synthetic organic" "" "" "" "" "178101944" "5092" "" "" "CHEMBL63" "" "" "" "" "4-[3-(cyclopentyloxy)-4-methoxyphenyl]pyrrolidin-2-one" "rolipram" "(±)-rolipram|(R,S)-rolipram" "COc1ccc(cc1OC1CCCC1)C1CNC(=O)C1" "HJORMJIFDVBMOB-UHFFFAOYSA-N" "InChI=1S/C16H21NO3/c1-19-14-7-6-11(12-9-16(18)17-10-12)8-15(14)20-13-4-2-3-5-13/h6-8,12-13H,2-5,9-10H2,1H3,(H,17,18)" "yes" "" ""