diff --git a/src/therapy/etl/guidetopharmacology.py b/src/therapy/etl/guidetopharmacology.py
index 44033330..9e3191b8 100644
--- a/src/therapy/etl/guidetopharmacology.py
+++ b/src/therapy/etl/guidetopharmacology.py
@@ -74,6 +74,7 @@ def _transform_ligands(self, data: dict) -> None:
"PubChem CID",
"UniProt ID",
"Ensembl ID",
+ "ChEMBL ID",
"Ligand Subunit IDs",
"Ligand Subunit Name",
"Ligand Subunit UniProt IDs",
@@ -114,14 +115,16 @@ def _transform_ligands(self, data: dict) -> None:
)
if row[10]:
associated_with.append(f"{NamespacePrefix.UNIPROT.value}:{row[10]}")
- if row[16]:
- aliases.append(self._process_name(row[16])) # IUPAC
+ if row[12]:
+ associated_with.append(f"{NamespacePrefix.CHEMBL.value}:{row[12]}")
if row[17]:
- # International Non-proprietary Name assigned by the WHO
- aliases.append(self._process_name(row[17]))
+ aliases.append(self._process_name(row[17])) # IUPAC
if row[18]:
+ # International Non-proprietary Name assigned by the WHO
+ aliases.append(self._process_name(row[18]))
+ if row[19]:
# synonyms
- synonyms = row[18].split("|")
+ synonyms = row[19].split("|")
for s in synonyms:
if "&" in s and ";" in s:
name_code = s[s.index("&") : s.index(";") + 1]
@@ -130,9 +133,9 @@ def _transform_ligands(self, data: dict) -> None:
s = s.replace(name_code, "")
s = html.unescape(s)
aliases.append(self._process_name(s))
- if row[20]:
+ if row[21]:
associated_with.append(
- f"{NamespacePrefix.INCHIKEY.value}:{row[20]}"
+ f"{NamespacePrefix.INCHIKEY.value}:{row[21]}"
)
if associated_with:
diff --git a/tests/data/guidetopharmacology/gtop_ligand_id_mapping_2023.2.tsv b/tests/data/guidetopharmacology/gtop_ligand_id_mapping_2024.3.tsv
similarity index 95%
rename from tests/data/guidetopharmacology/gtop_ligand_id_mapping_2023.2.tsv
rename to tests/data/guidetopharmacology/gtop_ligand_id_mapping_2024.3.tsv
index fb52225d..0818d2fc 100644
--- a/tests/data/guidetopharmacology/gtop_ligand_id_mapping_2023.2.tsv
+++ b/tests/data/guidetopharmacology/gtop_ligand_id_mapping_2024.3.tsv
@@ -1,4 +1,4 @@
-"# GtoPdb Version: 2023.2 - published: 2023-08-07"
+"# GtoPdb Version: 2024.3 - published: 2024-10-03"
"Ligand id" "Name" "Species" "Type" "PubChem SID" "PubChem CID" "ChEMBl ID" "Chebi ID" "UniProt id" "Ensembl ID" "IUPAC name" "INN" "CAS" "DrugBank ID" "Drug Central ID"
"2169" "arginine vasotocin" "" "Peptide" "135652004" "68649" "" "" "" "" "L-cysteinyl-L-tyrosyl-(3S)-DL-isoleucyl-L-glutaminyl-L-asparagyl-L-cysteinyl-DL-prolyl-L-arginyl-glycinamide (1->6)-disulfide" "argiprestocin" "113-80-4" "" ""
"240" "cisapride" "" "Synthetic organic" "135650104" "2769" "CHEMBL1729" "CHEBI:151790" "" "" "4-amino-5-chloro-N-[1-[3-(4-fluorophenoxy)propyl]-3-methoxypiperidin-4-yl]-2-methoxybenzamide" "cisapride" "81098-60-4" "DB00604" "660"
diff --git a/tests/data/guidetopharmacology/gtop_ligands_2023.2.tsv b/tests/data/guidetopharmacology/gtop_ligands_2023.2.tsv
deleted file mode 100644
index 13cca743..00000000
--- a/tests/data/guidetopharmacology/gtop_ligands_2023.2.tsv
+++ /dev/null
@@ -1,8 +0,0 @@
-"# GtoPdb Version: 2023.2 - published: 2023-08-07"
-"Ligand ID" "Name" "Species" "Type" "Approved" "Withdrawn" "Labelled" "Radioactive" "PubChem SID" "PubChem CID" "UniProt ID" "Ensembl ID" "Ligand Subunit IDs" "Ligand Subunit Name" "Ligand Subunit UniProt IDs" "Ligand Subunit Ensembl IDs" "IUPAC name" "INN" "Synonyms" "SMILES" "InChIKey" "InChI" "GtoImmuPdb" "GtoMPdb" "Antibacterial"
-"2169" "arginine vasotocin" "" "Peptide" "" "" "" "" "135652004" "68649" "" "" "" "" "" "" "L-cysteinyl-L-tyrosyl-(3S)-DL-isoleucyl-L-glutaminyl-L-asparagyl-L-cysteinyl-DL-prolyl-L-arginyl-glycinamide (1->6)-disulfide" "argiprestocin" "[Arg8]vasotocin|AVT" "CC[C@@H](C1NC(=O)[C@H](Cc2ccc(cc2)O)NC(=O)[C@@H](N)CSSC[C@H](NC(=O)[C@@H](NC(=O)[C@@H](NC1=O)CCC(=O)N)CC(=O)N)C(=O)N1CCCC1C(=O)N[C@H](C(=O)NCC(=O)N)CCCN=C(N)N)C" "OXDZADMCOWPSOC-ICBIOJHSSA-N" "InChI=1S/C43H67N15O12S2/c1-3-21(2)34-41(69)53-26(12-13-31(45)60)37(65)55-28(17-32(46)61)38(66)56-29(20-72-71-19-24(44)35(63)54-27(39(67)57-34)16-22-8-10-23(59)11-9-22)42(70)58-15-5-7-30(58)40(68)52-25(6-4-14-50-43(48)49)36(64)51-18-33(47)62/h8-11,21,24-30,34,59H,3-7,12-20,44H2,1-2H3,(H2,45,60)(H2,46,61)(H2,47,62)(H,51,64)(H,52,68)(H,53,69)(H,54,63)(H,55,65)(H,56,66)(H,57,67)(H4,48,49,50)/t21-,24-,25-,26-,27-,28-,29-,30?,34?/m0/s1" "" "" ""
-"240" "cisapride" "" "Synthetic organic" "yes" "yes" "" "" "135650104" "2769" "" "" "" "" "" "" "4-amino-5-chloro-N-[1-[3-(4-fluorophenoxy)propyl]-3-methoxypiperidin-4-yl]-2-methoxybenzamide" "cisapride" "Prepulsid®|Propulsid®" "COC1CN(CCCOc2ccc(cc2)F)CCC1NC(=O)c1cc(Cl)c(cc1OC)N" "DCSUBABJRXZOMT-UHFFFAOYSA-N" "InChI=1S/C23H29ClFN3O4/c1-30-21-13-19(26)18(24)12-17(21)23(29)27-20-8-10-28(14-22(20)31-2)9-3-11-32-16-6-4-15(25)5-7-16/h4-7,12-13,20,22H,3,8-11,14,26H2,1-2H3,(H,27,29)" "" "" ""
-"5343" "cisplatin" "" "Inorganic" "yes" "" "" "" "178102005" "441203" "" "" "" "" "" "" "" "cisplatin" "Platinol®" "" "" "" "" "" ""
-"3303" "L745870" "" "Synthetic organic" "" "" "" "" "178100340" "5311200" "" "" "" "" "" "" "3-[[4-(4-chlorophenyl)piperazin-1-yl]methyl]-1H-pyrrolo[2,3-b]pyridine" "" "L 745870|L-745,870" "Clc1ccc(cc1)N1CCN(CC1)Cc1c[nH]c2c1cccn2" "OGJGQVFWEPNYSB-UHFFFAOYSA-N" "InChI=1S/C18H19ClN4/c19-15-3-5-16(6-4-15)23-10-8-22(9-11-23)13-14-12-21-18-17(14)2-1-7-20-18/h1-7,12H,8-11,13H2,(H,20,21)" "" "" ""
-"2804" "phenobarbital" "" "Synthetic organic" "yes" "" "" "" "135650817" "4763" "" "" "" "" "" "" "5-ethyl-5-phenyl-1,3-diazinane-2,4,6-trione" "phenobarbital" "fenobarbital|Luminal®|phenobarb|phenobarbital sodium|phenobarbitone|phenylethylbarbiturate" "CCC1(C(=O)NC(=O)NC1=O)c1ccccc1" "DDBREPKUVSBGFI-UHFFFAOYSA-N" "InChI=1S/C12H12N2O3/c1-2-12(8-6-4-3-5-7-8)9(15)13-11(17)14-10(12)16/h3-7H,2H2,1H3,(H2,13,14,15,16,17)" "" "" ""
-"5260" "rolipram" "" "Synthetic organic" "" "" "" "" "178101944" "5092" "" "" "" "" "" "" "4-[3-(cyclopentyloxy)-4-methoxyphenyl]pyrrolidin-2-one" "rolipram" "(±)-rolipram|(R,S)-rolipram" "COc1ccc(cc1OC1CCCC1)C1CNC(=O)C1" "HJORMJIFDVBMOB-UHFFFAOYSA-N" "InChI=1S/C16H21NO3/c1-19-14-7-6-11(12-9-16(18)17-10-12)8-15(14)20-13-4-2-3-5-13/h6-8,12-13H,2-5,9-10H2,1H3,(H,17,18)" "yes" "" ""
diff --git a/tests/data/guidetopharmacology/gtop_ligands_2024.3.tsv b/tests/data/guidetopharmacology/gtop_ligands_2024.3.tsv
new file mode 100644
index 00000000..245cfc25
--- /dev/null
+++ b/tests/data/guidetopharmacology/gtop_ligands_2024.3.tsv
@@ -0,0 +1,8 @@
+"# GtoPdb Version: 2024.3 - published: 2024-10-03"
+"Ligand ID" "Name" "Species" "Type" "Approved" "Withdrawn" "Labelled" "Radioactive" "PubChem SID" "PubChem CID" "UniProt ID" "Ensembl ID" "ChEMBL ID" "Ligand Subunit IDs" "Ligand Subunit Name" "Ligand Subunit UniProt IDs" "Ligand Subunit Ensembl IDs" "IUPAC name" "INN" "Synonyms" "SMILES" "InChIKey" "InChI" "GtoImmuPdb" "GtoMPdb" "Antibacterial"
+"2169" "arginine vasotocin" "" "Peptide" "" "" "" "" "135652004" "68649" "" "" "" "" "" "" "" "L-cysteinyl-L-tyrosyl-(3S)-DL-isoleucyl-L-glutaminyl-L-asparagyl-L-cysteinyl-DL-prolyl-L-arginyl-glycinamide (1->6)-disulfide" "argiprestocin" "[Arg8]vasotocin|AVT" "CC[C@@H](C1NC(=O)[C@H](Cc2ccc(cc2)O)NC(=O)[C@@H](N)CSSC[C@H](NC(=O)[C@@H](NC(=O)[C@@H](NC1=O)CCC(=O)N)CC(=O)N)C(=O)N1CCCC1C(=O)N[C@H](C(=O)NCC(=O)N)CCCN=C(N)N)C" "OXDZADMCOWPSOC-ICBIOJHSSA-N" "InChI=1S/C43H67N15O12S2/c1-3-21(2)34-41(69)53-26(12-13-31(45)60)37(65)55-28(17-32(46)61)38(66)56-29(20-72-71-19-24(44)35(63)54-27(39(67)57-34)16-22-8-10-23(59)11-9-22)42(70)58-15-5-7-30(58)40(68)52-25(6-4-14-50-43(48)49)36(64)51-18-33(47)62/h8-11,21,24-30,34,59H,3-7,12-20,44H2,1-2H3,(H2,45,60)(H2,46,61)(H2,47,62)(H,51,64)(H,52,68)(H,53,69)(H,54,63)(H,55,65)(H,56,66)(H,57,67)(H4,48,49,50)/t21-,24-,25-,26-,27-,28-,29-,30?,34?/m0/s1" "" "" ""
+"240" "cisapride" "" "Synthetic organic" "yes" "yes" "" "" "135650104" "2769" "" "" "CHEMBL1729" "" "" "" "" "4-amino-5-chloro-N-[1-[3-(4-fluorophenoxy)propyl]-3-methoxypiperidin-4-yl]-2-methoxybenzamide" "cisapride" "Prepulsid®|Propulsid®" "COC1CN(CCCOc2ccc(cc2)F)CCC1NC(=O)c1cc(Cl)c(cc1OC)N" "DCSUBABJRXZOMT-UHFFFAOYSA-N" "InChI=1S/C23H29ClFN3O4/c1-30-21-13-19(26)18(24)12-17(21)23(29)27-20-8-10-28(14-22(20)31-2)9-3-11-32-16-6-4-15(25)5-7-16/h4-7,12-13,20,22H,3,8-11,14,26H2,1-2H3,(H,27,29)" "" "" ""
+"5343" "cisplatin" "" "Inorganic" "yes" "" "" "" "178102005" "441203" "" "" "CHEMBL11359" "" "" "" "" "" "cisplatin" "Platinol®" "" "" "" "" "" ""
+"3303" "L745870" "" "Synthetic organic" "" "" "" "" "178100340" "5311200" "" "" "CHEMBL267014" "" "" "" "" "3-[[4-(4-chlorophenyl)piperazin-1-yl]methyl]-1H-pyrrolo[2,3-b]pyridine" "" "L 745870|L-745,870" "Clc1ccc(cc1)N1CCN(CC1)Cc1c[nH]c2c1cccn2" "OGJGQVFWEPNYSB-UHFFFAOYSA-N" "InChI=1S/C18H19ClN4/c19-15-3-5-16(6-4-15)23-10-8-22(9-11-23)13-14-12-21-18-17(14)2-1-7-20-18/h1-7,12H,8-11,13H2,(H,20,21)" "" "" ""
+"2804" "phenobarbital" "" "Synthetic organic" "yes" "" "" "" "135650817" "4763" "" "" "CHEMBL40" "" "" "" "" "5-ethyl-5-phenyl-1,3-diazinane-2,4,6-trione" "phenobarbital" "fenobarbital|Luminal®|phenobarb|phenobarbital sodium|phenobarbitone|phenylethylbarbiturate" "CCC1(C(=O)NC(=O)NC1=O)c1ccccc1" "DDBREPKUVSBGFI-UHFFFAOYSA-N" "InChI=1S/C12H12N2O3/c1-2-12(8-6-4-3-5-7-8)9(15)13-11(17)14-10(12)16/h3-7H,2H2,1H3,(H2,13,14,15,16,17)" "" "" ""
+"5260" "rolipram" "" "Synthetic organic" "" "" "" "" "178101944" "5092" "" "" "CHEMBL63" "" "" "" "" "4-[3-(cyclopentyloxy)-4-methoxyphenyl]pyrrolidin-2-one" "rolipram" "(±)-rolipram|(R,S)-rolipram" "COc1ccc(cc1OC1CCCC1)C1CNC(=O)C1" "HJORMJIFDVBMOB-UHFFFAOYSA-N" "InChI=1S/C16H21NO3/c1-19-14-7-6-11(12-9-16(18)17-10-12)8-15(14)20-13-4-2-3-5-13/h6-8,12-13H,2-5,9-10H2,1H3,(H,17,18)" "yes" "" ""