diff --git a/config/interpro.yml b/config/interpro.yml index df2a29f8..476ba492 100644 --- a/config/interpro.yml +++ b/config/interpro.yml @@ -37,7 +37,7 @@ members: panther: filter: "panther" label: "PANTHER" - accession: 'PTHR[0-9]{5}(:SF[0-9]{1,3})?$' + accession: 'PTHR[0-9]{5}(:SF[0-9]{1,4})?$' options: [] options_per_family: [] pirsf: diff --git a/interpro/urls.py b/interpro/urls.py index af8f0462..d0bea8f0 100644 --- a/interpro/urls.py +++ b/interpro/urls.py @@ -2,6 +2,6 @@ from webfront.views import common, mail urlpatterns = [ - url(r"^api/mail/$", mail.mail_interhelp), + url(r"^api/mail/$", mail.send_email), url(r"^api/(?P.*)$", common.GeneralHandler.as_view()), ] diff --git a/webfront/exceptions.py b/webfront/exceptions.py index 9882cf7d..525ec741 100644 --- a/webfront/exceptions.py +++ b/webfront/exceptions.py @@ -1,9 +1,12 @@ class DeletedEntryError(Exception): - def __init__(self, accession, date, message, history): + def __init__(self, accession, database, _type, name, short_name, history, date): self.accession = accession - self.message = message - self.date = date + self.database = database + self.type = _type + self.name = name + self.short_name = short_name self.history = history + self.date = date class EmptyQuerysetError(Exception): diff --git a/webfront/migrations/0023_rename_is_alive_entry_is_public.py b/webfront/migrations/0023_rename_is_alive_entry_is_public.py new file mode 100644 index 00000000..568e6551 --- /dev/null +++ b/webfront/migrations/0023_rename_is_alive_entry_is_public.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.20 on 2023-09-19 21:37 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('webfront', '0022_chain_sequence'), + ] + + operations = [ + migrations.RenameField( + model_name='entry', + old_name='is_alive', + new_name='is_public', + ), + ] diff --git a/webfront/migrations/0024_entry_llm_description.py b/webfront/migrations/0024_entry_llm_description.py new file mode 100644 index 00000000..4a3f6525 --- /dev/null +++ b/webfront/migrations/0024_entry_llm_description.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.19 on 2023-10-03 11:38 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('webfront', '0023_rename_is_alive_entry_is_public'), + ] + + operations = [ + migrations.AddField( + model_name='entry', + name='llm_description', + field=models.TextField(null=True), + ), + ] diff --git a/webfront/models/interpro_new.py b/webfront/models/interpro_new.py index 0b1e9c95..bbcb7c30 100644 --- a/webfront/models/interpro_new.py +++ b/webfront/models/interpro_new.py @@ -31,6 +31,7 @@ class Entry(models.Model): ) go_terms = JSONField(null=True) description = JSONField(null=True) + llm_description = models.TextField(null=True) wikipedia = JSONField(null=True) literature = JSONField(null=True) hierarchy = JSONField(null=True) @@ -38,7 +39,7 @@ class Entry(models.Model): entry_date = models.DateTimeField(null=True) is_featured = models.BooleanField(default=False) overlaps_with = JSONField(default=[]) - is_alive = models.BooleanField(default=False) + is_public = models.BooleanField(default=False) deletion_date = models.DateTimeField(null=True) counts = JSONField(null=True) interactions = JSONField(null=True) diff --git a/webfront/serializers/interpro.py b/webfront/serializers/interpro.py index d2b52469..be775fca 100644 --- a/webfront/serializers/interpro.py +++ b/webfront/serializers/interpro.py @@ -230,6 +230,7 @@ def to_metadata_representation(instance, searcher, sq, counters=None): # "other": instance.other_names, }, "description": instance.description, + "llm_description": instance.llm_description, "wikipedia": instance.wikipedia, "literature": instance.literature, "set_info": instance.set_info, diff --git a/webfront/tests/fixtures_entry.json b/webfront/tests/fixtures_entry.json index 08dc675d..3b166678 100644 --- a/webfront/tests/fixtures_entry.json +++ b/webfront/tests/fixtures_entry.json @@ -10,7 +10,7 @@ "source_database": "interpro", "entry_date": "2001-04-21T00:00:00Z", "is_featured": true, - "is_alive": true, + "is_public": true, "member_databases": { "smart": { "SM00950": "Piwi domain" @@ -173,7 +173,7 @@ "member_databases": {}, "entry_date": "2002-04-21T00:00:00Z", "is_featured": false, - "is_alive": true, + "is_public": true, "integrated": null, "hierarchy": { "children": [ @@ -284,7 +284,7 @@ "type": "domain", "name": "Piwi domain", "is_featured": true, - "is_alive": true, + "is_public": true, "entry_date": "2003-04-21T00:00:00Z", "short_name": "Piwi", "go_terms": [ @@ -392,7 +392,7 @@ "name": "Piwi domain", "entry_date": "2004-04-21T00:00:00Z", "is_featured": true, - "is_alive": true, + "is_public": true, "short_name": "Piwi", "go_terms": [ { @@ -432,7 +432,7 @@ "entry_date": "2001-04-21T00:00:00Z", "name": "PIWI", "is_featured": true, - "is_alive": true, + "is_public": true, "short_name": "Piwi", "go_terms": [], "source_database": "profile", @@ -461,7 +461,7 @@ "entry_id": "id3", "type": "domain", "is_featured": true, - "is_alive": true, + "is_public": true, "entry_date": "2002-04-21T00:00:00Z", "go_terms": [], "source_database": "pfam", @@ -491,7 +491,7 @@ "type": "domain", "entry_date": "2002-04-21T00:00:00Z", "is_featured": true, - "is_alive": true, + "is_public": true, "go_terms": [], "source_database": "pfam", "member_databases": {}, @@ -521,7 +521,7 @@ "name": "Myelin family", "entry_date": "2003-04-21T00:00:00Z", "is_featured": true, - "is_alive": true, + "is_public": true, "short_name": "Myelin", "go_terms": [ { @@ -563,7 +563,7 @@ "go_terms": [], "entry_date": "2004-04-21T00:00:00Z", "is_featured": true, - "is_alive": true, + "is_public": true, "source_database": "profile", "member_databases": null, "integrated": null, @@ -594,7 +594,7 @@ "go_terms": [], "entry_date": "2014-04-21T00:00:00Z", "is_featured": true, - "is_alive": true, + "is_public": true, "source_database": "ncbifam", "member_databases": null, "integrated": null, @@ -616,12 +616,13 @@ "model": "webfront.Entry", "fields": { "entry_id": null, - "accession": "IPR123456", - "type": "D", - "name": "Deleted", - "entry_date": "1994-04-21T00:00:00Z", - "deletion_date": "1994-04-21T00:00:00Z", - "is_alive": false, + "accession": "IPR000005", + "type": "domain", + "name": "HTH transcriptional regulator, AraC", + "short_name": "HTH_AraC", + "entry_date": "1999-10-08T17:07:25Z", + "deletion_date": "2010-10-28T10:51:29Z", + "is_public": true, "source_database": "interpro", "history": { "names": [ @@ -630,32 +631,45 @@ "Helix-turn-helix, AraC type", "HTH transcriptional regulator, AraC" ], - "signatures": [ - { - "accession": "PS00041", - "exists": true, - "integrated_id": "IPR018062" + "short_names": [ + "HTHAraC", + "HTH_AraC-typ", + "HTH_AraC" + ], + "signatures": { + "prosite": { + "PS00041": "IPR018062" }, - { - "accession": "PR00032", - "exists": true, - "integrated_id": "IPR020449" + "pfam": { + "PF00165": "IPR018060" }, - { - "accession": "PF00165", - "exists": true, - "integrated_id": "IPR018060" + "prints": { + "PR00032": "IPR020449" }, - { - "accession": "PS01124", - "exists": true, - "integrated_id": "IPR018060" + "profile": { + "PS01124": "IPR018060" }, - { - "accession": "SM00342", - "exists": true, - "integrated_id": "IPR018060" + "smart": { + "SM00342": "IPR018060" } + } + } + } + }, + { + "model": "webfront.Entry", + "fields": { + "entry_id": null, + "accession": "PF20534", + "type": "domain", + "short_name": "DUF6749", + "entry_date": "2022-07-01T14:20:35Z", + "deletion_date": "2023-07-24T17:42:21Z", + "is_public": true, + "source_database": "pfam", + "history": { + "short_names": [ + "DUF6749" ] } } @@ -671,7 +685,7 @@ "go_terms": [], "entry_date": "2014-03-02T00:00:00Z", "is_featured": true, - "is_alive": true, + "is_public": true, "source_database": "panther", "member_databases": null, "integrated": null, @@ -703,7 +717,7 @@ "go_terms": [], "entry_date": "2014-03-02T00:00:00Z", "is_featured": true, - "is_alive": false, + "is_public": false, "source_database": "panther", "member_databases": null, "integrated": "PTHR43214", @@ -733,7 +747,7 @@ "go_terms": [], "entry_date": "2004-03-02T00:00:00Z", "is_featured": true, - "is_alive": true, + "is_public": true, "source_database": "cathgene3d", "member_databases": null, "integrated": null, @@ -763,7 +777,7 @@ "go_terms": [], "entry_date": "2004-03-02T00:00:00Z", "is_featured": true, - "is_alive": false, + "is_public": false, "source_database": "cathgene3d", "member_databases": null, "integrated": "G3DSA:1.10.10.10", @@ -780,5 +794,35 @@ "domain_architectures": 0 } } + }, + { + "model": "webfront.Entry", + "fields": { + "entry_id": null, + "accession": "PTHR10000", + "type": "family", + "name": "PHOSPHOSERINE PHOSPHATASE", + "source_database": "panther", + "llm_description": "

The protein family belongs to the HAD-like hydrolase superfamily and includes subfamilies such as the Cof family, CbbY/CbbZ/Gph/YieH family, SupH subfamily, the archaeal SPP-like hydrolase family, and the MPGP family. The proteins in this family function as phosphatases, catalyzing the dephosphorylation of various substrates. These include 2-phosphoglycolate, the riboflavin precursor 5-amino-6-(5-phospho-D-ribitylamino)uracil, flavin mononucleotide (FMN), pyridoxal-phosphate (PLP), and different sugar phosphates. Some proteins specifically hydrolyze mannosyl-3-phosphoglycerate (MPG) to form the osmolyte mannosylglycerate (MG), while others are involved in the biosynthesis of kanosamine and glucosylglycerate. The activity of these proteins can be inhibited by high concentrations of Ca(2+) ions and chloride ions.

", + "is_featured": false, + "is_public": true, + "entry_date": "2005-09-11T00:00:00Z", + "counts": { + "subfamilies": 8, + "domain_architectures": 0, + "interactions": 0, + "matches": 91777, + "pathways": 0, + "proteins": 91777, + "proteomes": 6899, + "sets": 0, + "structural_models": { + "alphafold": 79631, + "rosettafold": 0 + }, + "structures": 36, + "taxa": 28523 + } + } } ] diff --git a/webfront/tests/test_mail.py b/webfront/tests/test_mail.py index b758bdfd..f688d191 100644 --- a/webfront/tests/test_mail.py +++ b/webfront/tests/test_mail.py @@ -1,9 +1,12 @@ +import time + from django.test import TestCase from django.test import Client +from rest_framework import status class TestMail(TestCase): - def test_mail(self): + def test_mail(self, sleep=60): self.client = Client() response = self.client.post( "/api/mail/", @@ -14,4 +17,37 @@ def test_mail(self): "from_email": "swaathik@ebi.ac.uk", }, ) + self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.json()["from"], "swaathik@ebi.ac.uk") + time.sleep(sleep) + + def test_spam(self): + self.test_mail(sleep=0) + self.client = Client() + response = self.client.post( + "/api/mail/", + { + "path": "echo", + "subject": "Add annotation test from API", + "message": "Test", + "from_email": "swaathik@ebi.ac.uk", + }, + ) + self.assertEqual(response.status_code, + status.HTTP_429_TOO_MANY_REQUESTS) + time.sleep(60) + + def test_mail_invalid_queue(self): + self.client = Client() + response = self.client.post( + "/api/mail/", + { + "path": "echo", + "subject": "Add annotation test from API", + "message": "Test", + "queue": "uniprot", + "from_email": "swaathik@ebi.ac.uk", + }, + ) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + time.sleep(60) diff --git a/webfront/tests/tests_entry_endpoint.py b/webfront/tests/tests_entry_endpoint.py index 89ba44a2..87b7fde0 100644 --- a/webfront/tests/tests_entry_endpoint.py +++ b/webfront/tests/tests_entry_endpoint.py @@ -11,7 +11,7 @@ def test_dummy_dataset_is_loaded(self): ) self.assertIn( Entry.objects.filter(source_database="interpro").first().accession.upper(), - ["IPR003165", "IPR001165"], + ["IPR003165", "IPR001165", "IPR000005"], ) def test_content_of_a_json_attribute(self): @@ -51,7 +51,7 @@ def test_can_read_entry_unintegrated(self): response = self.client.get("/api/entry/unintegrated") self.assertEqual(response.status_code, status.HTTP_200_OK) self._check_is_list_of_objects_with_key(response.data["results"], "metadata") - self.assertEqual(len(response.data["results"]), 7) + self.assertEqual(len(response.data["results"]), 8) def test_can_read_entry_interpro_id(self): acc = "IPR003165" @@ -64,8 +64,8 @@ def test_fail_entry_interpro_unknown_id(self): "/api/entry/interpro/IPR999999", code=status.HTTP_204_NO_CONTENT ) - def test_gets_410_for_deleted_entry(self): - url = "/api/entry/interpro/ipr123456" + def test_deleted_entry(self): + url = "/api/entry/interpro/IPR000005" self._check_HTTP_response_code(url, code=status.HTTP_410_GONE) response = self.client.get(url) self.assertIn("history", response.data) @@ -188,6 +188,12 @@ def test_can_get_protein_amount_from_interpro_id_pfam_id(self): ) self.assertEqual(response.data["metadata"]["counters"]["proteins"], 1) + def test_entry_with_llm_description(self): + response = self.client.get("/api/entry/panther/PTHR10000/") + meta = response.data["metadata"] + self.assertIn("llm_description", meta) + self.assertTrue(meta["llm_description"] is not None) + class ReplaceTIGRFamsTest(InterproRESTTestCase): def test_can_read_ncbifam(self): diff --git a/webfront/views/common.py b/webfront/views/common.py index b5af4128..4e250a3f 100644 --- a/webfront/views/common.py +++ b/webfront/views/common.py @@ -204,14 +204,16 @@ def query(args): # DeletedEntryError is still a valid response so a response object is created and saved in cache if settings.DEBUG: raise - content = { - "detail": e.args[2], - "accession": e.args[0], - "date": e.args[1], - } - if len(e.args) > 3 and e.args[3] is not None: - content["history"] = e.args[3] - response = Response(content, status=status.HTTP_410_GONE) + + response = Response({ + "accession": e.accession, + "source_database": e.database, + "type": e.type, + "name": e.name, + "short_name": e.short_name, + "deletion_date": e.date.strftime("%Y-%m-%dT00:00:00.000Z"), + "history": e.history + }, status=status.HTTP_410_GONE) self._set_in_cache(caching_allowed, full_path, response) except EmptyQuerysetError as e: # EmptyQuerysetError is still a valid response so a response object is created and saved in cache @@ -298,11 +300,22 @@ def search_modifier(self, search, general_handler): return if general_handler.queryset_manager.main_endpoint == "taxonomy": self.queryset_manager.add_filter( - "search", accession__icontains=search, full_name__icontains=search + "search", + accession__icontains=search, + full_name__icontains=search + ) + elif general_handler.queryset_manager.main_endpoint == "entry": + self.queryset_manager.add_filter( + "search", + accession__icontains=search, + name__icontains=search, + short_name__icontains=search ) else: self.queryset_manager.add_filter( - "search", accession__icontains=search, name__icontains=search + "search", + accession__icontains=search, + name__icontains=search ) @staticmethod diff --git a/webfront/views/entry.py b/webfront/views/entry.py index a38deb0d..c82093ea 100644 --- a/webfront/views/entry.py +++ b/webfront/views/entry.py @@ -50,10 +50,24 @@ def get( *args, **kwargs ): + acc = endpoint_levels[level - 1].lower() general_handler.queryset_manager.add_filter( - "entry", accession__iexact=endpoint_levels[level - 1].lower() + "entry", accession__iexact=acc ) + # Check whether the entry is deleted + general_handler.queryset_manager.add_filter("entry", + deletion_date__isnull=False) + qs = general_handler.queryset_manager.get_queryset() + if qs.count() > 0: + first = qs.first() + raise DeletedEntryError(acc, first.source_database, first.type, + first.name, first.short_name, first.history, + first.deletion_date) + + general_handler.queryset_manager.add_filter("entry", + deletion_date__isnull=True) + general_handler.modifiers.register( "model:structure", get_model("structure"), @@ -242,19 +256,14 @@ def get( general_handler.queryset_manager.add_filter("entry", accession__iexact=acc) # Checking if the entry has been marked as deleted - general_handler.queryset_manager.add_filter("entry", is_alive=False) + general_handler.queryset_manager.add_filter("entry", deletion_date__isnull=False) qs = general_handler.queryset_manager.get_queryset() if qs.count() > 0: first = qs.first() - date = first.deletion_date - history = first.history - raise DeletedEntryError( - acc, - date, - "The entry {} is not active. Removed: {}".format(acc, date), - history, - ) - general_handler.queryset_manager.add_filter("entry", is_alive=True) + raise DeletedEntryError(acc, first.source_database, first.type, + first.name, first.short_name, first.history, + first.deletion_date) + general_handler.queryset_manager.add_filter("entry", deletion_date__isnull=True) general_handler.modifiers.register( "annotation", @@ -596,7 +605,8 @@ def get( ): general_handler.queryset_manager.reset_filters("entry", endpoint_levels) general_handler.queryset_manager.add_filter("entry", accession__isnull=False) - general_handler.queryset_manager.add_filter("entry", is_alive=True) + general_handler.queryset_manager.add_filter("entry", deletion_date__isnull=True) + general_handler.queryset_manager.add_filter("entry", is_public=True) general_handler.modifiers.register( "group_by", group_by( @@ -655,5 +665,6 @@ def get( @staticmethod def filter(queryset, level_name="", general_handler=None): general_handler.queryset_manager.add_filter("entry", accession__isnull=False) - general_handler.queryset_manager.add_filter("entry", is_alive=True) + general_handler.queryset_manager.add_filter("entry", deletion_date__isnull=True) + general_handler.queryset_manager.add_filter("entry", is_public=True) return queryset diff --git a/webfront/views/mail.py b/webfront/views/mail.py index 9a36deca..05464649 100644 --- a/webfront/views/mail.py +++ b/webfront/views/mail.py @@ -8,7 +8,7 @@ @csrf_exempt -def mail_interhelp(request): +def send_email(request): ip_address = get_client_ip(request) now = datetime.now() if not hasattr(settings, "credentials"): @@ -47,10 +47,15 @@ def mail(request): subject = request.POST.get("subject", "") message = request.POST.get("message", "") from_email = request.POST.get("from_email", "") - if path and subject and message and from_email: + queue = request.POST.get("queue", "interpro").lower() + to_email = { + "interpro": "interhelp@ebi.ac.uk", + "pfam": "pfam-help@ebi.ac.uk" + }.get(queue, "") + if path and subject and message and from_email and to_email: message = MIMEText(message) message["From"] = from_email - message["To"] = "interhelp@ebi.ac.uk" + message["To"] = to_email message["Subject"] = subject p = Popen([path, "-t", "-oi"], stdin=PIPE) p.communicate(message.as_bytes()) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 1d8e91b2..9aef95f0 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -905,7 +905,7 @@ def get_model_structure(value, general_handler): def get_subfamilies(value, general_handler): queryset = general_handler.queryset_manager.get_queryset().first() - entries = Entry.objects.filter(integrated=queryset.accession, is_alive=False) + entries = Entry.objects.filter(integrated=queryset.accession, is_public=False) if isinstance(value, str) and value.strip(): entries = entries.filter(accession=value) if len(entries) == 0: @@ -915,7 +915,7 @@ def get_subfamilies(value, general_handler): def mark_as_subfamily(value, general_handler): - general_handler.queryset_manager.add_filter("entry", is_alive=False) + general_handler.queryset_manager.add_filter("entry", is_public=False) def passing(x, y):