From 57942fb061fab6b2a49ae60b8aa5694c4167f128 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 22 Apr 2022 22:56:12 +0000 Subject: [PATCH 01/26] Bump django from 3.2.12 to 3.2.13 Bumps [django](https://github.com/django/django) from 3.2.12 to 3.2.13. - [Release notes](https://github.com/django/django/releases) - [Commits](https://github.com/django/django/compare/3.2.12...3.2.13) --- updated-dependencies: - dependency-name: django dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d10f33ca..fc2784cf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -Django==3.2.12 +Django==3.2.13 djangorestframework==3.12.4 PyYAML==6.0 jsonfield2==4.0.0.post0 From 757826cd8b6485b8a14aa402445afc7233caf875 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Fri, 19 Aug 2022 14:45:42 +0100 Subject: [PATCH 02/26] panther subfamilies as integrated into panther families --- webfront/tests/fixtures_entry.json | 63 +++++++++++++++++++ webfront/tests/relationship_features.json | 21 +++++++ .../tests/tests_3_endpoints_using_searcher.py | 1 + webfront/tests/tests_entry_endpoint.py | 2 +- .../tests_entry_endpoint_protein_filter.py | 1 + webfront/tests/tests_modifiers.py | 14 +++++ .../tests_protein_endpoint_entry_filter.py | 2 +- .../tests_structure_endpoint_entry_filter.py | 8 +-- webfront/views/entry.py | 8 +++ webfront/views/modifiers.py | 9 +++ 10 files changed, 123 insertions(+), 6 deletions(-) diff --git a/webfront/tests/fixtures_entry.json b/webfront/tests/fixtures_entry.json index f65ec1b0..af56a570 100644 --- a/webfront/tests/fixtures_entry.json +++ b/webfront/tests/fixtures_entry.json @@ -630,5 +630,68 @@ ] } } + }, + { + "model": "webfront.Entry", + "fields": { + "entry_id": null, + "accession": "PTHR43214", + "type": "D", + "name": "TWO-COMPONENT RESPONSE REGULATOR", + "short_name": "PTHR43214", + "go_terms": [], + "entry_date": "2014-03-02T00:00:00Z", + "is_featured": true, + "is_alive": true, + "source_database": "panther", + "member_databases": null, + "integrated": null, + "description": [ + "two-component response regulator." + ], + "wikipedia": null, + "literature": {}, + "cross_references": {}, + "counts": { + "proteins": 0, + "structures": 0, + "taxa": 0, + "proteomes": 0, + "sets": 0, + "domain_architectures": 0, + "subfamilies": 1 + } + } + }, + { + "model": "webfront.Entry", + "fields": { + "entry_id": null, + "accession": "PTHR43214:sf24", + "type": "D", + "name": "NITRATE/NITRITE RESPONSE REGULATOR PROTEIN NARL", + "short_name": "PTHR43214:sf24", + "go_terms": [], + "entry_date": "2014-03-02T00:00:00Z", + "is_featured": true, + "is_alive": false, + "source_database": "panther", + "member_databases": null, + "integrated": "PTHR43214", + "description": [ + "nitrate/nitrite response regulator protein narl." 
+ ], + "wikipedia": null, + "literature": {}, + "cross_references": {}, + "counts": { + "proteins": 0, + "structures": 0, + "taxa": 0, + "proteomes": 0, + "sets": 0, + "domain_architectures": 0 + } + } } ] diff --git a/webfront/tests/relationship_features.json b/webfront/tests/relationship_features.json index d731a0a9..47746e57 100644 --- a/webfront/tests/relationship_features.json +++ b/webfront/tests/relationship_features.json @@ -159,6 +159,27 @@ ] } }, + { + "model": "webfront.ProteinEntryFeature", + "fields": { + "entry": "pthr43214", + "protein": "m5adk6", + "coordinates": [ + { + "fragments": [ + { + "start": 12, + "end": 46 + } + ], + "subfamily": { + "name": "NITRATE/NITRITE RESPONSE REGULATOR PROTEIN NARL", + "accerssion":"PTHR43214:sf24" + } + } + ] + } + }, { "model": "webfront.ProteinStructureFeature", "fields": { diff --git a/webfront/tests/tests_3_endpoints_using_searcher.py b/webfront/tests/tests_3_endpoints_using_searcher.py index c5ffcad9..3c97a654 100644 --- a/webfront/tests/tests_3_endpoints_using_searcher.py +++ b/webfront/tests/tests_3_endpoints_using_searcher.py @@ -14,6 +14,7 @@ "pfam": ["PF02171", "PF17180", "PF17176"], "smart": ["SM00950", "SM00002"], "profile": ["PS50822", "PS01031"], + "panther": ["PTHR43214"], }, "protein": { "uniprot": ["A1CUJ5", "M5ADK6", "A0A0A2L2G2", "P16582"], diff --git a/webfront/tests/tests_entry_endpoint.py b/webfront/tests/tests_entry_endpoint.py index e438302f..e6a24299 100644 --- a/webfront/tests/tests_entry_endpoint.py +++ b/webfront/tests/tests_entry_endpoint.py @@ -51,7 +51,7 @@ def test_can_read_entry_unintegrated(self): response = self.client.get("/api/entry/unintegrated") self.assertEqual(response.status_code, status.HTTP_200_OK) self._check_is_list_of_objects_with_key(response.data["results"], "metadata") - self.assertEqual(len(response.data["results"]), 4) + self.assertEqual(len(response.data["results"]), 5) def test_can_read_entry_interpro_id(self): acc = "IPR003165" diff --git a/webfront/tests/tests_entry_endpoint_protein_filter.py b/webfront/tests/tests_entry_endpoint_protein_filter.py index 7f97d405..b912413c 100644 --- a/webfront/tests/tests_entry_endpoint_protein_filter.py +++ b/webfront/tests/tests_entry_endpoint_protein_filter.py @@ -196,6 +196,7 @@ def test_can_get_proteins_from_entry_db_protein_id(self): "/api/entry/interpro/" + acc + "/pfam//protein/uniprot/" + prot: ["A1CUJ5"], "/api/entry/pfam/protein/uniprot/" + prot: ["A1CUJ5"], "/api/entry/unintegrated/pfam/protein/uniprot/" + prot_u: ["M5ADK6"], + "/api/entry/panther/protein/uniprot/" + prot_u: ["M5ADK6"], } for url in tests: response = self.client.get(url) diff --git a/webfront/tests/tests_modifiers.py b/webfront/tests/tests_modifiers.py index e78ef556..b2d43654 100644 --- a/webfront/tests/tests_modifiers.py +++ b/webfront/tests/tests_modifiers.py @@ -504,3 +504,17 @@ def test_model_lddt_modifier(self): data = json.loads(content) self.assertEqual(3, len(data)) self.assertTrue(all([0 <= item <= 1 for item in data])) + +class PantherSubfamilyTest(InterproRESTTestCase): + def test_subfamilies_counter(self): + response = self.client.get("/api/entry/panther/PTHR43214") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertIn("counters", response.data["metadata"]) + self.assertEqual(response.data["metadata"]["counters"]["subfamilies"], 1) + # self.assertEqual(len(response.data["results"]), 1) + def test_panther_subfamilies(self): + response = self.client.get("/api/entry/panther/PTHR43214?subfamilies") + self.assertEqual(response.status_code, 
status.HTTP_200_OK) + self.assertEqual(len(response.data["results"]), 1) + self.assertEqual(response.data["results"][0]["metadata"]["accession"], "PTHR43214:sf24") + self.assertEqual(response.data["results"][0]["metadata"]["integrated"], "PTHR43214") diff --git a/webfront/tests/tests_protein_endpoint_entry_filter.py b/webfront/tests/tests_protein_endpoint_entry_filter.py index a97e2fee..c787ef3f 100644 --- a/webfront/tests/tests_protein_endpoint_entry_filter.py +++ b/webfront/tests/tests_protein_endpoint_entry_filter.py @@ -138,7 +138,7 @@ def test_urls_that_return_a_protein_details_with_matches(self): "/api/protein/uniprot/" + sp_2 + "/entry/interpro": ["IPR003165", "IPR001165"], - "/api/protein/uniprot/" + sp_1 + "/entry/unintegrated": ["PF17180"], + "/api/protein/uniprot/" + sp_1 + "/entry/unintegrated": ["PF17180", "PTHR43214"], "/api/protein/uniprot/" + sp_2 + "/entry/pfam": ["PF17176", "PF02171"], "/api/protein/uniprot/" + sp_2 + "/entry/interpro/pfam": ["PF02171"], "/api/protein/uniprot/" + sp_2 + "/entry/interpro/smart": ["SM00950"], diff --git a/webfront/tests/tests_structure_endpoint_entry_filter.py b/webfront/tests/tests_structure_endpoint_entry_filter.py index 3039e8a4..42c54287 100644 --- a/webfront/tests/tests_structure_endpoint_entry_filter.py +++ b/webfront/tests/tests_structure_endpoint_entry_filter.py @@ -121,10 +121,10 @@ def test_urls_that_return_a_structure_details_with_matches(self): pdb_2 = "2BKM" acc = "IPR003165" urls = { - "/api/structure/pdb/" + pdb_2 + "/entry/unintegrated": ["PF17180"], + "/api/structure/pdb/" + pdb_2 + "/entry/unintegrated": ["PF17180", "PTHR43214"], "/api/structure/pdb/" + pdb_1 - + "/entry/unintegrated": ["PF17180", "PF17176"], + + "/entry/unintegrated": ["PF17180", "PF17176", "PTHR43214"], "/api/structure/pdb/" + pdb_1 + "/entry/interpro": ["IPR003165", "IPR001165"], @@ -165,9 +165,9 @@ def test_urls_that_return_a_structure_details_with_matches_from_chain(self): pdb_2 = "2BKM" acc = "IPR003165" urls = { - "/api/structure/pdb/" + pdb_2 + "/B/entry/unintegrated": ["PF17180"], + "/api/structure/pdb/" + pdb_2 + "/B/entry/unintegrated": ["PF17180", "PTHR43214"], "/api/structure/pdb/" + pdb_1 + "/A/entry/unintegrated": ["PF17176"], - "/api/structure/pdb/" + pdb_1 + "/B/entry/unintegrated": ["PF17180"], + "/api/structure/pdb/" + pdb_1 + "/B/entry/unintegrated": ["PF17180", "PTHR43214"], "/api/structure/pdb/" + pdb_1 + "/A/entry/interpro": ["IPR003165", "IPR001165"], diff --git a/webfront/views/entry.py b/webfront/views/entry.py index ffa85edd..048bab2a 100644 --- a/webfront/views/entry.py +++ b/webfront/views/entry.py @@ -19,6 +19,7 @@ get_value_for_field, get_model, get_sunburst_taxa, + get_subfamilies, ) from .custom import CustomView, SerializerDetail from django.conf import settings @@ -91,6 +92,13 @@ def get( general_handler.modifiers.register( "taxa", get_sunburst_taxa, type=ModifierType.REPLACE_PAYLOAD ) + general_handler.modifiers.register( + "subfamilies", + get_subfamilies, + type=ModifierType.REPLACE_PAYLOAD, + serializer=SerializerDetail.ENTRY_HEADERS, + many=True, + ) return super(MemberAccessionHandler, self).get( request._request, diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 8478167e..feb64951 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -855,5 +855,14 @@ def get_model_structure(value, general_handler): return get_model_structure +def get_subfamilies(value, general_handler): + queryset = general_handler.queryset_manager.get_queryset().first() + entries = 
Entry.objects.filter(integrated=queryset.accession, source_database='panther', is_alive=False) + if len(entries) == 0: + raise EmptyQuerysetError( + "There is are not subfamilies for this entry" + ) + return entries + def passing(x, y): pass From 6180203a10000178a1a0408de96246f8b4322c83 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Fri, 19 Aug 2022 15:36:29 +0100 Subject: [PATCH 03/26] deps bump --- dev_requirements.txt | 6 +++--- requirements.txt | 14 ++++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/dev_requirements.txt b/dev_requirements.txt index 1c410035..b56b6711 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,5 +1,5 @@ selenium==3.141.0 -django-debug-toolbar==3.2.2 +django-debug-toolbar==3.6.0 ipdb==0.13.9 -coveralls==3.2.0 -tqdm==4.62.3 +coveralls==3.3.1 +tqdm==4.64.0 diff --git a/requirements.txt b/requirements.txt index fc2784cf..acbecc37 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,10 @@ -Django==3.2.13 -djangorestframework==3.12.4 +Django==3.2.15 +djangorestframework==3.13.1 PyYAML==6.0 jsonfield2==4.0.0.post0 pymysql==1.0.2 -django-cors-headers==3.10.0 +django-cors-headers==3.13.0 gunicorn==20.1.0 -eventlet==0.32.0 -django-db-connection-pool==1.0.7 -django-redis==5.0.0 -redis==3.5.3 -requests==2.26.0 +django-redis==5.2.0 +redis==4.3.4 +requests==2.28.1 From 8e4664464922b3149cc6ae931b978b27191e1468 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Fri, 19 Aug 2022 16:02:16 +0100 Subject: [PATCH 04/26] deps bump 2 --- dev_requirements.txt | 2 +- functional_tests/base.py | 3 ++- functional_tests/tests.py | 25 +++++++++++++------------ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/dev_requirements.txt b/dev_requirements.txt index b56b6711..45db826c 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,4 +1,4 @@ -selenium==3.141.0 +selenium==4.4.3 django-debug-toolbar==3.6.0 ipdb==0.13.9 coveralls==3.3.1 diff --git a/functional_tests/base.py b/functional_tests/base.py index 4b366eed..39384243 100644 --- a/functional_tests/base.py +++ b/functional_tests/base.py @@ -1,6 +1,7 @@ from django.contrib.staticfiles.testing import StaticLiveServerTestCase from django.test import override_settings from selenium import webdriver +from selenium.webdriver.common.by import By import sys import time import os @@ -63,7 +64,7 @@ def click_link_and_wait(self, link): def link_has_gone_stale(): try: # poll the link with an arbitrary call - link.find_elements_by_id("doesnt-matter") + link.find_elements(By.ID, "doesnt-matter") return False except StaleElementReferenceException: return True diff --git a/functional_tests/tests.py b/functional_tests/tests.py index 8e3348ed..0dc1632f 100644 --- a/functional_tests/tests.py +++ b/functional_tests/tests.py @@ -1,4 +1,5 @@ from functional_tests.base import FunctionalTest +from selenium.webdriver.common.by import By import json import re @@ -6,7 +7,7 @@ class RESTRequestsTest(FunctionalTest): def test_request_entry_endpoint(self): self.browser.get(self.server_url + "/api/entry/?format=json") - content = self.browser.find_element_by_tag_name("body").text + content = self.browser.find_element(By.TAG_NAME, "body").text jsonp = json.loads(content) self.assertEqual(len(jsonp["entries"]), 5, "the output has exactly 5 keys") @@ -20,7 +21,7 @@ def test_request_entry_endpoint(self): ) self.browser.get(self.server_url + "/api/entry/interpro?format=json") - content = self.browser.find_element_by_tag_name("body").text + content = 
self.browser.find_element(By.TAG_NAME, "body").text jsonp = json.loads(content) @@ -35,7 +36,7 @@ def test_request_entry_endpoint(self): self.browser.get( self.server_url + "/api/entry/interpro/" + acc + "?format=json" ) - content = self.browser.find_element_by_tag_name("body").text + content = self.browser.find_element(By.TAG_NAME, "body").text jsonp = json.loads(content) self.assertEqual( @@ -56,7 +57,7 @@ def test_request_entry_endpoint(self): def test_request_protein_endpoint(self): self.browser.get(self.server_url + "/api/protein/?format=json") - content = self.browser.find_element_by_tag_name("body").text + content = self.browser.find_element(By.TAG_NAME, "body").text jsonp = json.loads(content) @@ -70,7 +71,7 @@ def test_request_protein_endpoint(self): ) self.browser.get(self.server_url + "/api/protein/uniprot?format=json") - content = self.browser.find_element_by_tag_name("body").text + content = self.browser.find_element(By.TAG_NAME, "body").text jsonp = json.loads(content) @@ -84,7 +85,7 @@ def test_request_protein_endpoint(self): self.browser.get( self.server_url + "/api/protein/uniprot/" + acc + "?format=json" ) - content = self.browser.find_element_by_tag_name("body").text + content = self.browser.find_element(By.TAG_NAME, "body").text jsonp = json.loads(content) self.assertEqual( @@ -104,7 +105,7 @@ def test_request_protein_endpoint(self): + jsonp["metadata"]["id"] + "?format=json" ) - content2 = self.browser.find_element_by_tag_name("body").text + content2 = self.browser.find_element(By.TAG_NAME, "body").text jsonp2 = json.loads(content2) self.assertEqual( @@ -118,21 +119,21 @@ def test_request_to_api_frontend(self): url = "/api/entry/" self.browser.get(self.server_url + url) - req_info = self.browser.find_element_by_css_selector(".request-info").text + req_info = self.browser.find_element(By.CSS_SELECTOR, ".request-info").text self.assertIn("GET", req_info) self.assertIn(url, req_info) - response = self.browser.find_element_by_css_selector(".response-info").text + response = self.browser.find_element(By.CSS_SELECTOR, ".response-info").text match = re.search("[\{\[]", response) json_frontend = json.loads(response[match.start() :]) - self.browser.find_element_by_css_selector(".format-selection button").click() + self.browser.find_element(By.CSS_SELECTOR, ".format-selection button").click() self.click_link_and_wait( - self.browser.find_element_by_css_selector(".js-tooltip.format-option") + self.browser.find_element(By.CSS_SELECTOR, ".js-tooltip.format-option") ) - content = self.browser.find_element_by_tag_name("body").text + content = self.browser.find_element(By.TAG_NAME, "body").text jsonp = json.loads(content) From 57bde775cc560fe989c44f10f102c20af447cc60 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Fri, 19 Aug 2022 16:08:13 +0100 Subject: [PATCH 05/26] deps bump 3 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index acbecc37..3414f1b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ djangorestframework==3.13.1 PyYAML==6.0 jsonfield2==4.0.0.post0 pymysql==1.0.2 -django-cors-headers==3.13.0 +django-cors-headers==3.10.0 gunicorn==20.1.0 django-redis==5.2.0 redis==4.3.4 From 748e97ebce6eb71c2de274d3084b64b7a0719905 Mon Sep 17 00:00:00 2001 From: "Gustavo A. 
Salazar" Date: Fri, 19 Aug 2022 16:18:24 +0100 Subject: [PATCH 06/26] deps bump 4 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 3414f1b2..2a6611bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,4 @@ django-cors-headers==3.10.0 gunicorn==20.1.0 django-redis==5.2.0 redis==4.3.4 -requests==2.28.1 +requests==2.27.1 From 5dd3eaff1f94f8ba0c090877434402990ec6167d Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Fri, 19 Aug 2022 16:30:24 +0100 Subject: [PATCH 07/26] deps bump 5 --- dev_requirements.txt | 2 +- requirements.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev_requirements.txt b/dev_requirements.txt index 45db826c..87be9cd1 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,4 +1,4 @@ -selenium==4.4.3 +selenium~=4.0 django-debug-toolbar==3.6.0 ipdb==0.13.9 coveralls==3.3.1 diff --git a/requirements.txt b/requirements.txt index 2a6611bc..ab33934b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,8 +3,8 @@ djangorestframework==3.13.1 PyYAML==6.0 jsonfield2==4.0.0.post0 pymysql==1.0.2 -django-cors-headers==3.10.0 +django-cors-headers~=3.10 gunicorn==20.1.0 django-redis==5.2.0 redis==4.3.4 -requests==2.27.1 +requests~=2.27 From a996bb8f1f66e4c744044591c012927fa0aacdc8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Fri, 19 Aug 2022 16:48:24 +0100 Subject: [PATCH 08/26] deps bump 6 --- dev_requirements.txt | 5 +++-- requirements.txt | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dev_requirements.txt b/dev_requirements.txt index 87be9cd1..2d05fa1e 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,5 +1,6 @@ -selenium~=4.0 -django-debug-toolbar==3.6.0 +# remember to check the version is compatible with python 3.6 +selenium~=3.141 +django-debug-toolbar~=3.2 ipdb==0.13.9 coveralls==3.3.1 tqdm==4.64.0 diff --git a/requirements.txt b/requirements.txt index ab33934b..be952790 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +# remember to check the version is compatible with python 3.6 Django==3.2.15 djangorestframework==3.13.1 PyYAML==6.0 From 2fc0aa5a2976f0722eccc6a9e2a6a709ca1e725e Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Mon, 22 Aug 2022 16:09:03 +0100 Subject: [PATCH 09/26] counter fro subfamily --- webfront/tests/fixtures_entry.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webfront/tests/fixtures_entry.json b/webfront/tests/fixtures_entry.json index af56a570..022e2aa5 100644 --- a/webfront/tests/fixtures_entry.json +++ b/webfront/tests/fixtures_entry.json @@ -653,8 +653,8 @@ "literature": {}, "cross_references": {}, "counts": { - "proteins": 0, - "structures": 0, + "proteins": 1, + "structures": 2, "taxa": 0, "proteomes": 0, "sets": 0, From 9540c474f2378f09234730bc2a5735b23abf02ea Mon Sep 17 00:00:00 2001 From: "Gustavo A. 
Salazar" Date: Wed, 24 Aug 2022 13:37:37 +0100 Subject: [PATCH 10/26] typo in fixtures --- webfront/tests/relationship_features.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webfront/tests/relationship_features.json b/webfront/tests/relationship_features.json index 47746e57..b812109c 100644 --- a/webfront/tests/relationship_features.json +++ b/webfront/tests/relationship_features.json @@ -174,7 +174,7 @@ ], "subfamily": { "name": "NITRATE/NITRITE RESPONSE REGULATOR PROTEIN NARL", - "accerssion":"PTHR43214:sf24" + "accession":"PTHR43214:sf24" } } ] From 68656642e4133533987a1409dadadbdfaa4dce4b Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Tue, 30 Aug 2022 11:33:34 +0100 Subject: [PATCH 11/26] Change type of num_sequences field to integer --- webfront/models/interpro_new.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webfront/models/interpro_new.py b/webfront/models/interpro_new.py index 3b7f97ce..2cb5df5c 100644 --- a/webfront/models/interpro_new.py +++ b/webfront/models/interpro_new.py @@ -65,7 +65,7 @@ class EntryAnnotation(models.Model): type = models.CharField(max_length=32) value = models.BinaryField() mime_type = models.CharField(max_length=32) - num_sequences = models.FloatField(null=True) + num_sequences = models.IntegerField(null=True) class Protein(models.Model): From f860697328197698119df86c8d616218e712bfe5 Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Tue, 30 Aug 2022 11:34:54 +0100 Subject: [PATCH 12/26] Return the number of sequences for each entry annotation --- webfront/serializers/interpro.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/webfront/serializers/interpro.py b/webfront/serializers/interpro.py index a5be7b9a..238555e0 100644 --- a/webfront/serializers/interpro.py +++ b/webfront/serializers/interpro.py @@ -204,9 +204,10 @@ def reformat_cross_references(cross_references): @staticmethod def to_metadata_representation(instance, searcher, sq, counters=None): results = EntryAnnotation.objects.filter(accession=instance.accession).only( - "type" + "type", + "num_sequences" ) - annotation_types = [x.type for x in results] + annotation_types = {x.type: x.num_sequences or 0 for x in results} if counters is None: counters = EntrySerializer.get_counters(instance, searcher, sq) From e7d7c520c67c05d1b9a56d05e1356781a4b7e0d6 Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Tue, 30 Aug 2022 11:37:52 +0100 Subject: [PATCH 13/26] Add tests for entry annotations --- webfront/tests/tests_entry_endpoint.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/webfront/tests/tests_entry_endpoint.py b/webfront/tests/tests_entry_endpoint.py index e438302f..73129e09 100644 --- a/webfront/tests/tests_entry_endpoint.py +++ b/webfront/tests/tests_entry_endpoint.py @@ -145,6 +145,12 @@ def test_can_read_entry_pfam_id(self): self.assertIn("metadata", response.data.keys()) self.assertIn("counters", response.data["metadata"].keys()) self.assertIn("proteins", response.data["metadata"]["counters"].keys()) + self.assertIn("entry_annotations", response.data["metadata"].keys()) + self.assertIsInstance(response.data["metadata"]["entry_annotations"], + dict) + for k, v in response.data["metadata"]["entry_annotations"].items(): + self.assertIsInstance(k, str) + self.assertIsInstance(v, int) self._check_entry_details(response.data["metadata"]) def test_can_read_entry_unintegrated_pfam_id(self): From a38cc10b4ce5d587690b2ca42d785943c844dc5a Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Tue, 30 Aug 2022 14:19:06 
+0100 Subject: [PATCH 14/26] Allow specifying the path to the chromedriver binary --- functional_tests/base.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/functional_tests/base.py b/functional_tests/base.py index 4b366eed..64fadf16 100644 --- a/functional_tests/base.py +++ b/functional_tests/base.py @@ -47,7 +47,14 @@ def setUp(self): if os.environ["BROWSER_TEST"] == "chrome": chrome_options = Options() chrome_options.add_argument("--headless") - self.browser = webdriver.Chrome(chrome_options=chrome_options) + + if "BROWSER_TEST_PATH" in os.environ: + self.browser = webdriver.Chrome( + executable_path=os.environ["BROWSER_TEST_PATH"], + chrome_options=chrome_options) + else: + self.browser = webdriver.Chrome( + chrome_options=chrome_options) else: raise KeyError except KeyError: From f165f37af3d6c7075f3f4732e69a1f452ca3253e Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Wed, 31 Aug 2022 16:35:41 +0100 Subject: [PATCH 15/26] Add migration --- ...0020_alter_entryannotation_num_sequences.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 webfront/migrations/0020_alter_entryannotation_num_sequences.py diff --git a/webfront/migrations/0020_alter_entryannotation_num_sequences.py b/webfront/migrations/0020_alter_entryannotation_num_sequences.py new file mode 100644 index 00000000..e318c838 --- /dev/null +++ b/webfront/migrations/0020_alter_entryannotation_num_sequences.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.12 on 2022-08-31 15:35 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('webfront', '0019_entrytaxa_table'), + ] + + operations = [ + migrations.AlterField( + model_name='entryannotation', + name='num_sequences', + field=models.IntegerField(null=True), + ), + ] From 957c61346e62b1309fe90dbd025b2551519ac335 Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Thu, 1 Sep 2022 15:30:39 +0100 Subject: [PATCH 16/26] Update developer documentation --- deploy_tools/README.md | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/deploy_tools/README.md b/deploy_tools/README.md index a6f7bbb0..59159d6a 100644 --- a/deploy_tools/README.md +++ b/deploy_tools/README.md @@ -133,20 +133,35 @@ Assume we have a user account at /home/username ## Testing -* The unit tests are located in ```[project]/source/webfront/tests/tests.py``` +The unit tests are located in `[project]/source/webfront/tests/`. - To run unit tests use ```../virtualenv/bin/python manage.py test webfront``` +To run unit tests use -* The functional test are in ```[project]/functional_tests/tests.py``` and they are configured to firefox, so you need - to have it installed in your machine +```sh +../virtualenv/bin/python manage.py test webfront +``` + +The functional test are in `[project]/functional_tests` and are configured to Google Chrome (or Chromium), so you need to have it installed in your machine. - To run functional tests use ```../virtualenv/bin/python manage.py test functional_tests``` +To run functional tests use -* As a reference [HERE](https://docs.google.com/presentation/d/13_a6IbTq8KPGRH5AhsauEDJt4jEXNsT7DFdg1PNn4_I/edit?usp=sharing) is a graphic describing the fixtures. +```sh +export BROWSER_TEST="chrome" + +# Only required if ChromeDriver is not in your PATH +# or if its binary is not `chromedriver` (e.g. 
`chromium.chromedriver`) +export BROWSER_TEST_PATH="/path/to/chromedriver" + +../virtualenv/bin/python manage.py test functional_tests +``` + +As a reference [HERE](https://docs.google.com/presentation/d/13_a6IbTq8KPGRH5AhsauEDJt4jEXNsT7DFdg1PNn4_I/edit?usp=sharing) is a graphic describing the fixtures. All the test can be run at the same time: -```../virtualenv/bin/python manage.py test``` +```sh +../virtualenv/bin/python manage.py test +``` ## Setting up real data (MySQL - elasticsearch) From d5ba7a62cd4a5eb96e9a4032a75840f4161afa0b Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Mon, 5 Sep 2022 16:54:05 +0100 Subject: [PATCH 17/26] subfamilies: including the total number of subfamilies --- webfront/views/modifiers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 5fec197d..5189b0af 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -860,6 +860,7 @@ def get_subfamilies(value, general_handler): raise EmptyQuerysetError( "There is are not subfamilies for this entry" ) + general_handler.modifiers.search_size = len(entries) return entries def passing(x, y): From 1fdbf66db3c880f60538851937418d8b0f3d48ab Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Wed, 7 Sep 2022 12:25:53 +0100 Subject: [PATCH 18/26] support for funfams as subfamilies --- webfront/tests/fixtures_entry.json | 58 ++++++++++++++++++++++++++ webfront/tests/tests_entry_endpoint.py | 2 +- webfront/tests/tests_modifiers.py | 36 +++++++++++----- webfront/views/modifiers.py | 2 +- 4 files changed, 86 insertions(+), 12 deletions(-) diff --git a/webfront/tests/fixtures_entry.json b/webfront/tests/fixtures_entry.json index 022e2aa5..fb5d605a 100644 --- a/webfront/tests/fixtures_entry.json +++ b/webfront/tests/fixtures_entry.json @@ -692,6 +692,64 @@ "sets": 0, "domain_architectures": 0 } + } }, + { + "model": "webfront.Entry", + "fields": { + "entry_id": null, + "accession": "G3DSA:1.10.10.10", + "type": "homologous_superfamily", + "name": "Winged helix-like DNA-binding domain superfamily/Winged helix DNA-binding domain", + "short_name": "G3DSA:1.10.10.10", + "go_terms": [], + "entry_date": "2004-03-02T00:00:00Z", + "is_featured": true, + "is_alive": true, + "source_database": "cathgene3d", + "member_databases": null, + "integrated": null, + "description": [], + "wikipedia": null, + "literature": {}, + "cross_references": {}, + "counts": { + "proteins": 1, + "structures": 2, + "taxa": 0, + "proteomes": 0, + "sets": 0, + "domain_architectures": 0, + "subfamilies": 1 + } + } + }, + { + "model": "webfront.Entry", + "fields": { + "entry_id": null, + "accession": "G3DSA:1.10.10.10:1", + "type": "funfam", + "name": "LysR family transcriptional regulator", + "short_name": "G3DSA:1.10.10.10:1", + "go_terms": [], + "entry_date": "2004-03-02T00:00:00Z", + "is_featured": true, + "is_alive": false, + "source_database": "cathgene3d", + "member_databases": null, + "integrated": "G3DSA:1.10.10.10", + "description": [], + "wikipedia": null, + "literature": {}, + "cross_references": {}, + "counts": { + "proteins": 0, + "structures": 0, + "taxa": 0, + "proteomes": 0, + "sets": 0, + "domain_architectures": 0 + } } } ] diff --git a/webfront/tests/tests_entry_endpoint.py b/webfront/tests/tests_entry_endpoint.py index bbbb844a..fb728fac 100644 --- a/webfront/tests/tests_entry_endpoint.py +++ b/webfront/tests/tests_entry_endpoint.py @@ -51,7 +51,7 @@ def test_can_read_entry_unintegrated(self): response = 
self.client.get("/api/entry/unintegrated") self.assertEqual(response.status_code, status.HTTP_200_OK) self._check_is_list_of_objects_with_key(response.data["results"], "metadata") - self.assertEqual(len(response.data["results"]), 5) + self.assertEqual(len(response.data["results"]), 6) def test_can_read_entry_interpro_id(self): acc = "IPR003165" diff --git a/webfront/tests/tests_modifiers.py b/webfront/tests/tests_modifiers.py index 946f0c07..1afe5325 100644 --- a/webfront/tests/tests_modifiers.py +++ b/webfront/tests/tests_modifiers.py @@ -506,16 +506,32 @@ def test_model_lddt_modifier(self): self.assertEqual(3, len(data)) self.assertTrue(all([0 <= item <= 1 for item in data])) -class PantherSubfamilyTest(InterproRESTTestCase): + +class SubfamiliesTest(InterproRESTTestCase): + entries = [ + {"db": "panther", "acc": "PTHR43214", "sf": "PTHR43214:sf24"}, + {"db": "cathgene3d", "acc": "G3DSA:1.10.10.10", "sf": "G3DSA:1.10.10.10:1"}, + ] + def test_subfamilies_counter(self): - response = self.client.get("/api/entry/panther/PTHR43214") - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertIn("counters", response.data["metadata"]) - self.assertEqual(response.data["metadata"]["counters"]["subfamilies"], 1) - # self.assertEqual(len(response.data["results"]), 1) + for entry in self.entries: + response = self.client.get(f"/api/entry/{entry['db']}/{entry['acc']}") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertIn("counters", response.data["metadata"]) + self.assertEqual(response.data["metadata"]["counters"]["subfamilies"], 1) + def test_panther_subfamilies(self): - response = self.client.get("/api/entry/panther/PTHR43214?subfamilies") + for entry in self.entries: + response = self.client.get(f"/api/entry/{entry['db']}/{entry['acc']}?subfamilies") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(len(response.data["results"]), 1) + self.assertEqual(response.data["results"][0]["metadata"]["accession"], entry['sf']) + self.assertEqual(response.data["results"][0]["metadata"]["integrated"], entry['acc']) + + def test_no_subfamilies_in_pfam(self): + response = self.client.get(f"/api/entry/pfam/PF02171") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(len(response.data["results"]), 1) - self.assertEqual(response.data["results"][0]["metadata"]["accession"], "PTHR43214:sf24") - self.assertEqual(response.data["results"][0]["metadata"]["integrated"], "PTHR43214") + self.assertIn("counters", response.data["metadata"]) + self.assertNotIn("subfamilies", response.data["metadata"]["counters"]) + response2 = self.client.get(f"/api/entry/pfam/PF02171?subfamilies") + self.assertEqual(response2.status_code, status.HTTP_204_NO_CONTENT) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 5189b0af..6fbfcbaf 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -855,7 +855,7 @@ def get_model_structure(value, general_handler): def get_subfamilies(value, general_handler): queryset = general_handler.queryset_manager.get_queryset().first() - entries = Entry.objects.filter(integrated=queryset.accession, source_database='panther', is_alive=False) + entries = Entry.objects.filter(integrated=queryset.accession, is_alive=False) if len(entries) == 0: raise EmptyQuerysetError( "There is are not subfamilies for this entry" From d5c2f1cf9f57c0c9fb796e26f473e4b551c70292 Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Sun, 25 Sep 2022 22:09:06 +0100 Subject: [PATCH 19/26] Use pLDDT 
score to filter proteins with an AlphaFold model --- webfront/views/modifiers.py | 16 ++++++++++++++++ webfront/views/protein.py | 3 ++- webfront/views/queryset_manager.py | 2 +- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 568279f7..cd1c66ff 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -276,6 +276,22 @@ def filter_by_entry_db(value, general_handler): return response.first() +def filter_by_min_value(endpoint, field, value, sort_direction=None): + def x(_, general_handler): + general_handler.queryset_manager.add_filter( + endpoint, + **{ + "{}__gte".format(field): value + }, + ) + if sort_direction in ("asc", "desc"): + general_handler.queryset_manager.order_by("{}:{}".format(field, sort_direction)) + elif sort_direction is not None: + raise ValueError("{} is not a valid sorting order".format(sort_direction)) + + return x + + def filter_by_boolean_field(endpoint, field): def x(value, general_handler): if value.lower() == "false": diff --git a/webfront/views/protein.py b/webfront/views/protein.py index b54bd4c6..e08e6528 100644 --- a/webfront/views/protein.py +++ b/webfront/views/protein.py @@ -12,6 +12,7 @@ filter_by_domain_architectures, filter_by_contains_field, filter_by_match_presence, + filter_by_min_value, add_extra_fields, get_isoforms, calculate_residue_conservation, @@ -281,7 +282,7 @@ def get( "is_fragment", filter_by_boolean_field("protein", "is_fragment") ) general_handler.modifiers.register( - "has_model", filter_by_boolean_field("protein", "has_model") + "has_model", filter_by_min_value("protein", "protein_af_score", 0, "desc") ) return super(ProteinHandler, self).get( diff --git a/webfront/views/queryset_manager.py b/webfront/views/queryset_manager.py index ba6da3a9..0e767fba 100644 --- a/webfront/views/queryset_manager.py +++ b/webfront/views/queryset_manager.py @@ -132,7 +132,7 @@ def get_searcher_query(self, include_search=False, use_lineage=False): ) elif k == "type" or k == "type__iexact" or k == "type__exact": blocks.append("{}_type:{}".format(ep, escape(v))) - elif k in ("is_fragment", "has_model"): + elif k == "is_fragment": blocks.append("{}_{}:{}".format(ep, k, escape(v))) elif k == "tax_id" or k == "tax_id__iexact" or k == "tax_id__contains": blocks.append("tax_id:{}".format(escape(v))) From 7ed9c5a3eeb1bbaa619f13cdcb2deb86643d4e2b Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Mon, 26 Sep 2022 14:12:01 +0100 Subject: [PATCH 20/26] Force sort_direction to be asc or desc --- webfront/views/modifiers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index cd1c66ff..6104ae96 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -276,7 +276,7 @@ def filter_by_entry_db(value, general_handler): return response.first() -def filter_by_min_value(endpoint, field, value, sort_direction=None): +def filter_by_min_value(endpoint, field, value, sort_direction="asc"): def x(_, general_handler): general_handler.queryset_manager.add_filter( endpoint, @@ -286,7 +286,7 @@ def x(_, general_handler): ) if sort_direction in ("asc", "desc"): general_handler.queryset_manager.order_by("{}:{}".format(field, sort_direction)) - elif sort_direction is not None: + else: raise ValueError("{} is not a valid sorting order".format(sort_direction)) return x From b6dabe97b618a657ade2fb53f1751ddcea039ec4 Mon Sep 17 00:00:00 2001 From: Matthias Blum Date: Mon, 26 Sep 2022 17:36:42 
+0100 Subject: [PATCH 21/26] Enable custom ordering of buckets in composite aggregations --- webfront/searcher/elastic_controller.py | 117 +++++++++++++++++------- 1 file changed, 83 insertions(+), 34 deletions(-) diff --git a/webfront/searcher/elastic_controller.py b/webfront/searcher/elastic_controller.py index 53b76cfa..dc21d95c 100644 --- a/webfront/searcher/elastic_controller.py +++ b/webfront/searcher/elastic_controller.py @@ -12,14 +12,44 @@ es_results = list() +def parseCursor(cursor): + fields = {} + for item in cursor.split(","): + k, t, v = item.split(":") + if t == "f": + fields[k] = float(v) + elif t == "i": + fields[k] = int(v) + else: + fields[k] = v.lower() + + return fields + + +def encodeCursor(keys): + if not keys: + return None + output = [] + for k, v in keys.items(): + if isinstance(v, float): + t = "f" + elif isinstance(v, int): + t = "i" + else: + t = "s" + output.append("{}:{}:{}".format(k, t, v)) + + return ",".join(output) + + def getAfterBeforeFromCursor(cursor): - after = None - before = None + after = {} + before = {} if cursor is not None: if cursor[0] == "-": - before = cursor[1:] + before = parseCursor(cursor[1:]) else: - after = cursor + after = parseCursor(cursor) return after, before @@ -276,7 +306,7 @@ def get_group_obj_copy_of_field_by_query( "size": 0, } after, before = getAfterBeforeFromCursor(cursor) - self.addAfterKeyToQueryComposite( + reset_direction = self.addAfterKeyToQueryComposite( facet["aggs"]["groups"]["composite"], after, before ) if inner_field_to_count is not None: @@ -286,6 +316,8 @@ def get_group_obj_copy_of_field_by_query( if fq is not None: query += " && " + fq response = self._elastic_json_query(query, facet) + if reset_direction: + self.reverseOrderDirection(facet["aggs"]["groups"]["composite"]) after_key = self.getAfterKey(response, facet, before, query) before_key = self.getBeforeKey(response, facet, before, query) buckets = response["aggregations"]["groups"]["buckets"] @@ -325,8 +357,24 @@ def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=No }, "size": 0, } + + # Sort buckets by custom field + match = re.search(r"&?sort=(\w+):(\w+)", qs) + if match: + field, direction = match.groups() + if field != facet["aggs"]["ngroups"]["cardinality"]["field"]: + # Custom field takes priority over default one ('source') + facet["aggs"]["groups"]["composite"]["sources"].insert(0, { + field: { + "terms": { + "field": field, + "order": direction + } + } + }) + after, before = getAfterBeforeFromCursor(cursor) - self.addAfterKeyToQueryComposite( + reset_direction = self.addAfterKeyToQueryComposite( facet["aggs"]["groups"]["composite"], after, before ) if endpoint == "organism" or endpoint == "taxonomy": @@ -345,6 +393,8 @@ def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=No str(x["key"]["source"]).lower() for x in response["aggregations"]["groups"]["buckets"] ] + if reset_direction: + self.reverseOrderDirection(facet["aggs"]["groups"]["composite"]) after_key = self.getAfterKey(response, facet, before, qs) before_key = self.getBeforeKey(response, facet, before, qs) return accessions, count, after_key, before_key @@ -452,53 +502,52 @@ def _elastic_json_query(self, q, query_obj=None, is_ida=False): return obj def addAfterKeyToQueryComposite(self, composite, after, before): - if after is not None: - composite["after"] = {"source": after.lower()} - elif before is not None: - composite["after"] = {"source": before.lower()} - composite["sources"][0]["source"]["terms"]["order"] = "desc" + if after: 
+ composite["after"] = after + return False + elif before: + composite["after"] = before + self.reverseOrderDirection(composite) + return True def getAfterKey(self, response, facet, before, qs): after_key = None - if before is not None: + if before: try: - after_key = response["aggregations"]["groups"]["buckets"][0]["key"][ - "source" - ] + after_key = response["aggregations"]["groups"]["buckets"][0]["key"] except: pass elif "after_key" in response["aggregations"]["groups"]: - after_key = response["aggregations"]["groups"]["after_key"]["source"] + after_key = response["aggregations"]["groups"]["after_key"] if after_key is not None: - facet["aggs"]["groups"]["composite"]["after"] = {"source": after_key} - facet["aggs"]["groups"]["composite"]["sources"][0]["source"]["terms"][ - "order" - ] = "asc" + facet["aggs"]["groups"]["composite"]["after"] = after_key next_response = self._elastic_json_query(qs, facet) if len(next_response["aggregations"]["groups"]["buckets"]) == 0: after_key = None - return after_key + return encodeCursor(after_key) def getBeforeKey(self, response, facet, before, qs): before_key = None try: - if before is not None: - before_key = response["aggregations"]["groups"]["buckets"][-1]["key"][ - "source" - ] + if before: + before_key = response["aggregations"]["groups"]["buckets"][-1]["key"] else: - before_key = response["aggregations"]["groups"]["buckets"][0]["key"][ - "source" - ] + before_key = response["aggregations"]["groups"]["buckets"][0]["key"] except: pass - if before_key is not None: - facet["aggs"]["groups"]["composite"]["after"] = {"source": before_key} - facet["aggs"]["groups"]["composite"]["sources"][0]["source"]["terms"][ - "order" - ] = "desc" + if before_key: + facet["aggs"]["groups"]["composite"]["after"] = before_key + self.reverseOrderDirection(facet["aggs"]["groups"]["composite"]) prev_response = self._elastic_json_query(qs, facet) if len(prev_response["aggregations"]["groups"]["buckets"]) == 0: before_key = None - return before_key + return encodeCursor(before_key) + + def reverseOrderDirection(self, composite): + for field in composite["sources"]: + for k, v in field.items(): + if v["terms"].get("order", "asc") == "asc": + v["terms"]["order"] = "desc" + else: + v["terms"]["order"] = "asc" From 17feca7b79630b91a453e7afa69b07454e3b18d6 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Mon, 3 Oct 2022 11:08:23 +0100 Subject: [PATCH 22/26] sorting the payload at pagination time based on elastic --- webfront/pagination.py | 26 +++++++++++++++++++++++- webfront/views/custom.py | 32 ++++++++++++++++++++---------- webfront/views/queryset_manager.py | 4 ++++ 3 files changed, 50 insertions(+), 12 deletions(-) diff --git a/webfront/pagination.py b/webfront/pagination.py index c7b7615b..690b94b9 100644 --- a/webfront/pagination.py +++ b/webfront/pagination.py @@ -31,19 +31,39 @@ class CustomPagination(CursorPagination): current_size = None after_key = None before_key = None + elastic_result = None def get_paginated_response(self, data): base = [ ("count", self.current_size), ("next", self.get_next_link()), ("previous", self.get_previous_link()), - ("results", data["data"]), + ("results", self._sortBasedOnElastic(data["data"])), ] if "extensions" in data and len(data["extensions"]) > 0: for ext in data["extensions"]: base.append((ext, data["extensions"][ext])) return Response(OrderedDict(base)) + # If there is data in elastic_result, implies that the wueryset was created by querying elastic first. 
+ # This method uses the list of accession retrieved via elastic to order the results. + def _sortBasedOnElastic(self, data): + if self.elastic_result is None: + return data + ordered_data = [] + for acc in self.elastic_result: + obj = next( + filter( + lambda item: item.get("metadata", {}).get("accession", "").lower() + == acc.lower(), + data, + ), + None, + ) + if obj is not None: + ordered_data.append(obj) + return ordered_data + def _get_position_from_instance(self, instance, ordering): if type(instance) == tuple: return instance[0] @@ -51,9 +71,11 @@ def _get_position_from_instance(self, instance, ordering): instance, ordering ) + # Extract some values passed as kwargs before invoking the implementation in the super class def paginate_queryset(self, queryset, request, **kwargs): self.current_size = None self.after_key = None + self.elastic_result = None if ( hasattr(queryset, "model") and queryset.model._meta.ordering != [] @@ -69,6 +91,8 @@ def paginate_queryset(self, queryset, request, **kwargs): self.after_key = kwargs["after_key"] if "before_key" in kwargs and kwargs["before_key"] is not None: self.before_key = kwargs["before_key"] + if "elastic_result" in kwargs and kwargs["elastic_result"] is not None: + self.elastic_result = kwargs["elastic_result"] return super(CustomPagination, self).paginate_queryset( queryset, request, kwargs["view"] ) diff --git a/webfront/views/custom.py b/webfront/views/custom.py index fdb2c61c..05e7cf4d 100644 --- a/webfront/views/custom.py +++ b/webfront/views/custom.py @@ -50,6 +50,7 @@ class CustomView(GenericAPIView): serializer_detail_filter = SerializerDetail.ALL after_key = None before_key = None + elastic_result = None http_method_names = ["get", "head"] def get( @@ -68,13 +69,15 @@ def get( # if this is the last level if len(endpoint_levels) == level: searcher = general_handler.searcher + # Executes all the modifiers, some add filters to the query set but others might replace it. has_payload = general_handler.modifiers.execute(drf_request) logger.debug(request.get_full_path()) + # If there is a payload from the modifiers, it has its own serializer if has_payload or general_handler.modifiers.serializer is not None: self.serializer_detail = general_handler.modifiers.serializer - # self.many = general_handler.modifiers.many if general_handler.modifiers.many is not None: self.many = general_handler.modifiers.many + # When there is a payload from the modifiers. It should build the response with it if has_payload: if general_handler.modifiers.payload is None: raise EmptyQuerysetError( @@ -91,8 +94,9 @@ def get( mime_type = annotation.mime_type anno_type = annotation.type anno_value = annotation.value - response = HttpResponse(content=anno_value, - content_type=mime_type) + response = HttpResponse( + content=anno_value, content_type=mime_type + ) if anno_type.startswith(("alignment:", "model:")): if "download" in request.GET: @@ -117,6 +121,8 @@ def get( ) elif self.from_model: + # Single endpoints don't require Elasticsearch, so they can be resolved + # by normal Django means(i.e. 
just the MySQL model) if ( is_single_endpoint(general_handler) or not self.expected_response_is_list() @@ -126,13 +132,10 @@ def get( ) self.search_size = self.queryset.count() else: + # It uses multiple endpoints, so we need to use the elastic index self.update_queryset_from_search(searcher, general_handler) if self.queryset.count() == 0: - # if 0 == general_handler.queryset_manager.get_queryset(only_main_endpoint=True).count(): - # raise Exception("The URL requested didn't have any data related.\nList of endpoints: {}" - # .format(endpoint_levels)) - raise EmptyQuerysetError( "There is no data associated with the requested URL.\nList of endpoints: {}".format( endpoint_levels @@ -143,15 +146,17 @@ def get( self.get_queryset(), drf_request, view=self, + # passing data of the elastic result to the pagination instance search_size=self.search_size, after_key=self.after_key, before_key=self.before_key, + elastic_result=self.elastic_result, ) else: self.queryset = self.get_queryset().first() else: if endpoint_levels[0] != "utils": - # if it gets here it is a endpoint request checking for database contributions. + # if it gets here it is an endpoint request checking for database contributions. self.queryset = self.get_counter_response(general_handler, searcher) serialized = self.serializer_class( @@ -321,22 +326,27 @@ def expected_response_is_list(self): search_size = None + # Queries the elastic searcher core to get a list of accessions of the main endpoint, + # then builds a queryset matching those accessions. + # This is the main connection point between elastic and MySQL def update_queryset_from_search(self, searcher, general_handler): ep = general_handler.queryset_manager.main_endpoint s = general_handler.pagination["size"] cursor = general_handler.pagination["cursor"] qs = general_handler.queryset_manager.get_searcher_query(include_search=True) - res, length, after_key, before_key = searcher.get_list_of_endpoint( + elastic_result, length, after_key, before_key = searcher.get_list_of_endpoint( ep, rows=s, query=qs, cursor=cursor ) - self.queryset = general_handler.queryset_manager.get_base_queryset(ep) - self.queryset = filter_queryset_accession_in(self.queryset, res) + self.queryset = general_handler.queryset_manager.get_base_queryset(ep) + self.queryset = filter_queryset_accession_in(self.queryset, elastic_result) + # This values get store in the instance attributes, so they can be recovered on the pagination stage. self.search_size = length self.after_key = after_key self.before_key = before_key + self.elastic_result = elastic_result def filter_queryset_accession_in(queryset, list): diff --git a/webfront/views/queryset_manager.py b/webfront/views/queryset_manager.py index 0e767fba..d424b01f 100644 --- a/webfront/views/queryset_manager.py +++ b/webfront/views/queryset_manager.py @@ -69,6 +69,8 @@ def remove_filter(self, endpoint, f): def order_by(self, field): self.order_field = field + # Generates a query string for elasticsearch from the registered queryset filters. + # It explicitely goes through all the filters and create the query string case by case. def get_searcher_query(self, include_search=False, use_lineage=False): blocks = [] for ep in self.filters: @@ -162,9 +164,11 @@ def get_searcher_query(self, include_search=False, use_lineage=False): if k == "source_database" or k == "source_database__iexact": blocks.append("{}_db:{}".format(ep, escape(v))) + # Normalizes the blocks(sorts and lower) and joins them with ' && '. 
blocks = list(set(blocks)) blocks.sort() q = " && ".join(blocks).lower() + if self.order_field is not None: q += "&sort=" + self.order_field return q From c323ec910cda48fca3327c7fbed32d003b86e958 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Mon, 3 Oct 2022 12:12:36 +0100 Subject: [PATCH 23/26] only applying the elastic order when extra order is impose in the search --- webfront/searcher/elastic_controller.py | 4 +++- webfront/views/custom.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/webfront/searcher/elastic_controller.py b/webfront/searcher/elastic_controller.py index dc21d95c..d20e701a 100644 --- a/webfront/searcher/elastic_controller.py +++ b/webfront/searcher/elastic_controller.py @@ -340,6 +340,7 @@ def get_group_obj_copy_of_field_by_query( return output def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=None): + should_keep_elastic_order = False qs = self.queryset_manager.get_searcher_query() if query is None else query if qs == "": qs = "*:*" @@ -372,6 +373,7 @@ def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=No } } }) + should_keep_elastic_order = True after, before = getAfterBeforeFromCursor(cursor) reset_direction = self.addAfterKeyToQueryComposite( @@ -397,7 +399,7 @@ def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=No self.reverseOrderDirection(facet["aggs"]["groups"]["composite"]) after_key = self.getAfterKey(response, facet, before, qs) before_key = self.getBeforeKey(response, facet, before, qs) - return accessions, count, after_key, before_key + return accessions, count, after_key, before_key,should_keep_elastic_order def get_chain(self): qs = self.queryset_manager.get_searcher_query() diff --git a/webfront/views/custom.py b/webfront/views/custom.py index 05e7cf4d..b94e208f 100644 --- a/webfront/views/custom.py +++ b/webfront/views/custom.py @@ -335,7 +335,7 @@ def update_queryset_from_search(self, searcher, general_handler): cursor = general_handler.pagination["cursor"] qs = general_handler.queryset_manager.get_searcher_query(include_search=True) - elastic_result, length, after_key, before_key = searcher.get_list_of_endpoint( + elastic_result, length, after_key, before_key, should_keep_elastic_order = searcher.get_list_of_endpoint( ep, rows=s, query=qs, cursor=cursor ) @@ -346,7 +346,7 @@ def update_queryset_from_search(self, searcher, general_handler): self.search_size = length self.after_key = after_key self.before_key = before_key - self.elastic_result = elastic_result + self.elastic_result = elastic_result if should_keep_elastic_order else None def filter_queryset_accession_in(queryset, list): From 316b8b0fd9b457f5b196509d5b36a951e07382b2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. 
Salazar" Date: Mon, 3 Oct 2022 14:46:01 +0100 Subject: [PATCH 24/26] cleaning up my mess --- webfront/searcher/elastic_controller.py | 2 +- webfront/views/modifiers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/webfront/searcher/elastic_controller.py b/webfront/searcher/elastic_controller.py index d20e701a..8eb9b69f 100644 --- a/webfront/searcher/elastic_controller.py +++ b/webfront/searcher/elastic_controller.py @@ -399,7 +399,7 @@ def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=No self.reverseOrderDirection(facet["aggs"]["groups"]["composite"]) after_key = self.getAfterKey(response, facet, before, qs) before_key = self.getBeforeKey(response, facet, before, qs) - return accessions, count, after_key, before_key,should_keep_elastic_order + return accessions, count, after_key, before_key, should_keep_elastic_order def get_chain(self): qs = self.queryset_manager.get_searcher_query() diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index cf74f597..07a59e86 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -420,7 +420,7 @@ def filter_by_domain_architectures(field, general_handler): general_handler.queryset_manager.get_searcher_query() + " && ida_id:" + field ) endpoint = general_handler.queryset_manager.main_endpoint - res, length, after_key, before_key = searcher.get_list_of_endpoint( + res, length, after_key, before_key, _ = searcher.get_list_of_endpoint( endpoint, rows=size, query=query, cursor=cursor ) general_handler.modifiers.search_size = length From 9eeeb6a1a9e30b9beaf012f9c95a383c35061b2e Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Tue, 4 Oct 2022 13:04:49 +0100 Subject: [PATCH 25/26] cleanup and running black --- webfront/exceptions.py | 6 +- .../migrations/0010_wiki_field_type_change.py | 10 +-- .../0016_structural_model_algorithm.py | 14 ++- .../migrations/0017_structural_model_plddt.py | 10 +-- webfront/migrations/0018_taxa_modifier.py | 10 +-- webfront/migrations/0019_entrytaxa_table.py | 28 +++--- ...020_alter_entryannotation_num_sequences.py | 10 +-- webfront/pagination.py | 58 ------------ webfront/searcher/elastic_controller.py | 63 +------------ webfront/serializers/interpro.py | 3 +- webfront/templatetags/interpro_tags.py | 5 +- webfront/tests/fixtures_reader.py | 22 +++-- webfront/tests/test_mail.py | 18 ++-- .../tests/tests_3_endpoints_using_searcher.py | 4 +- webfront/tests/tests_entry_endpoint.py | 3 +- webfront/tests/tests_modifiers.py | 25 ++++-- .../tests_protein_endpoint_entry_filter.py | 4 +- .../tests_structure_endpoint_entry_filter.py | 12 ++- ...e_payload_structure_combining_endpoints.py | 8 +- webfront/tests/tests_utils_endpoint.py | 4 +- webfront/views/mail.py | 44 ++++----- webfront/views/modifiers.py | 89 ++++++++++++------- webfront/views/utils.py | 15 ++-- 23 files changed, 193 insertions(+), 272 deletions(-) diff --git a/webfront/exceptions.py b/webfront/exceptions.py index 3d1e93eb..9882cf7d 100644 --- a/webfront/exceptions.py +++ b/webfront/exceptions.py @@ -10,18 +10,22 @@ class EmptyQuerysetError(Exception): def __init__(self, message): self.message = message + class ExpectedUniqueError(Exception): def __init__(self, message): self.message = message + class HmmerWebError(Exception): def __init__(self, message): self.message = message + class BadURLParameterError(Exception): def __init__(self, message): self.message = message + class InvalidOperationRequest(Exception): def __init__(self, message): - self.message = message \ No newline 
at end of file + self.message = message diff --git a/webfront/migrations/0010_wiki_field_type_change.py b/webfront/migrations/0010_wiki_field_type_change.py index 624da118..0e0f02e4 100644 --- a/webfront/migrations/0010_wiki_field_type_change.py +++ b/webfront/migrations/0010_wiki_field_type_change.py @@ -6,14 +6,12 @@ class Migration(migrations.Migration): - dependencies = [ - ('webfront', '0009_entry_annotation_changes'), - ] + dependencies = [("webfront", "0009_entry_annotation_changes")] operations = [ migrations.AlterField( - model_name='entry', - name='wikipedia', + model_name="entry", + name="wikipedia", field=jsonfield.fields.JSONField(null=True), - ), + ) ] diff --git a/webfront/migrations/0016_structural_model_algorithm.py b/webfront/migrations/0016_structural_model_algorithm.py index 65dd959f..7191cac6 100644 --- a/webfront/migrations/0016_structural_model_algorithm.py +++ b/webfront/migrations/0016_structural_model_algorithm.py @@ -5,20 +5,16 @@ class Migration(migrations.Migration): - dependencies = [ - ('webfront', '0015_structural_model_lddt'), - ] + dependencies = [("webfront", "0015_structural_model_lddt")] operations = [ migrations.AddField( - model_name='structuralmodel', - name='algorithm', - field=models.CharField(default='trRosetta', max_length=20), + model_name="structuralmodel", + name="algorithm", + field=models.CharField(default="trRosetta", max_length=20), preserve_default=False, ), migrations.AlterField( - model_name='structuralmodel', - name='lddt', - field=models.BinaryField(), + model_name="structuralmodel", name="lddt", field=models.BinaryField() ), ] diff --git a/webfront/migrations/0017_structural_model_plddt.py b/webfront/migrations/0017_structural_model_plddt.py index d6544ac9..ac98cec9 100644 --- a/webfront/migrations/0017_structural_model_plddt.py +++ b/webfront/migrations/0017_structural_model_plddt.py @@ -5,14 +5,10 @@ class Migration(migrations.Migration): - dependencies = [ - ('webfront', '0016_structural_model_algorithm'), - ] + dependencies = [("webfront", "0016_structural_model_algorithm")] operations = [ migrations.RenameField( - model_name='structuralmodel', - old_name='lddt', - new_name='plddt', - ), + model_name="structuralmodel", old_name="lddt", new_name="plddt" + ) ] diff --git a/webfront/migrations/0018_taxa_modifier.py b/webfront/migrations/0018_taxa_modifier.py index 2f91e884..0e10bb2d 100644 --- a/webfront/migrations/0018_taxa_modifier.py +++ b/webfront/migrations/0018_taxa_modifier.py @@ -6,14 +6,10 @@ class Migration(migrations.Migration): - dependencies = [ - ('webfront', '0017_structural_model_plddt'), - ] + dependencies = [("webfront", "0017_structural_model_plddt")] operations = [ migrations.AddField( - model_name='entry', - name='taxa', - field=jsonfield.fields.JSONField(null=True), - ), + model_name="entry", name="taxa", field=jsonfield.fields.JSONField(null=True) + ) ] diff --git a/webfront/migrations/0019_entrytaxa_table.py b/webfront/migrations/0019_entrytaxa_table.py index c42ca6bd..6db09203 100644 --- a/webfront/migrations/0019_entrytaxa_table.py +++ b/webfront/migrations/0019_entrytaxa_table.py @@ -7,23 +7,25 @@ class Migration(migrations.Migration): - dependencies = [ - ('webfront', '0018_taxa_modifier'), - ] + dependencies = [("webfront", "0018_taxa_modifier")] operations = [ migrations.CreateModel( - name='EntryTaxa', + name="EntryTaxa", fields=[ - ('accession', models.OneToOneField(db_column='accession', on_delete=django.db.models.deletion.CASCADE, primary_key=True, serialize=False, to='webfront.entry')), - ('tree', 
jsonfield.fields.JSONField(null=True)), + ( + "accession", + models.OneToOneField( + db_column="accession", + on_delete=django.db.models.deletion.CASCADE, + primary_key=True, + serialize=False, + to="webfront.entry", + ), + ), + ("tree", jsonfield.fields.JSONField(null=True)), ], - options={ - 'db_table': 'webfront_entrytaxa', - }, - ), - migrations.RemoveField( - model_name='entry', - name='taxa', + options={"db_table": "webfront_entrytaxa"}, ), + migrations.RemoveField(model_name="entry", name="taxa"), ] diff --git a/webfront/migrations/0020_alter_entryannotation_num_sequences.py b/webfront/migrations/0020_alter_entryannotation_num_sequences.py index e318c838..e8f099b7 100644 --- a/webfront/migrations/0020_alter_entryannotation_num_sequences.py +++ b/webfront/migrations/0020_alter_entryannotation_num_sequences.py @@ -5,14 +5,12 @@ class Migration(migrations.Migration): - dependencies = [ - ('webfront', '0019_entrytaxa_table'), - ] + dependencies = [("webfront", "0019_entrytaxa_table")] operations = [ migrations.AlterField( - model_name='entryannotation', - name='num_sequences', + model_name="entryannotation", + name="num_sequences", field=models.IntegerField(null=True), - ), + ) ] diff --git a/webfront/pagination.py b/webfront/pagination.py index 690b94b9..bf0a3b54 100644 --- a/webfront/pagination.py +++ b/webfront/pagination.py @@ -129,61 +129,3 @@ def get_previous_link(self): return replace_query_param( self.base_url, "cursor", "-{}".format(self.before_key) ) - - -class CustomPaginationOld(PageNumberPagination): - page_size = settings.INTERPRO_CONFIG.get("default_page_size", 20) - page_size_query_param = "page_size" - max_page_size = 200 - ordering = "-accession" - django_paginator_class = CustomPaginator - current_size = None - - def get_paginated_response(self, data): - self.current_size = ( - self.page.paginator.count - if self.current_size is None - else self.current_size - ) - return Response( - OrderedDict( - [ - ("count", self.current_size), - ("next", self.get_next_link()), - ("previous", self.get_previous_link()), - ("results", data), - ] - ) - ) - - def paginate_queryset(self, queryset, request, **kwargs): - self.current_size = None - if "search_size" in kwargs and kwargs["search_size"] is not None: - if not queryset.ordered: - queryset = queryset.order_by("accession") - self.current_size = kwargs["search_size"] - - return super(CustomPagination, self).paginate_queryset( - queryset, request, kwargs["view"] - ) - - def get_next_link(self): - if not self.has_next(): - return None - url = replace_url_host(self.request.build_absolute_uri()) - page_number = self.page.number + 1 - return replace_query_param(url, self.page_query_param, page_number) - - def has_next(self): - if self.current_size is None: - return False - return self.page.number * self.page.paginator.per_page < self.current_size - - def get_previous_link(self): - if not self.page.has_previous(): - return None - url = replace_url_host(self.request.build_absolute_uri()) - page_number = self.page.previous_page_number() - if page_number == 1: - return remove_query_param(url, self.page_query_param) - return replace_query_param(url, self.page_query_param, page_number) diff --git a/webfront/searcher/elastic_controller.py b/webfront/searcher/elastic_controller.py index 8eb9b69f..4dc4872b 100644 --- a/webfront/searcher/elastic_controller.py +++ b/webfront/searcher/elastic_controller.py @@ -287,58 +287,6 @@ def get_group_obj_of_field_by_query( return output - def get_group_obj_copy_of_field_by_query( - self, query, field, 
fq=None, rows=1, cursor=None, inner_field_to_count=None - ): - # TODO: change to new pagination - query = self.queryset_manager.get_searcher_query() if query is None else query - facet = { - "aggs": { - "ngroups": {"cardinality": {"field": field}}, - "groups": { - "composite": { - "size": rows, - "sources": [{"source": {"terms": {"field": field}}}], - }, - "aggs": {"tops": {"top_hits": {"size": 1}}}, - }, - }, - "size": 0, - } - after, before = getAfterBeforeFromCursor(cursor) - reset_direction = self.addAfterKeyToQueryComposite( - facet["aggs"]["groups"]["composite"], after, before - ) - if inner_field_to_count is not None: - facet["aggs"]["groups"]["aggs"]["unique"] = { - "cardinality": {"field": inner_field_to_count} - } - if fq is not None: - query += " && " + fq - response = self._elastic_json_query(query, facet) - if reset_direction: - self.reverseOrderDirection(facet["aggs"]["groups"]["composite"]) - after_key = self.getAfterKey(response, facet, before, query) - before_key = self.getBeforeKey(response, facet, before, query) - buckets = response["aggregations"]["groups"]["buckets"] - if len(buckets) > 0 and "tops" not in buckets[0]: - buckets = [b for sb in buckets for b in sb["subgroups"]["buckets"]] - output = { - "groups": [ - bucket["tops"]["hits"]["hits"][0]["_source"] for bucket in buckets - ], - "ngroups": response["aggregations"]["ngroups"], - "after_key": after_key, - "before_key": before_key, - } - if inner_field_to_count is not None: - i = 0 - for bucket in response["aggregations"]["groups"]["buckets"]: - output["groups"][i]["unique"] = bucket["unique"] - i += 1 - - return output - def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=None): should_keep_elastic_order = False qs = self.queryset_manager.get_searcher_query() if query is None else query @@ -365,14 +313,9 @@ def get_list_of_endpoint(self, endpoint, query=None, rows=10, start=0, cursor=No field, direction = match.groups() if field != facet["aggs"]["ngroups"]["cardinality"]["field"]: # Custom field takes priority over default one ('source') - facet["aggs"]["groups"]["composite"]["sources"].insert(0, { - field: { - "terms": { - "field": field, - "order": direction - } - } - }) + facet["aggs"]["groups"]["composite"]["sources"].insert( + 0, {field: {"terms": {"field": field, "order": direction}}} + ) should_keep_elastic_order = True after, before = getAfterBeforeFromCursor(cursor) diff --git a/webfront/serializers/interpro.py b/webfront/serializers/interpro.py index 238555e0..77f59429 100644 --- a/webfront/serializers/interpro.py +++ b/webfront/serializers/interpro.py @@ -204,8 +204,7 @@ def reformat_cross_references(cross_references): @staticmethod def to_metadata_representation(instance, searcher, sq, counters=None): results = EntryAnnotation.objects.filter(accession=instance.accession).only( - "type", - "num_sequences" + "type", "num_sequences" ) annotation_types = {x.type: x.num_sequences or 0 for x in results} if counters is None: diff --git a/webfront/templatetags/interpro_tags.py b/webfront/templatetags/interpro_tags.py index 62c7686e..546e6cc8 100644 --- a/webfront/templatetags/interpro_tags.py +++ b/webfront/templatetags/interpro_tags.py @@ -7,8 +7,11 @@ register = template.Library() + @register.simple_tag def get_url_with_prefix(request, key, val): iri = request.get_full_path() uri = iri_to_uri(iri) - return settings.INTERPRO_CONFIG["url_path_prefix"] + escape(replace_query_param(uri, key, val)) + return settings.INTERPRO_CONFIG["url_path_prefix"] + escape( + 
replace_query_param(uri, key, val) + ) diff --git a/webfront/tests/fixtures_reader.py b/webfront/tests/fixtures_reader.py index 73aa101e..1ca623d7 100644 --- a/webfront/tests/fixtures_reader.py +++ b/webfront/tests/fixtures_reader.py @@ -115,17 +115,17 @@ def get_fixtures(self): "entry_integrated": self.entries[e]["integrated"], "entry_date": self.entries[e]["entry_date"], "text_entry": e - + " " - + self.entries[e]["type"] - + " " - + (" ".join(self.entries[e]["description"])), + + " " + + self.entries[e]["type"] + + " " + + (" ".join(self.entries[e]["description"])), "protein_acc": p, "protein_db": self.proteins[p]["source_database"], "text_protein": p - + " " - + self.proteins[p]["source_database"] - + " " - + (" ".join(self.proteins[p]["description"])), + + " " + + self.proteins[p]["source_database"] + + " " + + (" ".join(self.proteins[p]["description"])), "ida_id": self.proteins[p]["ida_id"], "ida": self.proteins[p]["ida"], "tax_id": self.proteins[p]["organism"]["taxId"], @@ -250,16 +250,14 @@ def get_fixtures(self): # Creating obj to add for proteins without entry or structure for p in self.proteins: - p_ocurrences = len([t for t in to_add if t['protein_acc']==p ]) + p_ocurrences = len([t for t in to_add if t["protein_acc"] == p]) if p_ocurrences == 0: to_add.append( { "text": p, "protein_acc": p, "protein_db": self.proteins[p]["source_database"], - "text_protein": p - + " " - + self.proteins[p]["source_database"], + "text_protein": p + " " + self.proteins[p]["source_database"], "tax_id": self.proteins[p]["organism"]["taxId"], "tax_name": self.proteins[p]["organism"]["name"], "tax_rank": self.tax2rank[ diff --git a/webfront/tests/test_mail.py b/webfront/tests/test_mail.py index a388dac9..b758bdfd 100644 --- a/webfront/tests/test_mail.py +++ b/webfront/tests/test_mail.py @@ -5,11 +5,13 @@ class TestMail(TestCase): def test_mail(self): self.client = Client() - response = self.client.post('/api/mail/', - { - 'path': 'echo', - 'subject': 'Add annotation test from API', - 'message': 'Test', - 'from_email': 'swaathik@ebi.ac.uk' - }) - self.assertEqual(response.json()['from'], 'swaathik@ebi.ac.uk') + response = self.client.post( + "/api/mail/", + { + "path": "echo", + "subject": "Add annotation test from API", + "message": "Test", + "from_email": "swaathik@ebi.ac.uk", + }, + ) + self.assertEqual(response.json()["from"], "swaathik@ebi.ac.uk") diff --git a/webfront/tests/tests_3_endpoints_using_searcher.py b/webfront/tests/tests_3_endpoints_using_searcher.py index 9a10522d..2e1f562e 100644 --- a/webfront/tests/tests_3_endpoints_using_searcher.py +++ b/webfront/tests/tests_3_endpoints_using_searcher.py @@ -23,7 +23,9 @@ }, "structure": {"pdb": ["1JM7", "1T2V", "2BKM", "1JZ8"]}, "taxonomy": {"uniprot": ["1", "2", "2579", "40296", "344612", "1001583", "10090"]}, - "proteome": {"uniprot": ["UP000006701", "UP000012042", "UP000030104", "UP000000589"]}, + "proteome": { + "uniprot": ["UP000006701", "UP000012042", "UP000030104", "UP000000589"] + }, "set": {"pfam": ["CL0001", "CL0002"]}, } diff --git a/webfront/tests/tests_entry_endpoint.py b/webfront/tests/tests_entry_endpoint.py index fb728fac..33c834e1 100644 --- a/webfront/tests/tests_entry_endpoint.py +++ b/webfront/tests/tests_entry_endpoint.py @@ -146,8 +146,7 @@ def test_can_read_entry_pfam_id(self): self.assertIn("counters", response.data["metadata"].keys()) self.assertIn("proteins", response.data["metadata"]["counters"].keys()) self.assertIn("entry_annotations", response.data["metadata"].keys()) - 
self.assertIsInstance(response.data["metadata"]["entry_annotations"], - dict) + self.assertIsInstance(response.data["metadata"]["entry_annotations"], dict) for k, v in response.data["metadata"]["entry_annotations"].items(): self.assertIsInstance(k, str) self.assertIsInstance(v, int) diff --git a/webfront/tests/tests_modifiers.py b/webfront/tests/tests_modifiers.py index 1afe5325..81c37d18 100644 --- a/webfront/tests/tests_modifiers.py +++ b/webfront/tests/tests_modifiers.py @@ -25,7 +25,9 @@ def test_can_get_the_entry_type_groups_proteins_by_tax_id(self): response = self.client.get("/api/protein?group_by=tax_id") self.assertEqual(response.status_code, status.HTTP_200_OK) # Only Mus musculus is a key species - self.assertEqual({'10090': {'value': 1, 'title': 'Mus musculus'}}, response.data) + self.assertEqual( + {"10090": {"value": 1, "title": "Mus musculus"}}, response.data + ) def test_can_group_interpro_entries_with_member_databases(self): response = self.client.get("/api/entry/interpro?group_by=member_databases") @@ -381,7 +383,7 @@ def test_annotation_modifier_hmm(self): def test_annotation_modifier_logo(self): response = self.client.get("/api/entry/pfam/pf02171?annotation=logo") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response['content-type'], "application/json") + self.assertEqual(response["content-type"], "application/json") data = json.loads(response.content) self.assertIn("ali_map", data) self.assertEqual(302, len(data["ali_map"])) @@ -392,7 +394,9 @@ def test_annotation_modifier_pfam_alignment(self): self.assertEqual(response["content-type"], "text/plain") def test_annotation_modifier_interpro_alignment(self): - response = self.client.get("/api/entry/interpro/ipr003165?annotation=alignment:seed") + response = self.client.get( + "/api/entry/interpro/ipr003165?annotation=alignment:seed" + ) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response["content-type"], "text/plain") @@ -437,7 +441,8 @@ def test_taxa_modifier(self): def test_no_taxa_modifier(self): response = self.client.get("/api/entry/interpro/IPR001165?taxa") self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - + + class TaxonomyScientificNameModifierTest(InterproRESTTestCase): def test_scientific_name_modifier(self): response = self.client.get("/api/taxonomy/uniprot/?scientific_name=Bacteria") @@ -522,11 +527,17 @@ def test_subfamilies_counter(self): def test_panther_subfamilies(self): for entry in self.entries: - response = self.client.get(f"/api/entry/{entry['db']}/{entry['acc']}?subfamilies") + response = self.client.get( + f"/api/entry/{entry['db']}/{entry['acc']}?subfamilies" + ) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(len(response.data["results"]), 1) - self.assertEqual(response.data["results"][0]["metadata"]["accession"], entry['sf']) - self.assertEqual(response.data["results"][0]["metadata"]["integrated"], entry['acc']) + self.assertEqual( + response.data["results"][0]["metadata"]["accession"], entry["sf"] + ) + self.assertEqual( + response.data["results"][0]["metadata"]["integrated"], entry["acc"] + ) def test_no_subfamilies_in_pfam(self): response = self.client.get(f"/api/entry/pfam/PF02171") diff --git a/webfront/tests/tests_protein_endpoint_entry_filter.py b/webfront/tests/tests_protein_endpoint_entry_filter.py index c787ef3f..17ae4860 100644 --- a/webfront/tests/tests_protein_endpoint_entry_filter.py +++ b/webfront/tests/tests_protein_endpoint_entry_filter.py @@ -138,7 +138,9 @@ def 
test_urls_that_return_a_protein_details_with_matches(self): "/api/protein/uniprot/" + sp_2 + "/entry/interpro": ["IPR003165", "IPR001165"], - "/api/protein/uniprot/" + sp_1 + "/entry/unintegrated": ["PF17180", "PTHR43214"], + "/api/protein/uniprot/" + + sp_1 + + "/entry/unintegrated": ["PF17180", "PTHR43214"], "/api/protein/uniprot/" + sp_2 + "/entry/pfam": ["PF17176", "PF02171"], "/api/protein/uniprot/" + sp_2 + "/entry/interpro/pfam": ["PF02171"], "/api/protein/uniprot/" + sp_2 + "/entry/interpro/smart": ["SM00950"], diff --git a/webfront/tests/tests_structure_endpoint_entry_filter.py b/webfront/tests/tests_structure_endpoint_entry_filter.py index 42c54287..6fef7606 100644 --- a/webfront/tests/tests_structure_endpoint_entry_filter.py +++ b/webfront/tests/tests_structure_endpoint_entry_filter.py @@ -121,7 +121,9 @@ def test_urls_that_return_a_structure_details_with_matches(self): pdb_2 = "2BKM" acc = "IPR003165" urls = { - "/api/structure/pdb/" + pdb_2 + "/entry/unintegrated": ["PF17180", "PTHR43214"], + "/api/structure/pdb/" + + pdb_2 + + "/entry/unintegrated": ["PF17180", "PTHR43214"], "/api/structure/pdb/" + pdb_1 + "/entry/unintegrated": ["PF17180", "PF17176", "PTHR43214"], @@ -165,9 +167,13 @@ def test_urls_that_return_a_structure_details_with_matches_from_chain(self): pdb_2 = "2BKM" acc = "IPR003165" urls = { - "/api/structure/pdb/" + pdb_2 + "/B/entry/unintegrated": ["PF17180", "PTHR43214"], + "/api/structure/pdb/" + + pdb_2 + + "/B/entry/unintegrated": ["PF17180", "PTHR43214"], "/api/structure/pdb/" + pdb_1 + "/A/entry/unintegrated": ["PF17176"], - "/api/structure/pdb/" + pdb_1 + "/B/entry/unintegrated": ["PF17180", "PTHR43214"], + "/api/structure/pdb/" + + pdb_1 + + "/B/entry/unintegrated": ["PF17180", "PTHR43214"], "/api/structure/pdb/" + pdb_1 + "/A/entry/interpro": ["IPR003165", "IPR001165"], diff --git a/webfront/tests/tests_to_check_the_payload_structure_combining_endpoints.py b/webfront/tests/tests_to_check_the_payload_structure_combining_endpoints.py index b85518d4..f27337ee 100644 --- a/webfront/tests/tests_to_check_the_payload_structure_combining_endpoints.py +++ b/webfront/tests/tests_to_check_the_payload_structure_combining_endpoints.py @@ -25,7 +25,9 @@ }, "structure": {"pdb": ["1JM7", "1T2V", "2BKM", "1JZ8"]}, "taxonomy": {"uniprot": ["1", "2", "2579", "40296", "344612", "1001583", "10090"]}, - "proteome": {"uniprot": ["UP000006701", "UP000012042", "UP000030104", "UP000000589"]}, + "proteome": { + "uniprot": ["UP000006701", "UP000012042", "UP000030104", "UP000000589"] + }, } plurals = ModelContentSerializer.plurals @@ -252,7 +254,9 @@ def test_db_db(self): elif response.status_code != status.HTTP_204_NO_CONTENT: self.assertEqual( - response.status_code, status.HTTP_204_NO_CONTENT, "URL : [{}]".format(current), + response.status_code, + status.HTTP_204_NO_CONTENT, + "URL : [{}]".format(current), ) def test_db_acc(self): diff --git a/webfront/tests/tests_utils_endpoint.py b/webfront/tests/tests_utils_endpoint.py index 2e05fb2b..fb9f4310 100644 --- a/webfront/tests/tests_utils_endpoint.py +++ b/webfront/tests/tests_utils_endpoint.py @@ -68,8 +68,8 @@ def test_accession_endpoint_with_gene_name(self): self.assertEqual(response.data["endpoint"], "protein") self.assertEqual(response.data["source_database"], "unreviewed") self.assertIn("proteins", response.data) - self.assertGreater( len(response.data["proteins"]), 0) - self.assertEqual(response.data["proteins"][0]['accession'], "Q0VDM6") + self.assertGreater(len(response.data["proteins"]), 0) + 
self.assertEqual(response.data["proteins"][0]["accession"], "Q0VDM6") class UtilsReleaseTest(InterproRESTTestCase): diff --git a/webfront/views/mail.py b/webfront/views/mail.py index 884ca1f6..9a36deca 100644 --- a/webfront/views/mail.py +++ b/webfront/views/mail.py @@ -11,61 +11,51 @@ def mail_interhelp(request): ip_address = get_client_ip(request) now = datetime.now() - if not hasattr(settings, 'credentials'): + if not hasattr(settings, "credentials"): return store_credentials_and_mail(request, ip_address, now) else: last_accessed = settings.credentials - if last_accessed['ip'] == ip_address: - then = datetime.strptime(last_accessed['time'], "%Y-%m-%d %H:%M:%S.%f") + if last_accessed["ip"] == ip_address: + then = datetime.strptime(last_accessed["time"], "%Y-%m-%d %H:%M:%S.%f") time_diff = now - then elapsed_min = time_diff / timedelta(minutes=1) if elapsed_min >= 1: return store_credentials_and_mail(request, ip_address, now) else: - data = { - 'error': 'Request Aborted', - } + data = {"error": "Request Aborted"} return JsonResponse(data, status=429) else: return store_credentials_and_mail(request, ip_address, now) def get_client_ip(request): - x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR') + x_forwarded_for = request.META.get("HTTP_X_FORWARDED_FOR") if x_forwarded_for: - ip = x_forwarded_for.split(',')[0] + ip = x_forwarded_for.split(",")[0] else: - ip = request.META.get('REMOTE_ADDR') + ip = request.META.get("REMOTE_ADDR") return ip def store_credentials_and_mail(request, ip, time): - settings.credentials = { - 'ip': ip, - 'time': time.strftime("%Y-%m-%d %H:%M:%S.%f") - } + settings.credentials = {"ip": ip, "time": time.strftime("%Y-%m-%d %H:%M:%S.%f")} return mail(request) def mail(request): - path = request.POST.get('path', INTERPRO_CONFIG.get("sendmail_path")) - subject = request.POST.get('subject', '') - message = request.POST.get('message', '') - from_email = request.POST.get('from_email', '') + path = request.POST.get("path", INTERPRO_CONFIG.get("sendmail_path")) + subject = request.POST.get("subject", "") + message = request.POST.get("message", "") + from_email = request.POST.get("from_email", "") if path and subject and message and from_email: message = MIMEText(message) - message['From'] = from_email - message['To'] = 'interhelp@ebi.ac.uk' - message['Subject'] = subject + message["From"] = from_email + message["To"] = "interhelp@ebi.ac.uk" + message["Subject"] = subject p = Popen([path, "-t", "-oi"], stdin=PIPE) p.communicate(message.as_bytes()) - data = { - 'from': from_email, - 'subject': subject, - } + data = {"from": from_email, "subject": subject} return JsonResponse(data) else: - data = { - 'error': 'Make sure all fields are entered and valid', - } + data = {"error": "Make sure all fields are entered and valid"} return JsonResponse(data, status=400) diff --git a/webfront/views/modifiers.py b/webfront/views/modifiers.py index 07a59e86..0180550e 100644 --- a/webfront/views/modifiers.py +++ b/webfront/views/modifiers.py @@ -17,7 +17,12 @@ StructuralModel, ) from webfront.views.custom import filter_queryset_accession_in -from webfront.exceptions import EmptyQuerysetError, HmmerWebError, ExpectedUniqueError, InvalidOperationRequest +from webfront.exceptions import ( + EmptyQuerysetError, + HmmerWebError, + ExpectedUniqueError, + InvalidOperationRequest, +) from django.conf import settings from urllib import request, parse @@ -279,13 +284,12 @@ def filter_by_entry_db(value, general_handler): def filter_by_min_value(endpoint, field, value, sort_direction="asc"): def 
x(_, general_handler): general_handler.queryset_manager.add_filter( - endpoint, - **{ - "{}__gte".format(field): value - }, + endpoint, **{"{}__gte".format(field): value} ) if sort_direction in ("asc", "desc"): - general_handler.queryset_manager.order_by("{}:{}".format(field, sort_direction)) + general_handler.queryset_manager.order_by( + "{}:{}".format(field, sort_direction) + ) else: raise ValueError("{} is not a valid sorting order".format(sort_direction)) @@ -542,22 +546,23 @@ def get_isoforms(value, general_handler): return {"results": [iso.accession for iso in isoforms], "count": len(isoforms)} + def run_hmmsearch(model): """ run hmmsearch using hmm model against reviewed uniprot proteins """ - parameters = { - "seq": model, - "seqdb": "swissprot", - } - + parameters = {"seq": model, "seqdb": "swissprot"} + enc_params = parse.urlencode(parameters).encode() url = "https://www.ebi.ac.uk/Tools/hmmer/search/hmmsearch" - req = request.Request(url=url, data=enc_params, headers={"Accept": "application/json"}) + req = request.Request( + url=url, data=enc_params, headers={"Accept": "application/json"} + ) with request.urlopen(req) as response: raw_results = response.read().decode("utf-8") results = loads(raw_results) - return results['results']['hits'] + return results["results"]["hits"] + def calculate_conservation_scores(entry_acc): """ @@ -661,45 +666,65 @@ def calculate_residue_conservation(entry_db, general_handler): # will always have one protein in queryset protein = queryset[0] - if protein.source_database != 'reviewed': + if protein.source_database != "reviewed": raise InvalidOperationRequest( - f"Conservation data can only be calculated for proteins in UniProt reviewed." - ) + f"Conservation data can only be calculated for proteins in UniProt reviewed." 
+ ) # get entries matching the sequence from the selected database - q = "protein_acc:{} && entry_db:{}".format(protein.accession.lower(), entry_db.lower()) + q = "protein_acc:{} && entry_db:{}".format( + protein.accession.lower(), entry_db.lower() + ) searcher = general_handler.searcher results = searcher.execute_query(q, None, None) # process each hit sequence = protein.sequence alignments = {"sequence": sequence, entry_db: {"entries": {}}} - if "hits" in results.keys() and "hits" in results["hits"] and len(results["hits"]["hits"]) > 0 : + if ( + "hits" in results.keys() + and "hits" in results["hits"] + and len(results["hits"]["hits"]) > 0 + ): entries = results["hits"]["hits"] for entry in entries: - entry_annotation = EntryAnnotation.objects.filter(accession_id=entry["_source"]["entry_acc"], type="hmm")[0] + entry_annotation = EntryAnnotation.objects.filter( + accession_id=entry["_source"]["entry_acc"], type="hmm" + )[0] model = gzip.decompress(entry_annotation.value).decode("utf-8") hits = run_hmmsearch(model) - protein_dict = {x['acc']: x for x in hits} - protein_hits = list(filter(lambda x: x['acc'] == protein.identifier, hits)) + protein_dict = {x["acc"]: x for x in hits} + protein_hits = list(filter(lambda x: x["acc"] == protein.identifier, hits)) if len(protein_hits) > 0: alignments[entry_db]["entries"][entry_annotation.accession_id] = [] - logo_score = calculate_conservation_scores(entry_annotation.accession_id) - domains = [hit['domains'] for hit in protein_hits][0] + logo_score = calculate_conservation_scores( + entry_annotation.accession_id + ) + domains = [hit["domains"] for hit in protein_hits][0] for hit in domains: # calculate scores for each domain hit for each entry mappedseq, modelseq, hmmfrom, hmmto, alisqfrom, alisqto = align_seq_to_model( hit, sequence ) matrixseq = get_hmm_matrix( - logo_score, alisqfrom, alisqto, hmmfrom, hmmto, mappedseq, modelseq + logo_score, + alisqfrom, + alisqto, + hmmfrom, + hmmto, + mappedseq, + modelseq, ) formatted_matrix = format_logo(matrixseq) - alignments[entry_db]["entries"][entry_annotation.accession_id].append(formatted_matrix) + alignments[entry_db]["entries"][ + entry_annotation.accession_id + ].append(formatted_matrix) else: - if 'warnings' not in alignments[entry_db]: - alignments[entry_db]['warnings'] = [] - alignments[entry_db]['warnings'].append(f"Hmmer did not match Entry {entry_annotation.accession_id} with Protein {protein.identifier}.") + if "warnings" not in alignments[entry_db]: + alignments[entry_db]["warnings"] = [] + alignments[entry_db]["warnings"].append( + f"Hmmer did not match Entry {entry_annotation.accession_id} with Protein {protein.identifier}." 
+ ) return alignments @@ -782,10 +807,11 @@ def get_sunburst_taxa(value, general_handler): taxa = EntryTaxa.objects.filter( accession__in=general_handler.queryset_manager.get_queryset() ) - if taxa.count()==0: + if taxa.count() == 0: raise EmptyQuerysetError("This entry doesn't have taxa") return {"taxa": taxa.first().tree} + def extra_features(value, general_handler): features = ProteinExtraFeatures.objects.filter( protein_acc__in=general_handler.queryset_manager.get_queryset() @@ -873,11 +899,10 @@ def get_subfamilies(value, general_handler): queryset = general_handler.queryset_manager.get_queryset().first() entries = Entry.objects.filter(integrated=queryset.accession, is_alive=False) if len(entries) == 0: - raise EmptyQuerysetError( - "There is are not subfamilies for this entry" - ) + raise EmptyQuerysetError("There is are not subfamilies for this entry") general_handler.modifiers.search_size = len(entries) return entries + def passing(x, y): pass diff --git a/webfront/views/utils.py b/webfront/views/utils.py index 1fcf206f..56805f51 100644 --- a/webfront/views/utils.py +++ b/webfront/views/utils.py @@ -117,7 +117,9 @@ def get( "accession": first.accession, } else: - qs2 = Protein.objects.filter(tax_id__in=list(organisms.keys())).filter(gene__iexact=acc) + qs2 = Protein.objects.filter(tax_id__in=list(organisms.keys())).filter( + gene__iexact=acc + ) if qs2.count() > 0: first = qs2.first() @@ -125,10 +127,13 @@ def get( "endpoint": "protein", "source_database": first.source_database, "proteins": [ - {"accession": item.accession, - "organism": item.organism["scientificName"], - "tax_id":item.tax_id - } for item in qs2], + { + "accession": item.accession, + "organism": item.organism["scientificName"], + "tax_id": item.tax_id, + } + for item in qs2 + ], } else: hit = docs["hits"]["hits"][0]["_source"] From 0dd90b51985f2cec0468b7fb308058bb08517062 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Tue, 4 Oct 2022 16:50:39 +0100 Subject: [PATCH 26/26] don't cache if [accession URL]?page_size=XX --- webfront/views/cache.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/webfront/views/cache.py b/webfront/views/cache.py index b8cd29e7..cc31e7c3 100644 --- a/webfront/views/cache.py +++ b/webfront/views/cache.py @@ -31,11 +31,9 @@ def get_timeout_from_path(path, endpoint_levels): # it doesn't have modifiers if len(query.keys()) == 0: return SHOULD_NO_CACHE - if ( # The only modifier is page_size=20 or default + if ( # The only modifier is page_size len(query.keys()) == 1 and "page_size" in query - and query["page_size"] - == settings.INTERPRO_CONFIG.get("default_page_size", 20) ): return SHOULD_NO_CACHE
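
Note on the final hunk above: after this change, any request whose only query-string modifier is page_size is excluded from the cache, whatever its value; previously the cache was only skipped when page_size equalled the configured default (falling back to 20). A minimal sketch of the resulting check follows. It assumes query is the parsed query string, uses placeholder values for the SHOULD_NO_CACHE sentinel and the fallback timeout, and the helper name timeout_for_query is illustrative only -- the real logic lives in get_timeout_from_path in webfront/views/cache.py.

from urllib.parse import parse_qs, urlsplit

SHOULD_NO_CACHE = 0        # placeholder for the sentinel defined in cache.py
DEFAULT_TIMEOUT = 60 * 60  # placeholder fallback timeout (one hour)

def timeout_for_query(path):
    # Parse the query string of the request path into a dict of modifiers.
    query = parse_qs(urlsplit(path).query)
    # No modifiers at all: skip the cache (behaviour unchanged by this patch).
    if len(query.keys()) == 0:
        return SHOULD_NO_CACHE
    # After this patch: page_size as the sole modifier also skips the cache,
    # for any value rather than only the configured default page size.
    if len(query.keys()) == 1 and "page_size" in query:
        return SHOULD_NO_CACHE
    # Other modifier combinations keep whatever timeout the real function assigns.
    return DEFAULT_TIMEOUT

For example, timeout_for_query("/api/entry/pfam/PF02171?page_size=50") now returns SHOULD_NO_CACHE, so such responses are no longer served from the cache.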