From 6ffbacd3f010e84df47c85894e7d2888214734d2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Thu, 16 Feb 2023 10:14:30 +0000 Subject: [PATCH 1/6] adding a list of modifiers that when used the request won't be cached --- webfront/views/cache.py | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/webfront/views/cache.py b/webfront/views/cache.py index cc31e7c3..13aee7ac 100644 --- a/webfront/views/cache.py +++ b/webfront/views/cache.py @@ -18,6 +18,27 @@ names = [ep["name"].lower() for ep in endpoints] +short_life_parameters = [ + "cursor", + "size", + "go_terms", + "ida_ignore", + "ida_search", + "format", + "page_size", +] + +no_cache_modifiers = [ + "extra_features", + "residues", + "isoforms", + "ida", + "taxa", + "model:" + "annotation:info", + "subfamilies", + "page_size", +] def get_timeout_from_path(path, endpoint_levels): parsed = urlparse(path) @@ -31,11 +52,9 @@ def get_timeout_from_path(path, endpoint_levels): # it doesn't have modifiers if len(query.keys()) == 0: return SHOULD_NO_CACHE - if ( # The only modifier is page_size - len(query.keys()) == 1 - and "page_size" in query - ): - return SHOULD_NO_CACHE + for parameter in no_cache_modifiers: + if parameter in query: + return SHOULD_NO_CACHE # order querystring, lowercase keys query = OrderedDict( @@ -49,14 +68,6 @@ def get_timeout_from_path(path, endpoint_levels): return FIVE_DAYS except Exception: return SHOULD_NO_CACHE - short_life_parameters = [ - "cursor", - "size", - "go_terms", - "ida_ignore", - "ida_search", - "format", - ] for parameter in short_life_parameters: value = query.get(parameter) if value is not None: From 1e51ea84e39a8beceb293b92a36579ad79a0e685 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Thu, 16 Feb 2023 10:26:36 +0000 Subject: [PATCH 2/6] more modifiers to cache lists --- webfront/views/cache.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/webfront/views/cache.py b/webfront/views/cache.py index 13aee7ac..72f8e0f1 100644 --- a/webfront/views/cache.py +++ b/webfront/views/cache.py @@ -26,6 +26,7 @@ "ida_search", "format", "page_size", + "search", ] no_cache_modifiers = [ @@ -37,6 +38,7 @@ "model:" "annotation:info", "subfamilies", + "subfamily", "page_size", ] From e66180893b88c59cec4acdbbe48ab3da0e6dc528 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Thu, 16 Feb 2023 11:35:59 +0000 Subject: [PATCH 3/6] tests for checking the cache life span --- webfront/tests/tests_utils.py | 37 ++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/webfront/tests/tests_utils.py b/webfront/tests/tests_utils.py index 5b33c145..44e2cd1a 100644 --- a/webfront/tests/tests_utils.py +++ b/webfront/tests/tests_utils.py @@ -1,6 +1,7 @@ from django.test import TestCase -from webfront.views.cache import canonical +from views.common import map_url_to_levels +from webfront.views.cache import canonical, get_timeout_from_path, SHOULD_NO_CACHE, FIVE_DAYS class CanonicalTestCase(TestCase): @@ -54,3 +55,37 @@ def with_query_remove_unneeded_urls(self): "/api/entry/InterPro/?integrated=pfam", canonical("/api/entry/InterPro/?integrated=pfam&page_size=20"), ) + + +class CacheLifespanTestCase(TestCase): + def test_urls_no_cacheable(self): + urls = [ + "/entry/InterPro/IPR000001/", + "/protein/uniprot/p99999/?extra_features", + "/entry/InterPro/?page", + ] + for url in urls: + levels = map_url_to_levels(url.split('?')[0]) + self.assertEqual(SHOULD_NO_CACHE, get_timeout_from_path(url, levels)) + + def test_urls_short_life(self): + urls = [ + "/entry/InterPro/?page=33", + "/entry/InterPro/?page_size=33", + "/entry/InterPro/?format", + ] + for url in urls: + levels = map_url_to_levels(url.split('?')[0]) + self.assertEqual(FIVE_DAYS, get_timeout_from_path(url, levels)) + + def test_urls_long_life(self): + urls = [ + "/entry/", + "/entry/InterPro/", + "/entry/InterPro/protein", + "/entry/InterPro/IPR000001/protein", + "/protein/uniprot/p99999/?conservation", + ] + for url in urls: + levels = map_url_to_levels(url.split('?')[0]) + self.assertIsNone(get_timeout_from_path(url, levels)) From 092ab6503dac2fe656236d63f408fd8e17aae7fe Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Thu, 16 Feb 2023 12:22:01 +0000 Subject: [PATCH 4/6] fix import line --- webfront/tests/tests_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webfront/tests/tests_utils.py b/webfront/tests/tests_utils.py index 44e2cd1a..d5c8a135 100644 --- a/webfront/tests/tests_utils.py +++ b/webfront/tests/tests_utils.py @@ -1,6 +1,6 @@ from django.test import TestCase -from views.common import map_url_to_levels +from webfront.views.common import map_url_to_levels from webfront.views.cache import canonical, get_timeout_from_path, SHOULD_NO_CACHE, FIVE_DAYS From c47ab72f377723d09239f637e5756a0317ff0958 Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" Date: Tue, 21 Feb 2023 10:32:34 +0000 Subject: [PATCH 5/6] using the canonical URL to check the cachelifespan --- webfront/views/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/webfront/views/common.py b/webfront/views/common.py index 95afd973..bb992979 100644 --- a/webfront/views/common.py +++ b/webfront/views/common.py @@ -20,7 +20,7 @@ from webfront.views.proteome import ProteomeHandler from webfront.views.set import SetHandler from webfront.views.utils import UtilsHandler -from webfront.views.cache import InterProCache, get_timeout_from_path, SHOULD_NO_CACHE +from webfront.views.cache import InterProCache, get_timeout_from_path, canonical, SHOULD_NO_CACHE from webfront.models import Database from concurrent.futures import ThreadPoolExecutor, wait, FIRST_COMPLETED @@ -191,7 +191,7 @@ def query(args): drf_request=drf_request, ) # Got a good response, then save it in cache - timeout = get_timeout_from_path(full_path, endpoint_levels) + timeout = get_timeout_from_path(canonical(full_path), endpoint_levels) if timeout != SHOULD_NO_CACHE: self._set_in_cache(caching_allowed, full_path, response, timeout) # Forcing to close the connection because django is not closing it when this query is ran as future From 73d32c0da6f3c222416cc2f355768fbd9265c47c Mon Sep 17 00:00:00 2001 From: "Gustavo A. Salazar" <46671268+gustavo-salazar@users.noreply.github.com> Date: Wed, 22 Feb 2023 15:19:21 +0000 Subject: [PATCH 6/6] Update webfront/views/cache.py Co-authored-by: Matthias Blum --- webfront/views/cache.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/webfront/views/cache.py b/webfront/views/cache.py index 72f8e0f1..6179fdca 100644 --- a/webfront/views/cache.py +++ b/webfront/views/cache.py @@ -71,8 +71,7 @@ def get_timeout_from_path(path, endpoint_levels): except Exception: return SHOULD_NO_CACHE for parameter in short_life_parameters: - value = query.get(parameter) - if value is not None: + if parameter in query: return FIVE_DAYS return None