diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py
index 6b48fb7bab..835bd52c36 100644
--- a/cl/lib/elasticsearch_utils.py
+++ b/cl/lib/elasticsearch_utils.py
@@ -2780,7 +2780,7 @@ def get_child_top_hits_limit(
 def do_count_query(
     search_query: Search,
-) -> int | None:
+) -> int:
     """Execute an Elasticsearch count query and catch errors.
     :param search_query: Elasticsearch DSL Search object.
-    :return: The results count.
+    :return: The results count, or 0 if the count query failed.
     """
@@ -2792,7 +2792,8 @@ def do_count_query(
             f"Error on count query request: {search_query.to_dict()}"
         )
         logger.warning(f"Error was: {e}")
-        total_results = None
+        # Required for the paginator class to work, as it expects an integer.
+        total_results = 0
     return total_results
diff --git a/cl/lib/search_utils.py b/cl/lib/search_utils.py
index b16446f9da..4d95ffa57e 100644
--- a/cl/lib/search_utils.py
+++ b/cl/lib/search_utils.py
@@ -1,6 +1,5 @@
 import re
 from datetime import date, datetime, timedelta
-from math import ceil
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
 from urllib.parse import parse_qs, urlencode
 
@@ -1229,14 +1228,38 @@ def store_search_query(request: HttpRequest, search_results: dict) -> None:
         return
 
     if is_es_search:
-        search_query.query_time_ms = ceil(search_results["results_details"][0])
+        search_query.query_time_ms = search_results["results_details"][0]
         # do_es_search returns 1 as query time if the micro cache was hit
         search_query.hit_cache = search_query.query_time_ms == 1
     else:
         # Solr searches are not cached unless a cache_key is passed
        # No cache_key is passed for the endpoints we are storing
-        search_query.query_time_ms = ceil(
-            search_results["results"].object_list.QTime
-        )
+        search_query.query_time_ms = search_results[
+            "results"
+        ].object_list.QTime
 
     search_query.save()
+
+
+def store_search_api_query(
+    request: HttpRequest, failed: bool, query_time: int | None, engine: int
+) -> None:
+    """Store a search query made to the Search API.
+
+    :param request: The HTTP request object.
+    :param failed: Boolean indicating if the query execution failed.
+    :param query_time: The time taken to execute the query in milliseconds,
+        or None if not applicable.
+    :param engine: The search engine used to execute the query.
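+        (e.g., SearchQuery.ELASTICSEARCH or SearchQuery.SOLR).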
+    :return: None
+    """
+    SearchQuery.objects.create(
+        user=None if request.user.is_anonymous else request.user,
+        get_params=request.GET.urlencode(),
+        failed=failed,
+        query_time_ms=query_time,
+        hit_cache=False,
+        source=SearchQuery.API,
+        engine=engine,
+    )
diff --git a/cl/search/admin.py b/cl/search/admin.py
index b1e87d995b..1fbe71cfdb 100644
--- a/cl/search/admin.py
+++ b/cl/search/admin.py
@@ -24,6 +24,7 @@
     Parenthetical,
     ParentheticalGroup,
     RECAPDocument,
+    SearchQuery,
 )
 from cl.search.tasks import add_items_to_solr
 
@@ -351,3 +352,11 @@ class ParentheticalGroupAdmin(CursorPaginatorAdmin):
         "opinion",
         "representative",
     )
+
+
+@admin.register(SearchQuery)
+class SearchQueryAdmin(CursorPaginatorAdmin):
+    raw_id_fields = ("user",)
+    list_display = ("__str__", "engine", "source")
+    list_filter = ("engine", "source")
+    search_fields = ("user__username",)
diff --git a/cl/search/api_utils.py b/cl/search/api_utils.py
index 8f2b11b74b..122b06f944 100644
--- a/cl/search/api_utils.py
+++ b/cl/search/api_utils.py
@@ -22,6 +22,7 @@
     set_results_highlights,
 )
 from cl.lib.scorched_utils import ExtraSolrInterface
+from cl.lib.search_utils import store_search_api_query
 from cl.lib.utils import map_to_docket_entry_sorting
 from cl.search.constants import SEARCH_HL_TAG, cardinality_query_unique_ids
 from cl.search.documents import (
@@ -33,7 +34,7 @@
     PersonDocument,
 )
 from cl.search.exception import ElasticBadRequestError, ElasticServerError
-from cl.search.models import SEARCH_TYPES
+from cl.search.models import SEARCH_TYPES, SearchQuery
 from cl.search.types import ESCursor
 
 logger = logging.getLogger(__name__)
@@ -119,13 +120,19 @@ def get_object_list(request, cd, paginator):
         or is_recap_active
     ):
         sl = ESList(
+            request=request,
             main_query=main_query,
             offset=offset,
             page_size=page_size,
             type=cd["type"],
         )
     else:
-        sl = SolrList(main_query=main_query, offset=offset, type=cd["type"])
+        sl = SolrList(
+            request=request,
+            main_query=main_query,
+            offset=offset,
+            type=cd["type"],
+        )
 
     return sl
 
@@ -135,8 +142,11 @@ class ESList:
     as they are queried.
     """
 
-    def __init__(self, main_query, offset, page_size, type, length=None):
+    def __init__(
+        self, request, main_query, offset, page_size, type, length=None
+    ):
         super().__init__()
+        self.request = request
         self.main_query = main_query
         self.offset = offset
         self.page_size = page_size
@@ -170,7 +180,31 @@ def __getitem__(self, item):
         self.main_query = self.main_query[
             self.offset : self.offset + self.page_size
         ]
-        results = self.main_query.execute()
+
+        error_to_raise = None
+        try:
+            results = self.main_query.execute()
+        except (TransportError, ConnectionError, RequestError):
+            error_to_raise = ElasticServerError
+        except ApiError as e:
+            if "Failed to parse query" in str(e):
+                error_to_raise = ElasticBadRequestError
+            else:
+                logger.error("Search API Error: %s", e)
+                error_to_raise = ElasticServerError
+
+        # Store search query.
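+        # The query is logged before any error is raised so that failed
+        # requests are recorded as well.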
""" - def __init__(self, main_query, offset, type, length=None): + def __init__(self, request, main_query, offset, type, length=None): super().__init__() + self.request = request self.main_query = main_query self.offset = offset self.type = type @@ -245,6 +278,13 @@ def __getitem__(self, item): self.main_query["start"] = self.offset r = self.conn.query().add_extra(**self.main_query).execute() self.conn.conn.http_connection.close() + # Store search query. + store_search_api_query( + request=self.request, + failed=False, + query_time=r.QTime, + engine=SearchQuery.SOLR, + ) if r.group_field is None: # Pull the text snippet up a level for result in r.result.docs: @@ -305,12 +345,14 @@ def __init__( page_size, search_after, clean_data, + request, ): self.main_query = main_query self.child_docs_query = child_docs_query self.page_size = page_size self.search_after = search_after self.clean_data = clean_data + self.request = request self.cursor = None self.results = None self.reverse = False @@ -372,6 +414,8 @@ def get_paginated_results( child_cardinality_query = build_cardinality_count( child_count_query, child_unique_field ) + + error_to_raise = None try: multi_search = MultiSearch() multi_search = multi_search.add(self.main_query).add( @@ -388,15 +432,25 @@ def get_paginated_results( if child_cardinality_query: child_cardinality_count_response = responses[2] except (TransportError, ConnectionError, RequestError) as e: - raise ElasticServerError() + error_to_raise = ElasticServerError except ApiError as e: if "Failed to parse query" in str(e): - raise ElasticBadRequestError() + error_to_raise = ElasticBadRequestError else: logger.error("Multi-search API Error: %s", e) - raise ElasticServerError() - self.process_results(self.results) + error_to_raise = ElasticServerError + + # Store search query. 
+        store_search_api_query(
+            request=self.request,
+            failed=bool(error_to_raise),
+            query_time=self.results.took if not error_to_raise else None,
+            engine=SearchQuery.ELASTICSEARCH,
+        )
+        if error_to_raise:
+            raise error_to_raise()
+        self.process_results(self.results)
 
         main_query_hits = self.results.hits.total.value
         es_results_items = [
             defaultdict(lambda: None, result.to_dict(skip_empty=False))
diff --git a/cl/search/api_views.py b/cl/search/api_views.py
index b909f0210c..0706f10db5 100644
--- a/cl/search/api_views.py
+++ b/cl/search/api_views.py
@@ -363,11 +363,7 @@ def list(self, request, *args, **kwargs):
             request.version,
         )
         es_list_instance = api_utils.CursorESList(
-            main_query,
-            child_docs_query,
-            None,
-            None,
-            cd,
+            main_query, child_docs_query, None, None, cd, request
         )
         results_page = paginator.paginate_queryset(
             es_list_instance, request
diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py
index 47a246f901..2c401a43e0 100644
--- a/cl/search/tests/tests.py
+++ b/cl/search/tests/tests.py
@@ -1558,46 +1558,67 @@ class SaveSearchQueryTest(TestCase):
     def setUp(self) -> None:
         self.client = Client()
         # Using plain text, fielded queries and manual filters
-        self.searches = [
+
+        self.base_searches = [
             # Recap
             r"type=r&q=trump&type=r&order_by=score%20desc&description=Notice",
             # Audio
             r"type=oa&q=company%20court_id:illappct&type=oa&order_by=score desc",
+            # Opinions
+            r"type=o&q=thomas&type=o&order_by=score%20desc&case_name=lorem",
             # People
             r"type=p&q=thomas&type=p&order_by=score%20desc&born_after=01/01/2080",
+        ]
+
+        self.searches = self.base_searches + [
             # Repeat the same query, for testing cache
             r"type=p&q=thomas&type=p&order_by=score%20desc&born_after=01/01/2080",
         ]
+
         super().setUp()
         self.source_error_message = (
-            f"Saved wrong `engine` value, expected {SearchQuery.WEBSITE}"
+            "Saved wrong `source` value."
         )
 
+    @staticmethod
+    def normalize_query(query, replace_space=False):
+        """Normalize a query string into a dict of sorted value lists.
+
+        Sometimes the search process alters the order of the query parameters,
+        or duplicates them.
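+
+        For example, "q=b&q=a" normalizes to {"q": ["a", "b"]}.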
+        """
+        if replace_space:
+            query = query.replace("%20", "+")
+        parsed_query = parse_qs(query)
+        return {k: sorted(v) for k, v in parsed_query.items()}
+
     @override_settings(ELASTICSEARCH_MICRO_CACHE_ENABLED=True)
     def test_search_query_saving(self) -> None:
         """Do we save queries on all public endpoints?"""
         for query in self.searches:
             url = f"{reverse('show_results')}?{query}"
             self.client.get(url)
-            # Compare parsed query strings; sometimes the search process
-            # alters the order of the query parameters, or duplicates them
+            # Compare parsed query strings.
             last_query = SearchQuery.objects.last()
-            parsed_query = parse_qs(query.replace("%20", "+"))
-            for key, stored_values in parse_qs(last_query.get_params).items():
-                self.assertTrue(
-                    parsed_query[key][0] == stored_values[0],
-                    f"Query was not saved properly for endpoint {query}",
-                )
-            self.assertEqual(
-                last_query.engine,
-                SearchQuery.ELASTICSEARCH,
-                f"Saved wrong `engine` value, expected {SearchQuery.ELASTICSEARCH}",
-            )
-            self.assertEqual(
-                last_query.source,
-                SearchQuery.WEBSITE,
-                self.source_error_message,
-            )
+            expected_query = self.normalize_query(query, replace_space=True)
+            stored_query = self.normalize_query(last_query.get_params)
+            self.assertEqual(
+                expected_query,
+                stored_query,
+                f"Query was not saved properly. Expected {expected_query}, got {stored_query}",
+            )
+            self.assertEqual(
+                last_query.engine,
+                SearchQuery.ELASTICSEARCH,
+                f"Saved wrong `engine` value, expected {SearchQuery.ELASTICSEARCH}",
+            )
+            self.assertEqual(
+                last_query.source,
+                SearchQuery.WEBSITE,
+                self.source_error_message,
+            )
 
         self.assertTrue(
             SearchQuery.objects.last().hit_cache,
@@ -1608,18 +1627,20 @@ def test_search_query_saving(self) -> None:
     @override_flag("oa-es-active", False)
     @override_flag("r-es-active", False)
     @override_flag("p-es-active", False)
+    @override_flag("o-es-active", False)
     def test_search_query_saving_solr(self) -> None:
         """Are queries saved when using solr search (do_search)?"""
         for query in self.searches:
             url = f"{reverse('show_results')}?{query}"
             self.client.get(url)
             last_query = SearchQuery.objects.last()
-            parsed_query = parse_qs(query.replace("%20", "+"))
-            for key, stored_values in parse_qs(last_query.get_params).items():
-                self.assertTrue(
-                    parsed_query[key][0] == stored_values[0],
-                    f"Query was not saved properly for endpoint {query}",
-                )
+            expected_query = self.normalize_query(query, replace_space=True)
+            stored_query = self.normalize_query(last_query.get_params)
+            self.assertEqual(
+                expected_query,
+                stored_query,
+                f"Query was not saved properly. Expected {expected_query}, got {stored_query}",
+            )
             self.assertEqual(
                 last_query.engine,
                 SearchQuery.SOLR,
@@ -1647,6 +1668,123 @@ def test_failed_es_search_queries(self) -> None:
             f"Saved wrong `engine` value, expected {SearchQuery.ELASTICSEARCH}",
         )
 
+    def test_search_api_v4_query_saving(self) -> None:
+        """Do we save queries on all V4 Search endpoints?"""
+        for query in self.base_searches:
+            url = f"{reverse('search-list', kwargs={'version': 'v4'})}?{query}"
+            self.client.get(url)
+            # Compare parsed query strings.
+            last_query = SearchQuery.objects.last()
+            expected_query = self.normalize_query(query, replace_space=True)
+            stored_query = self.normalize_query(last_query.get_params)
+            self.assertEqual(
+                expected_query,
+                stored_query,
+                f"Query was not saved properly. Expected {expected_query}, got {stored_query}",
+            )
+            self.assertEqual(
+                last_query.engine,
+                SearchQuery.ELASTICSEARCH,
+                f"Saved wrong `engine` value, expected {SearchQuery.ELASTICSEARCH}",
+            )
+            self.assertEqual(
+                last_query.source,
+                SearchQuery.API,
+                self.source_error_message,
+            )
+
+    def test_failed_es_search_v4_api_queries(self) -> None:
+        """Do we flag failed V4 API queries properly?"""
+        # An intentionally malformed query that fails ES query parsing.
+        query = "type=r&q=contains/sproximity token"
+        url = f"{reverse('search-list', kwargs={'version': 'v4'})}?{query}"
+        r = self.client.get(url)
+        self.assertEqual(r.status_code, 400)
+        last_query = SearchQuery.objects.last()
+        self.assertTrue(last_query.failed, "SearchQuery.failed should be True")
+        self.assertEqual(
+            last_query.query_time_ms, None, "Query time should be None"
+        )
+        self.assertEqual(
+            last_query.engine,
+            SearchQuery.ELASTICSEARCH,
+            f"Saved wrong `engine` value, expected {SearchQuery.ELASTICSEARCH}",
+        )
+
+    def test_search_es_api_v3_query_saving(self) -> None:
+        """Do we save queries on all V3 Search endpoints?"""
+        for query in self.base_searches:
+            url = f"{reverse('search-list', kwargs={'version': 'v3'})}?{query}"
+            self.client.get(url)
+            # Compare parsed query strings.
+            last_query = SearchQuery.objects.last()
+            expected_query = self.normalize_query(query, replace_space=True)
+            stored_query = self.normalize_query(last_query.get_params)
+            self.assertEqual(
+                expected_query,
+                stored_query,
+                f"Query was not saved properly. Expected {expected_query}, got {stored_query}",
+            )
+            self.assertEqual(
+                last_query.engine,
+                SearchQuery.ELASTICSEARCH,
+                f"Saved wrong `engine` value, expected {SearchQuery.ELASTICSEARCH}",
+            )
+            self.assertEqual(
+                last_query.source,
+                SearchQuery.API,
+                self.source_error_message,
+            )
+
+    def test_failed_es_search_v3_api_queries(self) -> None:
+        """Do we flag failed ES V3 API queries properly?"""
+        query = "type=r&q=contains/sproximity token"
+        url = f"{reverse('search-list', kwargs={'version': 'v3'})}?{query}"
+        r = self.client.get(url)
+        self.assertEqual(r.status_code, 500)
+        last_query = SearchQuery.objects.last()
+        self.assertTrue(last_query.failed, "SearchQuery.failed should be True")
+        self.assertEqual(
+            last_query.query_time_ms, None, "Query time should be None"
+        )
+        self.assertEqual(
+            last_query.engine,
+            SearchQuery.ELASTICSEARCH,
+            f"Saved wrong `engine` value, expected {SearchQuery.ELASTICSEARCH}",
+        )
+
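+    # With every ES flag disabled, the V3 API falls back to the Solr
+    # search path.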
+    @override_flag("oa-es-active", False)
+    @override_flag("oa-es-activate", False)
+    @override_flag("r-es-search-api-active", False)
+    @override_flag("p-es-active", False)
+    @override_flag("o-es-search-api-active", False)
+    def test_search_solr_api_v3_query_saving(self) -> None:
+        """Do we save queries on all V3 Search Solr endpoints?"""
+        for query in self.base_searches:
+            url = f"{reverse('search-list', kwargs={'version': 'v3'})}?{query}"
+            self.client.get(url)
+            # Compare parsed query strings.
+            last_query = SearchQuery.objects.last()
+            expected_query = self.normalize_query(query, replace_space=True)
+            stored_query = self.normalize_query(last_query.get_params)
+            self.assertEqual(
+                expected_query,
+                stored_query,
+                f"Query was not saved properly. Expected {expected_query}, got {stored_query}",
+            )
+            self.assertEqual(
+                last_query.engine,
+                SearchQuery.SOLR,
+                f"Saved wrong `engine` value, expected {SearchQuery.SOLR}",
+            )
+            self.assertEqual(
+                last_query.source,
+                SearchQuery.API,
+                self.source_error_message,
+            )
+
 
 class CaptionTest(TestCase):
     """Can we make good looking captions?"""
diff --git a/cl/search/tests/tests_es_opinion.py b/cl/search/tests/tests_es_opinion.py
index e7432e344c..d4befefa9b 100644
--- a/cl/search/tests/tests_es_opinion.py
+++ b/cl/search/tests/tests_es_opinion.py
@@ -19,6 +19,8 @@
 from elasticsearch_dsl import Q
 from factory import RelatedFactory
 from lxml import etree, html
+from rest_framework.request import Request
+from rest_framework.test import APIRequestFactory
 from waffle.testutils import override_flag
 
 from cl.custom_filters.templatetags.text_filters import html_decode
@@ -492,6 +494,7 @@ def test_o_results_api_pagination(self) -> None:
             "order_by": "dateFiled desc",
             "highlight": False,
         }
+        request = Request(APIRequestFactory().get("/"))
         for page in range(1, total_pages + 1):
             search_query = OpinionClusterDocument.search()
             offset = max(0, (page - 1) * page_size)
             main_query, _ = do_es_api_query(
                 search_query, cd, {"text": 500}, SEARCH_HL_TAG, "v3"
             )
             hits = ESList(
+                request=request,
                 main_query=main_query,
                 offset=offset,
                 page_size=page_size,