Merge pull request #4591 from freelawproject/4576-store-api-search-queries

4576 Store API search queries
mlissner authored Oct 17, 2024
2 parents c72de53 + 4b72c9d commit 6ae2865
Showing 7 changed files with 267 additions and 46 deletions.
5 changes: 3 additions & 2 deletions cl/lib/elasticsearch_utils.py
@@ -2780,7 +2780,7 @@ def get_child_top_hits_limit(
 
 def do_count_query(
     search_query: Search,
-) -> int | None:
+) -> int:
     """Execute an Elasticsearch count query and catch errors.
     :param search_query: Elasticsearch DSL Search object.
     :return: The results count.
@@ -2792,7 +2792,8 @@ def do_count_query(
             f"Error on count query request: {search_query.to_dict()}"
         )
         logger.warning(f"Error was: {e}")
-        total_results = None
+        # Required for the paginator class to work, as it expects an integer.
+        total_results = 0
     return total_results
 
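The fallback change above exists because the paginator does arithmetic on the
count. A minimal sketch of why an integer is required (assuming Django's stock
Paginator semantics; the CountedResults wrapper and values are illustrative,
not from this PR):

# Illustrative only: Django's Paginator calls .count() on the object list
# and derives num_pages from it, so a None count would crash in that math.
from django.core.paginator import Paginator


class CountedResults:
    """Hypothetical stand-in for a search-backed result list."""

    def __init__(self, items, total):
        self.items = items
        self.total = total

    def count(self):
        return self.total  # returning 0 on error keeps pagination working

    def __getitem__(self, item):
        return self.items[item]


paginator = Paginator(CountedResults([], 0), per_page=20)
print(paginator.num_pages)  # -> 1; a None count would raise a TypeError
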
32 changes: 27 additions & 5 deletions cl/lib/search_utils.py
@@ -1,6 +1,5 @@
 import re
 from datetime import date, datetime, timedelta
-from math import ceil
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
 from urllib.parse import parse_qs, urlencode

@@ -1229,14 +1228,37 @@ def store_search_query(request: HttpRequest, search_results: dict) -> None:
         return
 
     if is_es_search:
-        search_query.query_time_ms = ceil(search_results["results_details"][0])
+        search_query.query_time_ms = search_results["results_details"][0]
         # do_es_search returns 1 as query time if the micro cache was hit
         search_query.hit_cache = search_query.query_time_ms == 1
     else:
         # Solr searches are not cached unless a cache_key is passed
         # No cache_key is passed for the endpoints we are storing
-        search_query.query_time_ms = ceil(
-            search_results["results"].object_list.QTime
-        )
+        search_query.query_time_ms = search_results[
+            "results"
+        ].object_list.QTime
 
     search_query.save()
+
+
+def store_search_api_query(
+    request: HttpRequest, failed: bool, query_time: int | None, engine: int
+) -> None:
+    """Store the search query from the Search API.
+
+    :param request: The HTTP request object.
+    :param failed: Boolean indicating if the query execution failed.
+    :param query_time: The time taken to execute the query in milliseconds or
+    None if not applicable.
+    :param engine: The search engine used to execute the query.
+    :return: None
+    """
+    SearchQuery.objects.create(
+        user=None if request.user.is_anonymous else request.user,
+        get_params=request.GET.urlencode(),
+        failed=failed,
+        query_time_ms=query_time,
+        hit_cache=False,
+        source=SearchQuery.API,
+        engine=engine,
+    )
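
A hedged usage sketch (not part of this diff) of the new helper: recording one
successful Elasticsearch API query, where response_took stands in for the
`took` value, in milliseconds, that an ES response reports:

# Sketch: the view function is hypothetical; store_search_api_query and
# SearchQuery come from the changes above.
from cl.lib.search_utils import store_search_api_query
from cl.search.models import SearchQuery


def record_successful_search(request, response_took: int) -> None:
    store_search_api_query(
        request=request,
        failed=False,
        query_time=response_took,  # ES reports this as `took`
        engine=SearchQuery.ELASTICSEARCH,
    )
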
9 changes: 9 additions & 0 deletions cl/search/admin.py
@@ -24,6 +24,7 @@
     Parenthetical,
     ParentheticalGroup,
     RECAPDocument,
+    SearchQuery,
 )
 from cl.search.tasks import add_items_to_solr
 
@@ -351,3 +352,11 @@ class ParentheticalGroupAdmin(CursorPaginatorAdmin):
         "opinion",
         "representative",
     )
+
+
+@admin.register(SearchQuery)
+class SearchQueryAdmin(CursorPaginatorAdmin):
+    raw_id_fields = ("user",)
+    list_display = ("__str__", "engine", "source")
+    list_filter = ("engine", "source")
+    search_fields = ("user__username",)
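
Once queries are stored, the new admin's engine and source filters map onto
simple ORM aggregates. A hedged shell sketch (field names come from the diff;
the aggregate itself is illustrative, and the printed values are made up):

# Illustrative Django shell snippet: count stored queries per engine and
# source, mirroring what the admin's list filters slice on.
from django.db.models import Count

from cl.search.models import SearchQuery

counts = SearchQuery.objects.values("engine", "source").annotate(
    total=Count("pk")
)
for row in counts:
    print(row)  # e.g. {"engine": 1, "source": 2, "total": 41}
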
72 changes: 63 additions & 9 deletions cl/search/api_utils.py
@@ -22,6 +22,7 @@
     set_results_highlights,
 )
 from cl.lib.scorched_utils import ExtraSolrInterface
+from cl.lib.search_utils import store_search_api_query
 from cl.lib.utils import map_to_docket_entry_sorting
 from cl.search.constants import SEARCH_HL_TAG, cardinality_query_unique_ids
 from cl.search.documents import (
@@ -33,7 +34,7 @@
     PersonDocument,
 )
 from cl.search.exception import ElasticBadRequestError, ElasticServerError
-from cl.search.models import SEARCH_TYPES
+from cl.search.models import SEARCH_TYPES, SearchQuery
 from cl.search.types import ESCursor
 
 logger = logging.getLogger(__name__)
@@ -119,13 +120,19 @@ def get_object_list(request, cd, paginator):
         or is_recap_active
     ):
         sl = ESList(
+            request=request,
             main_query=main_query,
             offset=offset,
             page_size=page_size,
             type=cd["type"],
         )
     else:
-        sl = SolrList(main_query=main_query, offset=offset, type=cd["type"])
+        sl = SolrList(
+            request=request,
+            main_query=main_query,
+            offset=offset,
+            type=cd["type"],
+        )
 
     return sl

@@ -135,8 +142,11 @@ class ESList:
     as they are queried.
     """
 
-    def __init__(self, main_query, offset, page_size, type, length=None):
+    def __init__(
+        self, request, main_query, offset, page_size, type, length=None
+    ):
         super().__init__()
+        self.request = request
         self.main_query = main_query
         self.offset = offset
         self.page_size = page_size
@@ -170,7 +180,29 @@ def __getitem__(self, item):
         self.main_query = self.main_query[
             self.offset : self.offset + self.page_size
         ]
-        results = self.main_query.execute()
+
+        error_to_raise = None
+        try:
+            results = self.main_query.execute()
+        except (TransportError, ConnectionError, RequestError) as e:
+            error_to_raise = ElasticServerError
+        except ApiError as e:
+            if "Failed to parse query" in str(e):
+                error_to_raise = ElasticBadRequestError
+            else:
+                logger.error("Multi-search API Error: %s", e)
+                error_to_raise = ElasticServerError
+
+        # Store search query.
+        store_search_api_query(
+            request=self.request,
+            failed=bool(error_to_raise),
+            query_time=results.took if not error_to_raise else None,
+            engine=SearchQuery.ELASTICSEARCH,
+        )
+
+        if error_to_raise:
+            raise error_to_raise()
 
         # Merge unavailable fields in ES by pulling data from the DB to make
         # the API backwards compatible for People.
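
The design choice above is to defer the raise: the SearchQuery row is written
whether or not execution failed, and only afterwards does the exception reach
the API caller. Reduced to a self-contained sketch (all names hypothetical):

# Defer-then-raise, in miniature: the audit write happens even on failure.
class ServerError(Exception):
    pass


def run_query():
    raise ServerError("simulated backend failure")


def store_query(failed: bool) -> None:
    print(f"audit row written, failed={failed}")


error_to_raise = None
try:
    results = run_query()
except ServerError:
    error_to_raise = ServerError

store_query(failed=bool(error_to_raise))  # runs on success and failure alike
if error_to_raise:
    raise error_to_raise()  # propagates only after the row is stored
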
@@ -210,8 +242,9 @@ class SolrList:
     queried.
     """
 
-    def __init__(self, main_query, offset, type, length=None):
+    def __init__(self, request, main_query, offset, type, length=None):
         super().__init__()
+        self.request = request
         self.main_query = main_query
         self.offset = offset
         self.type = type
@@ -245,6 +278,13 @@ def __getitem__(self, item):
         self.main_query["start"] = self.offset
         r = self.conn.query().add_extra(**self.main_query).execute()
         self.conn.conn.http_connection.close()
+        # Store search query.
+        store_search_api_query(
+            request=self.request,
+            failed=False,
+            query_time=r.QTime,
+            engine=SearchQuery.SOLR,
+        )
         if r.group_field is None:
             # Pull the text snippet up a level
             for result in r.result.docs:
@@ -305,12 +345,14 @@ def __init__(
         page_size,
         search_after,
         clean_data,
+        request,
     ):
         self.main_query = main_query
         self.child_docs_query = child_docs_query
         self.page_size = page_size
         self.search_after = search_after
         self.clean_data = clean_data
+        self.request = request
         self.cursor = None
         self.results = None
         self.reverse = False
@@ -372,6 +414,8 @@ def get_paginated_results(
         child_cardinality_query = build_cardinality_count(
             child_count_query, child_unique_field
         )
+
+        error_to_raise = None
         try:
             multi_search = MultiSearch()
             multi_search = multi_search.add(self.main_query).add(
@@ -388,15 +432,25 @@
             if child_cardinality_query:
                 child_cardinality_count_response = responses[2]
         except (TransportError, ConnectionError, RequestError) as e:
-            raise ElasticServerError()
+            error_to_raise = ElasticServerError
         except ApiError as e:
             if "Failed to parse query" in str(e):
-                raise ElasticBadRequestError()
+                error_to_raise = ElasticBadRequestError
             else:
                 logger.error("Multi-search API Error: %s", e)
-                raise ElasticServerError()
-        self.process_results(self.results)
+                error_to_raise = ElasticServerError
 
+        # Store search query.
+        store_search_api_query(
+            request=self.request,
+            failed=bool(error_to_raise),
+            query_time=self.results.took if not error_to_raise else None,
+            engine=SearchQuery.ELASTICSEARCH,
+        )
+        if error_to_raise:
+            raise error_to_raise()
+
+        self.process_results(self.results)
         main_query_hits = self.results.hits.total.value
         es_results_items = [
             defaultdict(lambda: None, result.to_dict(skip_empty=False))
6 changes: 1 addition & 5 deletions cl/search/api_views.py
@@ -363,11 +363,7 @@ def list(self, request, *args, **kwargs):
             request.version,
         )
         es_list_instance = api_utils.CursorESList(
-            main_query,
-            child_docs_query,
-            None,
-            None,
-            cd,
+            main_query, child_docs_query, None, None, cd, request
         )
         results_page = paginator.paginate_queryset(
             es_list_instance, request
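
For readability, the positional call above lines up with the constructor
signature shown earlier in api_utils.py. An equivalent keyword-argument form
(a sketch, not part of the diff) would be:

# Equivalent call with keyword arguments, matching CursorESList.__init__
# (main_query, child_docs_query, page_size, search_after, clean_data, request).
es_list_instance = api_utils.CursorESList(
    main_query=main_query,
    child_docs_query=child_docs_query,
    page_size=None,
    search_after=None,
    clean_data=cd,
    request=request,
)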