Skip to content

Commit

Permalink
fix(search): Remove function_score from count queries
Browse files Browse the repository at this point in the history
- Removed redundant test for RECAP Search
  • Loading branch information
albertisfu committed Dec 23, 2024
1 parent f817476 commit 6dc5273
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 130 deletions.
45 changes: 19 additions & 26 deletions cl/lib/elasticsearch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
SEARCH_RECAP_PARENT_QUERY_FIELDS,
api_child_highlight_map,
cardinality_query_unique_ids,
date_decay_relevance_types,
recap_boosts_es,
)
from cl.search.exception import (
Expand Down Expand Up @@ -2127,15 +2128,27 @@ def merge_unavailable_fields_on_parent_document(
def clean_count_query(search_query: Search) -> SearchDSL:
"""Cleans a given ES Search object for a count query.
Modifies the input Search object by removing 'inner_hits' from
any 'has_child' queries within the 'should' clause of the boolean query.
Modifies the input Search object by removing 'function_score' from the main
query if present and/or 'inner_hits' from any 'has_child' queries within
the 'should' clause of the boolean query.
It then creates a new Search object with the modified query.
:param search_query: The ES Search object.
:return: A new ES Search object with the count query.
"""

parent_total_query_dict = search_query.to_dict()
parent_total_query_dict = search_query.to_dict(count=True)
try:
# Clean function_score in queries that contain it
parent_total_query_dict = parent_total_query_dict["query"][
"function_score"
]
del parent_total_query_dict["boost_mode"]
del parent_total_query_dict["functions"]
except KeyError:
# Omit queries that don't contain it.
pass

try:
# Clean the has_child query in queries that contain it.
for query in parent_total_query_dict["query"]["bool"]["should"]:
Expand Down Expand Up @@ -2571,29 +2584,9 @@ def apply_custom_score_to_main_query(
else False
)

valid_decay_relevance_types: dict[str, dict[str, str | int | float]] = {
SEARCH_TYPES.OPINION: {
"field": "dateFiled",
"scale": 50,
"decay": 0.5,
},
SEARCH_TYPES.RECAP: {"field": "dateFiled", "scale": 50, "decay": 0.5},
SEARCH_TYPES.DOCKETS: {
"field": "dateFiled",
"scale": 50,
"decay": 0.5,
},
SEARCH_TYPES.RECAP_DOCUMENT: {
"field": "dateFiled",
"scale": 50,
"decay": 0.5,
},
SEARCH_TYPES.ORAL_ARGUMENT: {
"field": "dateArgued",
"scale": 50,
"decay": 0.5,
},
}
valid_decay_relevance_types: dict[str, dict[str, str | int | float]] = (
date_decay_relevance_types
)
main_order_by = cd.get("order_by", "")
if is_valid_custom_score_field and api_version == "v4":
# Applies a custom function score to sort Documents based on
Expand Down
21 changes: 13 additions & 8 deletions cl/search/api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
build_cardinality_count,
build_es_main_query,
build_sort_results,
clean_count_query,
do_collapse_count_query,
do_count_query,
do_es_api_query,
Expand All @@ -21,7 +22,6 @@
set_results_highlights,
)
from cl.lib.search_utils import store_search_api_query
from cl.lib.utils import map_to_docket_entry_sorting
from cl.search.constants import SEARCH_HL_TAG, cardinality_query_unique_ids
from cl.search.documents import (
AudioDocument,
Expand Down Expand Up @@ -260,23 +260,25 @@ def get_paginated_results(
self.main_query = self.main_query.sort(default_sorting, unique_sorting)

# Cardinality query parameters
query = Q(self.main_query.to_dict(count=True)["query"])
main_count_query = clean_count_query(self.main_query)
unique_field = cardinality_query_unique_ids[self.clean_data["type"]]
search_document = self.cardinality_base_document[
self.clean_data["type"]
]
main_count_query = search_document.search().query(query)
cardinality_query = build_cardinality_count(
main_count_query, unique_field
)

# Build a cardinality query to count child documents.
child_cardinality_query = None
child_cardinality_count_response = None
if self.child_docs_query:
if (
self.child_docs_query
and self.clean_data["type"] == SEARCH_TYPES.RECAP
):
child_unique_field = cardinality_query_unique_ids[
SEARCH_TYPES.RECAP_DOCUMENT
]
search_document = self.cardinality_base_document[
self.clean_data["type"]
]
child_count_query = search_document.search().query(
self.child_docs_query
)
Expand All @@ -292,7 +294,10 @@ def get_paginated_results(
)
# If a cardinality query is available for the search_type, add it
# to the multi-search query.
if child_cardinality_query:
if (
child_cardinality_query
and self.clean_data["type"] == SEARCH_TYPES.RECAP
):
multi_search = multi_search.add(child_cardinality_query)

responses = multi_search.execute()
Expand Down
25 changes: 25 additions & 0 deletions cl/search/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,3 +275,28 @@
SEARCH_TYPES.ORAL_ARGUMENT: "id",
SEARCH_TYPES.PARENTHETICAL: "id",
}


# Date-decay relevance settings, keyed by search type.
#
# Each entry names the date field the decay is computed on ("field") together
# with its "scale" and "decay" values. All search types currently share
# scale=50 and decay=0.5; only the date field differs (oral arguments use
# "dateArgued", every other type uses "dateFiled").
# NOTE(review): the units of "scale" are not visible from this module —
# confirm against the code that builds the scoring function.
date_decay_relevance_types = {
    search_type: {"field": date_field, "scale": 50, "decay": 0.5}
    for search_type, date_field in (
        (SEARCH_TYPES.OPINION, "dateFiled"),
        (SEARCH_TYPES.RECAP, "dateFiled"),
        (SEARCH_TYPES.DOCKETS, "dateFiled"),
        (SEARCH_TYPES.RECAP_DOCUMENT, "dateFiled"),
        (SEARCH_TYPES.ORAL_ARGUMENT, "dateArgued"),
    )
}
32 changes: 16 additions & 16 deletions cl/search/tests/tests_es_opinion.py
Original file line number Diff line number Diff line change
Expand Up @@ -2281,7 +2281,7 @@ def setUpTestData(cls):
# Rebuild the Opinion index
cls.rebuild_index("search.OpinionCluster")

# Same keywords but different date_filed
# Same keywords but different dateFiled
cls.opinion_old = OpinionClusterFactory.create(
case_name="Keyword Match",
case_name_full="",
Expand Down Expand Up @@ -2425,7 +2425,7 @@ def setUpTestData(cls):

cls.test_cases = [
{
"name": "Same keywords, order by score desc",
"name": "Same keywords, different dateFiled",
"search_params": {
"q": "Keyword Match",
"order_by": "score desc",
Expand All @@ -2435,13 +2435,13 @@ def setUpTestData(cls):
cls.opinion_recent.docket.docket_number, # Most recent dateFiled
cls.opinion_old.docket.docket_number, # Oldest dateFiled
],
"expected_order": [
cls.opinion_recent.pk, # Most recent dateFiled
cls.opinion_old.pk, # Oldest dateFiled
"expected_order": [ # API
cls.opinion_recent.pk,
cls.opinion_old.pk,
],
},
{
"name": "Different relevancy same dateFiled, order by score desc",
"name": "Different relevancy same dateFiled",
"search_params": {
"q": "Highly Relevant Keywords",
"order_by": "score desc",
Expand All @@ -2451,23 +2451,23 @@ def setUpTestData(cls):
cls.opinion_high_relevance.docket.docket_number, # Most relevant by keywords
cls.opinion_low_relevance.docket.docket_number, # Less relevant by keywords
],
"expected_order": [
"expected_order": [ # API
cls.opinion_high_relevance.pk, # Most relevant by keywords
cls.opinion_low_relevance.pk, # Less relevant by keywords
],
},
{
"name": "Different relevancy different dateFiled, order by score desc",
"name": "Different relevancy and different dateFiled",
"search_params": {
"q": "Ipsum Dolor Terms",
"order_by": "score desc",
"type": SEARCH_TYPES.OPINION,
},
"expected_order_frontend": [
cls.opinion_low_relevance_new_date.docket.docket_number,
cls.opinion_low_relevance_new_date.docket.docket_number, # Combination of relevance and date rank it first.
cls.opinion_high_relevance_old_date.docket.docket_number,
],
"expected_order": [
"expected_order": [ # API
cls.opinion_low_relevance_new_date.pk,
cls.opinion_high_relevance_old_date.pk,
],
Expand All @@ -2483,23 +2483,23 @@ def setUpTestData(cls):
"expected_order_frontend": [
cls.opinion_low_relevance_new_date.docket.docket_number, # 2024-12-23 1:21-bk-1241
cls.opinion_recent.docket.docket_number, # 2024-02-23 1:21-bk-1236
cls.opinion_high_relevance.docket.docket_number, # 2022-02-23 1:21-bk-1237
cls.opinion_high_relevance.docket.docket_number, # 2022-02-23 1:21-bk-1237 Indexed first, displayed first.
cls.opinion_low_relevance.docket.docket_number, # 2022-02-23 1:21-bk-1238
cls.opinion_high_relevance_old_date.docket.docket_number, # 1800-02-23 1:21-bk-1239
cls.opinion_old.docket.docket_number, # 1732-02-23 1:21-bk-1235
],
"expected_order": [
"expected_order": [ # V4 API
cls.opinion_low_relevance_new_date.pk, # 2024-12-23
cls.opinion_recent.pk, # 2024-02-23
cls.opinion_low_relevance.pk, # 2022-02-23 Higher PK
cls.opinion_high_relevance.pk, # 2022-02-23 More relevant Lower PK
cls.opinion_low_relevance.pk, # 2022-02-23 Higher PK in V4, API pk is a secondary sorting key.
cls.opinion_high_relevance.pk, # 2022-02-23 Lower PK
cls.opinion_high_relevance_old_date.pk, # 1800-02-23
cls.opinion_old.pk, # 1732-02-23
],
"expected_order_v3": [
"expected_order_v3": [ # V3 API
cls.opinion_low_relevance_new_date.pk, # 2024-12-23
cls.opinion_recent.pk, # 2024-02-23
cls.opinion_high_relevance.pk, # 2022-02-23 Indexed first
cls.opinion_high_relevance.pk, # 2022-02-23 Indexed first, displayed first.
cls.opinion_low_relevance.pk, # 2022-02-23
cls.opinion_high_relevance_old_date.pk, # 1800-02-23
cls.opinion_old.pk, # 1732-02-23
Expand Down
Loading

0 comments on commit 6dc5273

Please sign in to comment.