Skip to content

Commit

Permalink
fix(search): Resolved issues for filter only queries and alerts
Browse files Browse the repository at this point in the history
  • Loading branch information
albertisfu committed Dec 20, 2024
1 parent 2319e71 commit 09d80ff
Show file tree
Hide file tree
Showing 7 changed files with 306 additions and 28 deletions.
3 changes: 2 additions & 1 deletion cl/alerts/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2392,7 +2392,7 @@ def test_es_alert_update_and_delete(self, mock_abort_audio):
user=self.user_profile.user,
rate=Alert.REAL_TIME,
name="Test Alert OA",
query="type=oa&docket_number=19-1010",
query="type=oa&docket_number=19-1010&order_by=score desc",
alert_type=SEARCH_TYPES.ORAL_ARGUMENT,
)

Expand All @@ -2402,6 +2402,7 @@ def test_es_alert_update_and_delete(self, mock_abort_audio):
response_str = str(doc.to_dict())
self.assertIn("'query': '19-1010'", response_str)
self.assertIn("'rate': 'rt'", response_str)
self.assertNotIn("function_score", response_str)

# Update Alert
search_alert_1.query = "type=oa&docket_number=19-1020"
Expand Down
8 changes: 7 additions & 1 deletion cl/alerts/tests/tests_recap_alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2221,13 +2221,19 @@ def test_index_and_delete_recap_alerts_from_percolator(
user=self.user_profile.user,
rate=Alert.WEEKLY,
name="Test Alert Docket Only",
query='q="401 Civil"&type=r',
query='q="401 Civil"&type=r&order_by=score desc',
alert_type=SEARCH_TYPES.RECAP,
)
self.assertTrue(
RECAPPercolator.exists(id=docket_only_alert.pk),
msg=f"Alert id: {docket_only_alert.pk} was not indexed.",
)
alert_doc = RECAPPercolator.get(id=docket_only_alert.pk)
response_str = str(alert_doc.to_dict())
self.assertIn("401 Civil", response_str)
self.assertIn("'rate': 'wly'", response_str)
# function_score breaks percolator queries. Ensure it is never indexed.
self.assertNotIn("function_score", response_str)

docket_only_alert_id = docket_only_alert.pk
# Remove the alert.
Expand Down
23 changes: 15 additions & 8 deletions cl/lib/elasticsearch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1084,12 +1084,6 @@ def combine_plain_filters_and_queries(
final_query.filter = reduce(operator.iand, filters)
if filters and string_query:
final_query.minimum_should_match = 1

if cd["type"] == SEARCH_TYPES.ORAL_ARGUMENT:
# Apply custom score for dateArgued sorting in the V4 API.
final_query = apply_custom_score_to_main_query(
cd, final_query, api_version
)
return final_query


Expand Down Expand Up @@ -1377,14 +1371,24 @@ def build_es_base_query(
child_query=child_docs_query,
)

boost_mode = "multiply"
if plain_doc:
# Combine the filters and string query for plain documents like Oral
# arguments and parentheticals
main_query = combine_plain_filters_and_queries(
cd, filters, string_query, api_version
)
if not string_query:
boost_mode = "replace"
else:
main_query_dict = main_query.to_dict()
contain_query_string = "should" in main_query_dict["bool"]["should"][1]["bool"] and "query_string" in main_query_dict["bool"]["should"][1]["bool"]["should"][0]
if not contain_query_string:
boost_mode = "replace"


main_query = apply_custom_score_to_main_query(cd, main_query, api_version, boost_mode=boost_mode)

main_query = apply_custom_score_to_main_query(cd, main_query, api_version)
return EsMainQueries(
search_query=search_query.query(main_query),
parent_query=parent_query,
Expand Down Expand Up @@ -2219,6 +2223,7 @@ def fetch_es_results(

# Execute the ES main query + count queries in a single request.
multi_search = MultiSearch()
print("MAin query: ", main_query.to_dict())
multi_search = multi_search.add(main_query).add(main_doc_count_query)
if child_total_query:
multi_search = multi_search.add(child_total_query)
Expand Down Expand Up @@ -2567,6 +2572,7 @@ def apply_custom_score_to_main_query(
:param cd: The query CleanedData
:param query: The ES Query object to be modified.
:param api_version: Optional, the request API version.
:param boost_mode: Optional, the boost mode to apply for the decay relevancy score
:return: The function_score query contains the base query, applied when
child_order is used.
"""
Expand All @@ -2588,9 +2594,10 @@ def apply_custom_score_to_main_query(
)

valid_decay_relevance_types = {
SEARCH_TYPES.OPINION: ["dateFiled"],
SEARCH_TYPES.RECAP: ["dateFiled"],
SEARCH_TYPES.DOCKETS: ["dateFiled"],
SEARCH_TYPES.RECAP_DOCUMENT: ["dateFiled", "entry_date_filed"],
SEARCH_TYPES.RECAP_DOCUMENT: ["dateFiled"],
SEARCH_TYPES.ORAL_ARGUMENT: ["dateArgued"],
}
main_order_by = cd.get("order_by", "")
Expand Down
1 change: 1 addition & 0 deletions cl/search/api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def __getitem__(self, item):

error_to_raise = None
try:
print("Query V3: ", self.main_query.to_dict())
results = self.main_query.execute()
except (TransportError, ConnectionError, RequestError) as e:
error_to_raise = ElasticServerError
Expand Down
8 changes: 8 additions & 0 deletions cl/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,11 @@ def prepare_timestamp(self, instance):

def prepare_percolator_query(self, instance):
qd = QueryDict(instance.query.encode(), mutable=True)
if "order_by" in qd:
# sorting key is not required in percolator queries. Adding it
# generates a custom function score for decay relevance, which breaks
# percolator queries.
del qd["order_by"]
search_form = SearchForm(qd)
if not search_form.is_valid():
logger.warning(
Expand Down Expand Up @@ -1988,6 +1993,9 @@ def prepare_percolator_query(self, instance):
from cl.alerts.utils import build_plain_percolator_query

qd = QueryDict(instance.query.encode(), mutable=True)
# For RECAP percolator queries, we use build_plain_percolator_query to
# build the query. It does not add a custom function_score, so there is
# no need to remove the order_by sorting key as it is ignored.
search_form = SearchForm(qd)
if not search_form.is_valid():
logger.warning(
Expand Down
256 changes: 256 additions & 0 deletions cl/search/tests/tests_es_opinion.py
Original file line number Diff line number Diff line change
Expand Up @@ -2269,6 +2269,262 @@ def test_uses_exact_version_for_case_name_field(self) -> None:
cluster_2.delete()


class OpinionSearchDecayRelevancyTest(
    ESIndexTestCase, V4SearchAPIAssertions, TestCase
):
    """
    Opinion Search Decay Relevancy Tests

    Verify that ordering by "score desc" blends keyword relevance with a
    date-based decay score: more recent documents outrank older ones of
    comparable relevance, both in the frontend results and in the API
    results ordering.
    """

    @classmethod
    def setUpTestData(cls):
        """Create opinion fixtures covering the relevance/recency matrix
        (same keywords different dates, same dates different relevance,
        and both differing) and index them into ES."""
        # Rebuild the Opinion index so the test starts from a clean state.
        cls.rebuild_index("search.OpinionCluster")

        # Same keywords but different date_filed: recency should break the tie.
        cls.opinion_old = OpinionClusterFactory.create(
            case_name="Keyword Match",
            case_name_full="",
            case_name_short="",
            date_filed=datetime.date(1732, 2, 23),
            procedural_history="",
            source="C",
            attorneys="",
            slug="opinion-old",
            precedential_status="Published",
            docket=DocketFactory(
                case_name="Base Docket",
                docket_number="1:21-bk-1235",
                source=Docket.HARVARD,
                date_filed=datetime.date(1900, 1, 1),
            ),
        )
        cls.opinion_recent = OpinionClusterFactory.create(
            case_name="Keyword Match",
            case_name_full="",
            case_name_short="",
            date_filed=datetime.date(2024, 2, 23),
            procedural_history="",
            source="C",
            attorneys="",
            slug="opinion-recent",
            precedential_status="Published",
            docket=DocketFactory(
                case_name="Base Docket",
                docket_number="1:21-bk-1236",
                source=Docket.HARVARD,
                date_filed=datetime.date(1900, 1, 1),
            ),
        )

        # Different relevance with same dateFiled: keyword relevance should
        # decide the order.
        cls.opinion_high_relevance = OpinionClusterFactory.create(
            case_name="Highly Relevant Keywords",
            case_name_full="",
            case_name_short="",
            date_filed=datetime.date(2022, 2, 23),
            procedural_history="More Highly Relevant Keywords",
            source="C",
            attorneys="More Highly Relevant Keywords",
            slug="opinion-high-rel",
            precedential_status="Published",
            docket=DocketFactory(
                case_name="Base Docket",
                docket_number="1:21-bk-1237",
                source=Docket.HARVARD,
                date_filed=datetime.date(1900, 1, 1),
            ),
        )
        cls.opinion_low_relevance = OpinionClusterFactory.create(
            case_name="Highly Relevant Keywords",
            case_name_full="",
            case_name_short="",
            date_filed=datetime.date(2022, 2, 23),
            procedural_history="",
            source="C",
            attorneys="",
            slug="opinion-low-rel",
            precedential_status="Published",
            docket=DocketFactory(
                case_name="Base Docket",
                docket_number="1:21-bk-1238",
                source=Docket.HARVARD,
                date_filed=datetime.date(1900, 1, 1),
            ),
        )

        # Different relevance with different dateFiled: decay scoring should
        # let the recent, less-relevant document win.
        cls.opinion_high_relevance_old_date = OpinionClusterFactory.create(
            case_name="Ipsum Dolor Terms",
            case_name_full="",
            case_name_short="",
            date_filed=datetime.date(1800, 2, 23),
            procedural_history="More Ipsum Dolor Terms",
            source="C",
            attorneys="More Ipsum Dolor Terms",
            slug="opinion-high-rel-old",
            precedential_status="Published",
            docket=DocketFactory(
                case_name="Base Docket",
                docket_number="1:21-bk-1239",
                source=Docket.HARVARD,
                date_filed=datetime.date(1900, 1, 1),
            ),
        )

        cls.opinion_low_relevance_new_date = OpinionClusterFactory.create(
            case_name="Ipsum Dolor Terms",
            case_name_full="",
            case_name_short="",
            date_filed=datetime.date(2024, 12, 23),
            procedural_history="",
            source="C",
            attorneys="",
            slug="opinion-low-rel-new",
            precedential_status="Published",
            docket=DocketFactory(
                case_name="Base Docket",
                docket_number="1:21-bk-1241",
                source=Docket.HARVARD,
                date_filed=datetime.date(1900, 1, 1),
            ),
        )

        super().setUpTestData()
        call_command(
            "cl_index_parent_and_child_docs",
            search_type=SEARCH_TYPES.OPINION,
            queue="celery",
            pk_offset=0,
            testing_mode=True,
        )

    def _assert_order_in_html(
        self, decoded_content: str, expected_order: list
    ) -> None:
        """Assert that the expected order of fields appears correctly in the
        HTML content.

        :param decoded_content: The rendered HTML page as a string.
        :param expected_order: Values (e.g. docket numbers) expected to appear
        in this exact order within the HTML.
        """
        for i in range(len(expected_order) - 1):
            self.assertTrue(
                decoded_content.index(str(expected_order[i]))
                < decoded_content.index(str(expected_order[i + 1])),
                f"Expected {expected_order[i]} to appear before {expected_order[i + 1]} in the HTML content.",
            )

    async def _test_article_count(self, params, expected_count, field_name):
        """Fetch the frontend search page and assert the number of <article>
        results matches expected_count. Returns the response for further
        assertions."""
        r = await self.async_client.get("/", params)
        tree = html.fromstring(r.content.decode())
        got = len(tree.xpath("//article"))
        self.assertEqual(
            got,
            expected_count,
            msg="Did not get the right number of search results in Frontend with %s "
            "filter applied.\n"
            "Expected: %s\n"
            "     Got: %s\n\n"
            "Params were: %s" % (field_name, expected_count, got, params),
        )
        return r

    def test_relevancy_decay_scoring(self) -> None:
        """Test relevancy decay scoring for Opinion search results."""
        test_cases = [
            {
                "name": "Same keywords, order by score desc",
                "search_params": {
                    "q": "Keyword Match",
                    "order_by": "score desc",
                    "type": SEARCH_TYPES.OPINION,
                },
                "expected_order_frontend": [
                    self.opinion_recent.docket.docket_number,  # Most recent dateFiled
                    self.opinion_old.docket.docket_number,  # Oldest dateFiled
                ],
                "expected_order": [
                    self.opinion_recent.pk,  # Most recent dateFiled
                    self.opinion_old.pk,  # Oldest dateFiled
                ],
            },
            {
                "name": "Different relevancy same dateFiled, order by score desc",
                "search_params": {
                    "q": "Highly Relevant Keywords",
                    "order_by": "score desc",
                    "type": SEARCH_TYPES.OPINION,
                },
                "expected_order_frontend": [
                    self.opinion_high_relevance.docket.docket_number,  # Most relevant by keywords
                    self.opinion_low_relevance.docket.docket_number,  # Less relevant by keywords
                ],
                "expected_order": [
                    self.opinion_high_relevance.pk,  # Most relevant by keywords
                    self.opinion_low_relevance.pk,  # Less relevant by keywords
                ],
            },
            {
                "name": "Different relevancy different dateFiled, order by score desc",
                "search_params": {
                    "q": "Ipsum Dolor Terms",
                    "order_by": "score desc",
                    "type": SEARCH_TYPES.OPINION,
                },
                "expected_order_frontend": [
                    self.opinion_low_relevance_new_date.docket.docket_number,
                    self.opinion_high_relevance_old_date.docket.docket_number,
                ],
                "expected_order": [
                    self.opinion_low_relevance_new_date.pk,
                    self.opinion_high_relevance_old_date.pk,
                ],
            },
            {
                "name": "Match all query decay relevancy.",
                "search_params": {
                    "q": "",
                    "order_by": "score desc",
                    "type": SEARCH_TYPES.OPINION,
                },
                # Order by recency and then by relevancy as per decay scoring logic
                "expected_order_frontend": [
                    self.opinion_low_relevance_new_date.docket.docket_number,  # 2024-12-23 1:21-bk-1241
                    self.opinion_recent.docket.docket_number,  # 2024-02-23 1:21-bk-1236
                    self.opinion_high_relevance.docket.docket_number,  # 2022-02-23 1:21-bk-1237
                    self.opinion_low_relevance.docket.docket_number,  # 2022-02-23 1:21-bk-1238
                    self.opinion_high_relevance_old_date.docket.docket_number,  # 1800-02-23 1:21-bk-1239
                    self.opinion_old.docket.docket_number,  # 1732-02-23 1:21-bk-1235
                ],
                "expected_order": [
                    self.opinion_low_relevance_new_date.pk,  # 2024-12-23
                    self.opinion_recent.pk,  # 2024-02-23
                    self.opinion_low_relevance.pk,  # 2022-02-23 Higher PK
                    self.opinion_high_relevance.pk,  # 2022-02-23 More relevant Lower PK
                    self.opinion_high_relevance_old_date.pk,  # 1800-02-23
                    self.opinion_old.pk,  # 1732-02-23
                ],
            },
        ]

        # Frontend: confirm result count and rendered ordering per case.
        for test in test_cases:
            with self.subTest(test["name"]):
                r = async_to_sync(self._test_article_count)(
                    test["search_params"],
                    len(test["expected_order_frontend"]),
                    f"Failed count {test['name']}",
                )
                self._assert_order_in_html(
                    r.content.decode(), test["expected_order_frontend"]
                )

        # API: confirm ordering by cluster_id per case.
        for test in test_cases:
            self._test_results_ordering(test, "cluster_id")





@override_flag("ui_flag_for_o", False)
@override_settings(RELATED_MLT_MINTF=1)
class RelatedSearchTest(
Expand Down
Loading

0 comments on commit 09d80ff

Please sign in to comment.