diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index 9e8fa0bdd2..0e55027b4d 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -37,6 +37,7 @@ ApiPositionMapping, BasePositionMapping, CleanData, + EsJoinQueries, EsMainQueries, ESRangeQueryParams, ) @@ -1089,19 +1090,16 @@ def combine_plain_filters_and_queries( def get_match_all_query( cd: CleanData, - search_query: Search, api_version: Literal["v3", "v4"] | None = None, child_highlighting: bool = True, -) -> Search: +) -> Query: """Build and return a match-all query for each type of document. :param cd: The query CleanedData - :param search_query: Elasticsearch DSL Search object :param api_version: Optional, the request API version. :param child_highlighting: Whether highlighting should be enabled in child docs. - :return: The modified Search object based on the given conditions. + :return: The Match All Query object. """ - _, query_hits_limit = get_child_top_hits_limit( cd, cd["type"], api_version=api_version ) @@ -1125,9 +1123,6 @@ def get_match_all_query( final_match_all_query = Q( "bool", should=q_should, minimum_should_match=1 ) - final_match_all_query = apply_custom_score_to_main_query( - cd, final_match_all_query, api_version - ) case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS: # Match all query for RECAP and Dockets, it'll return dockets # with child documents and also empty dockets. @@ -1149,9 +1144,6 @@ def get_match_all_query( should=[match_all_child_query, match_all_parent_query], minimum_should_match=1, ) - final_match_all_query = apply_custom_score_to_main_query( - cd, final_match_all_query, api_version, boost_mode="replace" - ) case SEARCH_TYPES.OPINION: # Only return Opinion clusters. 
match_all_child_query = build_has_child_query( @@ -1169,18 +1161,12 @@ def get_match_all_query( final_match_all_query = Q( "bool", should=q_should, minimum_should_match=1 ) - final_match_all_query = apply_custom_score_to_main_query( - cd, final_match_all_query, api_version, boost_mode="replace" - ) case _: # No string_query or filters in plain search types like OA and # Parentheticals. Use a match_all query. - match_all_query = Q("match_all") - final_match_all_query = apply_custom_score_to_main_query( - cd, match_all_query, api_version, boost_mode="replace" - ) + final_match_all_query = Q("match_all") - return search_query.query(final_match_all_query) + return final_match_all_query def build_es_base_query( @@ -1207,10 +1193,13 @@ def build_es_base_query( main_query = None string_query = None - child_docs_query = None + child_query = None parent_query = None filters = [] plain_doc = False + join_queries = None + has_text_query = False + match_all_query = False match cd["type"]: case SEARCH_TYPES.PARENTHETICAL: filters = build_es_plain_filters(cd) @@ -1253,14 +1242,12 @@ def build_es_base_query( ], ) ) - main_query, child_docs_query, parent_query = ( - build_full_join_es_queries( - cd, - child_query_fields, - parent_query_fields, - child_highlighting=child_highlighting, - api_version=api_version, - ) + join_queries = build_full_join_es_queries( + cd, + child_query_fields, + parent_query_fields, + child_highlighting=child_highlighting, + api_version=api_version, ) case ( @@ -1286,15 +1273,13 @@ def build_es_base_query( ], ) ) - main_query, child_docs_query, parent_query = ( - build_full_join_es_queries( - cd, - child_query_fields, - parent_query_fields, - child_highlighting=child_highlighting, - api_version=api_version, - alerts=alerts, - ) + join_queries = build_full_join_es_queries( + cd, + child_query_fields, + parent_query_fields, + child_highlighting=child_highlighting, + api_version=api_version, + alerts=alerts, ) case SEARCH_TYPES.OPINION: @@ -1306,20 +1291,19 
@@ def build_es_base_query( mlt_query = async_to_sync(build_more_like_this_query)( cluster_pks ) - main_query, child_docs_query, parent_query = ( - build_full_join_es_queries( - cd, - {"opinion": []}, - [], - mlt_query, - child_highlighting=True, - api_version=api_version, - ) + join_queries = build_full_join_es_queries( + cd, + {"opinion": []}, + [], + mlt_query, + child_highlighting=True, + api_version=api_version, ) return EsMainQueries( - search_query=search_query.query(main_query), - parent_query=parent_query, - child_query=child_docs_query, + search_query=search_query.query(join_queries.main_query), + boost_mode="multiply", + parent_query=join_queries.parent_query, + child_query=join_queries.child_query, ) opinion_search_fields = SEARCH_OPINION_QUERY_FIELDS @@ -1346,53 +1330,48 @@ def build_es_base_query( ], ) ) - main_query, child_docs_query, parent_query = ( - build_full_join_es_queries( - cd, - child_query_fields, - parent_query_fields, - mlt_query, - child_highlighting=child_highlighting, - api_version=api_version, - alerts=alerts, - ) + join_queries = build_full_join_es_queries( + cd, + child_query_fields, + parent_query_fields, + mlt_query, + child_highlighting=child_highlighting, + api_version=api_version, + alerts=alerts, ) + if join_queries is not None: + main_query = join_queries.main_query + parent_query = join_queries.parent_query + child_query = join_queries.child_query + has_text_query = join_queries.has_text_query + if not any([filters, string_query, main_query]): # No filters, string_query or main_query provided by the user, return a # match_all query - match_all_query = get_match_all_query( - cd, search_query, api_version, child_highlighting - ) - - return EsMainQueries( - search_query=match_all_query, - parent_query=parent_query, - child_query=child_docs_query, - ) + main_query = get_match_all_query(cd, api_version, child_highlighting) + match_all_query = True - boost_mode = "multiply" - if plain_doc: + boost_mode = "multiply" if 
has_text_query else "replace" + if plain_doc and not match_all_query: # Combine the filters and string query for plain documents like Oral # arguments and parentheticals main_query = combine_plain_filters_and_queries( cd, filters, string_query, api_version ) - if not string_query: - boost_mode = "replace" - else: - main_query_dict = main_query.to_dict() - contain_query_string = "should" in main_query_dict["bool"]["should"][1]["bool"] and "query_string" in main_query_dict["bool"]["should"][1]["bool"]["should"][0] - if not contain_query_string: - boost_mode = "replace" - + boost_mode = "multiply" if string_query else "replace" - main_query = apply_custom_score_to_main_query(cd, main_query, api_version, boost_mode=boost_mode) + # Apply a custom function score to the main query, useful for cursor pagination + # in the V4 API and for date decay relevance. + main_query = apply_custom_score_to_main_query( + cd, main_query, api_version, boost_mode=boost_mode + ) return EsMainQueries( search_query=search_query.query(main_query), + boost_mode=boost_mode, parent_query=parent_query, - child_query=child_docs_query, + child_query=child_query, ) @@ -2223,7 +2202,6 @@ def fetch_es_results( # Execute the ES main query + count queries in a single request. 
multi_search = MultiSearch() - print("MAin query: ", main_query.to_dict()) multi_search = multi_search.add(main_query).add(main_doc_count_query) if child_total_query: multi_search = multi_search.add(child_total_query) @@ -2593,12 +2571,28 @@ def apply_custom_score_to_main_query( else False ) - valid_decay_relevance_types = { - SEARCH_TYPES.OPINION: ["dateFiled"], - SEARCH_TYPES.RECAP: ["dateFiled"], - SEARCH_TYPES.DOCKETS: ["dateFiled"], - SEARCH_TYPES.RECAP_DOCUMENT: ["dateFiled"], - SEARCH_TYPES.ORAL_ARGUMENT: ["dateArgued"], + valid_decay_relevance_types: dict[str, dict[str, str | int | float]] = { + SEARCH_TYPES.OPINION: { + "field": "dateFiled", + "scale": 50, + "decay": 0.5, + }, + SEARCH_TYPES.RECAP: {"field": "dateFiled", "scale": 50, "decay": 0.5}, + SEARCH_TYPES.DOCKETS: { + "field": "dateFiled", + "scale": 50, + "decay": 0.5, + }, + SEARCH_TYPES.RECAP_DOCUMENT: { + "field": "dateFiled", + "scale": 50, + "decay": 0.5, + }, + SEARCH_TYPES.ORAL_ARGUMENT: { + "field": "dateArgued", + "scale": 50, + "decay": 0.5, + }, } main_order_by = cd.get("order_by", "") if is_valid_custom_score_field and api_version == "v4": @@ -2615,9 +2609,11 @@ def apply_custom_score_to_main_query( main_order_by == "score desc" and cd["type"] in valid_decay_relevance_types ): - date_field = valid_decay_relevance_types[cd["type"]][0] + date_field = str(valid_decay_relevance_types[cd["type"]]["field"]) + scale = int(valid_decay_relevance_types[cd["type"]]["scale"]) + decay = float(valid_decay_relevance_types[cd["type"]]["decay"]) query = build_decay_relevance_score( - query, date_field, scale=10, decay=0.5, boost_mode=boost_mode + query, date_field, scale=scale, decay=decay, boost_mode=boost_mode ) return query @@ -2630,7 +2626,7 @@ def build_full_join_es_queries( child_highlighting: bool = True, api_version: Literal["v3", "v4"] | None = None, alerts: bool = False, -) -> tuple[QueryString | list, QueryString | None, QueryString | None]: +) -> EsJoinQueries: """Build a complete 
Elasticsearch query with both parent and child document conditions. @@ -2646,6 +2642,7 @@ def build_full_join_es_queries( """ q_should = [] + has_text_query = False match cd["type"]: case ( SEARCH_TYPES.RECAP @@ -2775,6 +2772,7 @@ def build_full_join_es_queries( string_query = build_fulltext_query( parent_query_fields, cd.get("q", ""), only_queries=True ) + has_text_query = True if string_query else False # If child filters are set, add a has_child query as a filter to the # parent query to exclude results without matching children. @@ -2822,15 +2820,21 @@ def build_full_join_es_queries( q_should.append(parent_query) if not q_should: - return [], child_docs_query, parent_query + return EsJoinQueries( + main_query=[], + parent_query=parent_query, + child_query=child_docs_query, + has_text_query=has_text_query, + ) - return ( - Q( + return EsJoinQueries( + main_query=Q( "bool", should=q_should, ), - child_docs_query, - parent_query, + parent_query=parent_query, + child_query=child_docs_query, + has_text_query=has_text_query, ) @@ -3091,11 +3095,14 @@ def do_es_api_query( # and sorting are set. # Note that in V3 Case Law Search, opinions are collapsed by cluster_id # meaning that only one result per cluster is shown. - s = build_child_docs_query( + child_docs_query = build_child_docs_query( child_docs_query, cd=cd, ) - main_query = search_query.query(s) + main_query = apply_custom_score_to_main_query( + cd, child_docs_query, api_version, boost_mode=es_queries.boost_mode + ) + main_query = search_query.query(main_query) highlight_options, fields_to_exclude = build_highlights_dict( highlighting_fields, hl_tag ) @@ -3138,7 +3145,10 @@ def do_es_api_query( # field exclusion are set. 
s = apply_custom_score_to_main_query( - cd, child_docs_query, api_version + cd, + child_docs_query, + api_version, + boost_mode=es_queries.boost_mode, ) main_query = search_query.query(s) highlight_options, fields_to_exclude = build_highlights_dict( diff --git a/cl/lib/types.py b/cl/lib/types.py index ff257574e9..e4c29c31e6 100644 --- a/cl/lib/types.py +++ b/cl/lib/types.py @@ -195,10 +195,19 @@ def get_db_to_dataclass_map(self): @dataclass class EsMainQueries: search_query: Search + boost_mode: str parent_query: QueryString | None = None child_query: QueryString | None = None +@dataclass +class EsJoinQueries: + main_query: QueryString | list + parent_query: QueryString | None + child_query: QueryString | None + has_text_query: bool + + @dataclass class ApiPositionMapping(BasePositionMapping): position_type_dict: defaultdict[int, list[str]] = field( diff --git a/cl/search/api_utils.py b/cl/search/api_utils.py index 8771c5e4c7..9c853af182 100644 --- a/cl/search/api_utils.py +++ b/cl/search/api_utils.py @@ -64,7 +64,7 @@ def get_object_list(request, cd, paginator): case SEARCH_TYPES.RECAP | SEARCH_TYPES.DOCKETS: search_query = ESRECAPDocument.search() case _: - search_query = None + raise ElasticBadRequestError("Unsupported search type.") if use_default_query: main_query, _, _ = build_es_main_query(search_query, cd) @@ -139,7 +139,6 @@ def __getitem__(self, item): error_to_raise = None try: - print("Query V3: ", self.main_query.to_dict()) results = self.main_query.execute() except (TransportError, ConnectionError, RequestError) as e: error_to_raise = ElasticServerError diff --git a/cl/search/api_views.py b/cl/search/api_views.py index 4f33b1d4f6..1761ccdd64 100644 --- a/cl/search/api_views.py +++ b/cl/search/api_views.py @@ -1,6 +1,5 @@ from http import HTTPStatus -import waffle from django.db.models import Prefetch from rest_framework import pagination, permissions, response, viewsets from rest_framework.exceptions import NotFound diff --git 
a/cl/search/tests/tests_es_opinion.py b/cl/search/tests/tests_es_opinion.py index 101bf2a32d..08eb9c6a3b 100644 --- a/cl/search/tests/tests_es_opinion.py +++ b/cl/search/tests/tests_es_opinion.py @@ -2281,7 +2281,6 @@ def setUpTestData(cls): # Rebuild the Opinion index cls.rebuild_index("search.OpinionCluster") - # Same keywords but different date_filed cls.opinion_old = OpinionClusterFactory.create( case_name="Keyword Match", @@ -2294,12 +2293,16 @@ def setUpTestData(cls): slug="opinion-old", precedential_status="Published", docket=DocketFactory( - case_name="Base Docket", - docket_number="1:21-bk-1235", - source=Docket.HARVARD, - date_filed=datetime.date(1900, 1, 1), - ) + case_name="Base Docket", + docket_number="1:21-bk-1235", + source=Docket.HARVARD, + date_filed=datetime.date(1900, 1, 1), + ), + ) + cls.child_opinion_old = OpinionFactory.create( + cluster=cls.opinion_old, plain_text="", author_str="" ) + cls.opinion_recent = OpinionClusterFactory.create( case_name="Keyword Match", case_name_full="", @@ -2311,11 +2314,14 @@ def setUpTestData(cls): slug="opinion-recent", precedential_status="Published", docket=DocketFactory( - case_name="Base Docket", - docket_number="1:21-bk-1236", - source=Docket.HARVARD, - date_filed=datetime.date(1900, 1, 1), + case_name="Base Docket", + docket_number="1:21-bk-1236", + source=Docket.HARVARD, + date_filed=datetime.date(1900, 1, 1), + ), ) + cls.child_opinion_recent = OpinionFactory.create( + cluster=cls.opinion_recent, plain_text="", author_str="" ) # Different relevance with same dateFiled @@ -2336,6 +2342,10 @@ def setUpTestData(cls): date_filed=datetime.date(1900, 1, 1), ), ) + cls.child_opinion_high_relevance = OpinionFactory.create( + cluster=cls.opinion_high_relevance, plain_text="", author_str="" + ) + cls.opinion_low_relevance = OpinionClusterFactory.create( case_name="Highly Relevant Keywords", case_name_full="", @@ -2347,13 +2357,15 @@ def setUpTestData(cls): slug="opinion-low-rel", precedential_status="Published", 
docket=DocketFactory( - case_name="Base Docket", - docket_number="1:21-bk-1238", - source=Docket.HARVARD, - date_filed=datetime.date(1900, 1, 1), - ), + case_name="Base Docket", + docket_number="1:21-bk-1238", + source=Docket.HARVARD, + date_filed=datetime.date(1900, 1, 1), + ), + ) + cls.child_opinion_low_relevance = OpinionFactory.create( + cluster=cls.opinion_low_relevance, plain_text="", author_str="" ) - # Different relevance with different dateFiled cls.opinion_high_relevance_old_date = OpinionClusterFactory.create( @@ -2367,11 +2379,16 @@ def setUpTestData(cls): slug="opinion-high-rel-old", precedential_status="Published", docket=DocketFactory( - case_name="Base Docket", - docket_number="1:21-bk-1239", - source=Docket.HARVARD, - date_filed=datetime.date(1900, 1, 1), - ), + case_name="Base Docket", + docket_number="1:21-bk-1239", + source=Docket.HARVARD, + date_filed=datetime.date(1900, 1, 1), + ), + ) + cls.child_opinion_high_relevance_old_date = OpinionFactory.create( + cluster=cls.opinion_high_relevance_old_date, + plain_text="", + author_str="", ) cls.opinion_low_relevance_new_date = OpinionClusterFactory.create( @@ -2385,11 +2402,16 @@ def setUpTestData(cls): slug="opinion-low-rel-new", precedential_status="Published", docket=DocketFactory( - case_name="Base Docket", - docket_number="1:21-bk-1241", - source=Docket.HARVARD, - date_filed=datetime.date(1900, 1, 1), - ), + case_name="Base Docket", + docket_number="1:21-bk-1241", + source=Docket.HARVARD, + date_filed=datetime.date(1900, 1, 1), + ), + ) + cls.child_opinion_low_relevance_new_date = OpinionFactory.create( + cluster=cls.opinion_low_relevance_new_date, + plain_text="", + author_str="", ) super().setUpTestData() @@ -2401,39 +2423,7 @@ def setUpTestData(cls): testing_mode=True, ) - def _assert_order_in_html( - self, decoded_content: str, expected_order: list - ) -> None: - """Assert that the expected order of fields appears correctly in the HTML content.""" - - for i in range(len(expected_order) - 
1): - print("str(expected_order[i])", str(expected_order[i])) - print("str(expected_order[i + 1])", str(expected_order[i + 1])) - - self.assertTrue( - decoded_content.index(str(expected_order[i])) - < decoded_content.index(str(expected_order[i + 1])), - f"Expected {expected_order[i]} to appear before {expected_order[i + 1]} in the HTML content.", - ) - - async def _test_article_count(self, params, expected_count, field_name): - r = await self.async_client.get("/", params) - tree = html.fromstring(r.content.decode()) - got = len(tree.xpath("//article")) - self.assertEqual( - got, - expected_count, - msg="Did not get the right number of search results in Frontend with %s " - "filter applied.\n" - "Expected: %s\n" - " Got: %s\n\n" - "Params were: %s" % (field_name, expected_count, got, params), - ) - return r - - def test_relevancy_decay_scoring(self) -> None: - """Test relevancy decay scoring for Opinion search results.""" - test_cases = [ + cls.test_cases = [ { "name": "Same keywords, order by score desc", "search_params": { @@ -2442,12 +2432,12 @@ def test_relevancy_decay_scoring(self) -> None: "type": SEARCH_TYPES.OPINION, }, "expected_order_frontend": [ - self.opinion_recent.docket.docket_number, # Most recent dateFiled - self.opinion_old.docket.docket_number, # Oldest dateFiled + cls.opinion_recent.docket.docket_number, # Most recent dateFiled + cls.opinion_old.docket.docket_number, # Oldest dateFiled ], "expected_order": [ - self.opinion_recent.pk, # Most recent dateFiled - self.opinion_old.pk, # Oldest dateFiled + cls.opinion_recent.pk, # Most recent dateFiled + cls.opinion_old.pk, # Oldest dateFiled ], }, { @@ -2458,12 +2448,12 @@ def test_relevancy_decay_scoring(self) -> None: "type": SEARCH_TYPES.OPINION, }, "expected_order_frontend": [ - self.opinion_high_relevance.docket.docket_number, # Most relevant by keywords - self.opinion_low_relevance.docket.docket_number, # Less relevant by keywords + cls.opinion_high_relevance.docket.docket_number, # Most 
relevant by keywords + cls.opinion_low_relevance.docket.docket_number, # Less relevant by keywords ], "expected_order": [ - self.opinion_high_relevance.pk, # Most relevant by keywords - self.opinion_low_relevance.pk, # Less relevant by keywords + cls.opinion_high_relevance.pk, # Most relevant by keywords + cls.opinion_low_relevance.pk, # Less relevant by keywords ], }, { @@ -2474,12 +2464,12 @@ def test_relevancy_decay_scoring(self) -> None: "type": SEARCH_TYPES.OPINION, }, "expected_order_frontend": [ - self.opinion_low_relevance_new_date.docket.docket_number, - self.opinion_high_relevance_old_date.docket.docket_number, + cls.opinion_low_relevance_new_date.docket.docket_number, + cls.opinion_high_relevance_old_date.docket.docket_number, ], "expected_order": [ - self.opinion_low_relevance_new_date.pk, - self.opinion_high_relevance_old_date.pk, + cls.opinion_low_relevance_new_date.pk, + cls.opinion_high_relevance_old_date.pk, ], }, { @@ -2491,38 +2481,57 @@ def test_relevancy_decay_scoring(self) -> None: }, # Order by recency and then by relevancy as per decay scoring logic "expected_order_frontend": [ - self.opinion_low_relevance_new_date.docket.docket_number, # 2024-12-23 1:21-bk-1241 - self.opinion_recent.docket.docket_number, # 2024-02-23 1:21-bk-1236 - self.opinion_high_relevance.docket.docket_number, # 2022-02-23 1:21-bk-1237 - self.opinion_low_relevance.docket.docket_number, # 2022-02-23 1:21-bk-1238 - self.opinion_high_relevance_old_date.docket.docket_number, # 1800-02-23 1:21-bk-1239 - self.opinion_old.docket.docket_number, # 1732-02-23 1:21-bk-1235 + cls.opinion_low_relevance_new_date.docket.docket_number, # 2024-12-23 1:21-bk-1241 + cls.opinion_recent.docket.docket_number, # 2024-02-23 1:21-bk-1236 + cls.opinion_high_relevance.docket.docket_number, # 2022-02-23 1:21-bk-1237 + cls.opinion_low_relevance.docket.docket_number, # 2022-02-23 1:21-bk-1238 + cls.opinion_high_relevance_old_date.docket.docket_number, # 1800-02-23 1:21-bk-1239 + 
cls.opinion_old.docket.docket_number, # 1732-02-23 1:21-bk-1235 ], "expected_order": [ - self.opinion_low_relevance_new_date.pk, # 2024-12-23 - self.opinion_recent.pk, # 2024-02-23 - self.opinion_low_relevance.pk, # 2022-02-23 Higher PK - self.opinion_high_relevance.pk, # 2022-02-23 More relevant Lower PK - self.opinion_high_relevance_old_date.pk, # 1800-02-23 - self.opinion_old.pk, # 1732-02-23 + cls.opinion_low_relevance_new_date.pk, # 2024-12-23 + cls.opinion_recent.pk, # 2024-02-23 + cls.opinion_low_relevance.pk, # 2022-02-23 Higher PK + cls.opinion_high_relevance.pk, # 2022-02-23 More relevant Lower PK + cls.opinion_high_relevance_old_date.pk, # 1800-02-23 + cls.opinion_old.pk, # 1732-02-23 + ], + "expected_order_v3": [ + cls.opinion_low_relevance_new_date.pk, # 2024-12-23 + cls.opinion_recent.pk, # 2024-02-23 + cls.opinion_high_relevance.pk, # 2022-02-23 Indexed first + cls.opinion_low_relevance.pk, # 2022-02-23 + cls.opinion_high_relevance_old_date.pk, # 1800-02-23 + cls.opinion_old.pk, # 1732-02-23 ], }, ] - for test in test_cases: + def test_relevancy_decay_scoring_frontend(self) -> None: + """Test relevancy decay scoring for Opinion search Frontend""" + + for test in self.test_cases: with self.subTest(test["name"]): r = async_to_sync(self._test_article_count)( test["search_params"], len(test["expected_order_frontend"]), f"Failed count {test['name']}", ) - self._assert_order_in_html(r.content.decode(), test["expected_order_frontend"]) + self._assert_order_in_html( + r.content.decode(), test["expected_order_frontend"] + ) - for test in test_cases: - self._test_results_ordering(test, "cluster_id") + def test_relevancy_decay_scoring_v4_api(self) -> None: + """Test relevancy decay scoring for Opinion search V4 API""" + for test in self.test_cases: + self._test_results_ordering(test, "cluster_id") + def test_relevancy_decay_scoring_v3_api(self) -> None: + """Test relevancy decay scoring for Opinion search V3 API""" + for test in self.test_cases: + 
self._test_results_ordering(test, "cluster_id", version="v3") @override_flag("ui_flag_for_o", False) diff --git a/cl/search/tests/tests_es_oral_arguments.py b/cl/search/tests/tests_es_oral_arguments.py index 411785e731..6144cbf9ea 100644 --- a/cl/search/tests/tests_es_oral_arguments.py +++ b/cl/search/tests/tests_es_oral_arguments.py @@ -987,7 +987,8 @@ def save_percolator_query(cd): del cd["order_by"] es_queries = build_es_base_query(search_query, cd) percolator_query = AudioPercolator( - percolator_query=es_queries.search_query.to_dict()["query"], rate=Alert.REAL_TIME + percolator_query=es_queries.search_query.to_dict()["query"], + rate=Alert.REAL_TIME, ) percolator_query.save(refresh=True) return percolator_query.meta.id @@ -1051,8 +1052,8 @@ def test_oa_results_relevance_ordering(self) -> None: expected = 3 self.assertEqual(actual, expected) self.assertTrue( - r.content.decode().index("Jose") # 2015, 8, 15 - < r.content.decode().index("Hong Liu"), # 2015, 8, 14 + r.content.decode().index("Jose") # 2015, 8, 15 + < r.content.decode().index("Hong Liu"), # 2015, 8, 14 msg="'Jose' should come Before 'Hong Liu' when order_by relevance.", ) @@ -1641,9 +1642,13 @@ def test_oa_results_relevance_ordering_elastic(self) -> None: expected = 3 self.assertEqual(actual, expected) self.assertTrue( - r.content.decode().index("Hong Liu Lorem") # 2015, 8, 14 - 9.486339 - < r.content.decode().index("Hong Liu Yang") # 2015, 8, 14 - 9.034608 - < r.content.decode().index("Jose"), # 2015, 8, 15 - 4.7431693 + r.content.decode().index( + "Hong Liu Lorem" + ) # 2015, 8, 14 - 9.486339 + < r.content.decode().index( + "Hong Liu Yang" + ) # 2015, 8, 14 - 9.034608 + < r.content.decode().index("Jose"), # 2015, 8, 15 - 4.7431693 msg="'Jose' should come BEFORE 'Hong Liu Yang' and 'Hong Liu Lorem' when order_by relevance.", ) @@ -1662,9 +1667,9 @@ def test_oa_results_relevance_ordering_elastic(self) -> None: expected = 3 self.assertEqual(actual, expected) self.assertTrue( - 
r.content.decode().index("Jose") # 2015, 8, 15 - < r.content.decode().index("Hong Liu Lorem") # 2015, 8, 14 - < r.content.decode().index("Hong Liu Yang"), # 2015, 8, 14 + r.content.decode().index("Jose") # 2015, 8, 15 + < r.content.decode().index("Hong Liu Lorem") # 2015, 8, 14 + < r.content.decode().index("Hong Liu Yang"), # 2015, 8, 14 msg="'Jose' should come Before 'Hong Liu Lorem' and 'Hong Liu Yang' when order_by relevance.", ) @@ -2487,6 +2492,286 @@ def test_uses_exact_version_for_case_name_field(self) -> None: self.assertIn("Howells", r.content.decode()) +class OralArgumentsSearchDecayRelevancyTest( + ESIndexTestCase, V4SearchAPIAssertions, TestCase +): + """Oral Arguments Search Decay Relevancy Tests""" + + @classmethod + def setUpTestData(cls): + # Same keywords but different date_argued + with cls.captureOnCommitCallbacks(execute=True): + cls.docket_old = DocketFactory.create( + docket_number="1:21-bk-1235", + date_argued=datetime.date(1732, 2, 23), + ) + cls.audio_old = AudioFactory.create( + case_name="Keyword Match", + case_name_full="", + docket_id=cls.docket_old.pk, + duration=420, + judges="Judge Old", + local_path_original_file="test/audio/audio_old.mp3", + local_path_mp3="test/audio/audio_old.mp3", + source="C", + blocked=False, + sha1="old_sha1", + stt_status=Audio.STT_COMPLETE, + stt_transcript="Transcript for old audio", + ) + + cls.docket_recent = DocketFactory.create( + docket_number="1:21-bk-1236", + date_argued=datetime.date(2024, 2, 23), + ) + cls.audio_recent = AudioFactory.create( + case_name="Keyword Match", + case_name_full="", + docket_id=cls.docket_recent.pk, + duration=420, + judges="Judge Recent", + local_path_original_file="test/audio/audio_recent.mp3", + local_path_mp3="test/audio/audio_recent.mp3", + source="C", + blocked=False, + sha1="recent_sha1", + stt_status=Audio.STT_COMPLETE, + stt_transcript="Transcript for recent audio", + ) + + # Different relevance with same date_argued + cls.docket_low_relevance = 
DocketFactory.create( + case_name="Highly Relevant Keywords", + docket_number="1:21-bk-1238", + date_argued=datetime.date(2022, 2, 23), + ) + cls.audio_low_relevance = AudioFactory.create( + case_name="Highly Relevant Keywords", + case_name_full="", + docket_id=cls.docket_low_relevance.pk, + duration=420, + judges="Judge Low", + local_path_original_file="test/audio/audio_low_rel.mp3", + local_path_mp3="test/audio/audio_low_rel.mp3", + source="C", + blocked=False, + sha1="low_rel_sha1", + stt_status=Audio.STT_COMPLETE, + stt_transcript="", + ) + + cls.docket_high_relevance = DocketFactory.create( + case_name="Highly Relevant Keywords", + docket_number="1:21-bk-1237", + date_argued=datetime.date(2022, 2, 23), + ) + cls.audio_high_relevance = AudioFactory.create( + case_name="Highly Relevant Keywords", + case_name_full="", + docket_id=cls.docket_high_relevance.pk, + duration=420, + judges="Judge High", + local_path_original_file="test/audio/audio_high_rel.mp3", + local_path_mp3="test/audio/audio_high_rel.mp3", + source="C", + blocked=False, + sha1="high_rel_sha1", + stt_status=Audio.STT_COMPLETE, + # More relevancy can be indicated by adding more relevant keywords in transcript + stt_transcript="More Highly Relevant Keywords in the transcript", + ) + + # Different relevance with different date_argued + cls.docket_high_relevance_old_date = DocketFactory.create( + case_name="Ipsum Dolor Terms", + docket_number="1:21-bk-1239", + date_argued=datetime.date(1800, 2, 23), + ) + cls.audio_high_relevance_old_date = AudioFactory.create( + case_name="Ipsum Dolor Terms", + case_name_full="", + docket_id=cls.docket_high_relevance_old_date.pk, + duration=420, + judges="Judge Old Relevant", + local_path_original_file="test/audio/audio_high_rel_old.mp3", + local_path_mp3="test/audio/audio_high_rel_old.mp3", + source="C", + blocked=False, + sha1="high_rel_old_sha1", + stt_status=Audio.STT_COMPLETE, + stt_transcript="More Ipsum Dolor Terms", + ) + + cls.docket_high_relevance_null_date 
= DocketFactory.create( + case_name="Ipsum Dolor Terms", + docket_number="1:21-bk-1240", + date_argued=None, + ) + cls.audio_high_relevance_null_date = AudioFactory.create( + case_name="Ipsum Dolor Terms", + case_name_full="", + docket_id=cls.docket_high_relevance_null_date.pk, + duration=420, + judges="Judge Null", + local_path_original_file="test/audio/audio_high_rel_null.mp3", + local_path_mp3="test/audio/audio_high_rel_null.mp3", + source="C", + blocked=False, + sha1="high_rel_null_sha1", + stt_status=Audio.STT_COMPLETE, + stt_transcript="More Ipsum Dolor Terms", + ) + + cls.docket_low_relevance_new_date = DocketFactory.create( + case_name="Ipsum Dolor Terms", + docket_number="1:21-bk-1241", + date_argued=datetime.date(2024, 12, 23), + ) + cls.audio_low_relevance_new_date = AudioFactory.create( + case_name="Ipsum Dolor Terms", + case_name_full="", + docket_id=cls.docket_low_relevance_new_date.pk, + duration=420, + judges="Judge New Low", + local_path_original_file="test/audio/audio_low_rel_new.mp3", + local_path_mp3="test/audio/audio_low_rel_new.mp3", + source="C", + blocked=False, + sha1="low_rel_new_sha1", + stt_status=Audio.STT_COMPLETE, + stt_transcript="", + ) + + cls.test_cases = [ + { + "name": "Same keywords, order by score desc", + "search_params": { + "q": "Keyword Match", + "order_by": "score desc", + "type": SEARCH_TYPES.ORAL_ARGUMENT, + }, + "expected_order_frontend": [ + cls.docket_recent.docket_number, # Most recent date_argued + cls.docket_old.docket_number, # Oldest date_argued + ], + "expected_order": [ + cls.audio_recent.pk, + cls.audio_old.pk, + ], + }, + { + "name": "Different relevancy same dateArgued, order by score desc", + "search_params": { + "q": "Highly Relevant Keywords", + "order_by": "score desc", + "type": SEARCH_TYPES.ORAL_ARGUMENT, + }, + "expected_order_frontend": [ + cls.docket_high_relevance.docket_number, # Most relevant by keywords + cls.docket_low_relevance.docket_number, # Less relevant by keywords + ], + 
"expected_order": [ + cls.audio_high_relevance.pk, + cls.audio_low_relevance.pk, + ], + }, + { + "name": "Different relevancy different dateArgued, order by score desc", + "search_params": { + "q": "Ipsum Dolor Terms", + "order_by": "score desc", + "type": SEARCH_TYPES.ORAL_ARGUMENT, + }, + "expected_order_frontend": [ + cls.docket_low_relevance_new_date.docket_number, + cls.docket_high_relevance_old_date.docket_number, + cls.docket_high_relevance_null_date.docket_number, + ], + "expected_order": [ + cls.audio_low_relevance_new_date.pk, + cls.audio_high_relevance_old_date.pk, + cls.audio_high_relevance_null_date.pk, + ], + }, + { + "name": "Fixed main score (Filtering) different dateArgued, order by score desc", + "search_params": { + "case_name": "Ipsum Dolor Terms", + "order_by": "score desc", + "type": SEARCH_TYPES.ORAL_ARGUMENT, + }, + "expected_order_frontend": [ + cls.docket_low_relevance_new_date.docket_number, + cls.docket_high_relevance_old_date.docket_number, + cls.docket_high_relevance_null_date.docket_number, + ], + "expected_order": [ + cls.audio_low_relevance_new_date.pk, + cls.audio_high_relevance_old_date.pk, + cls.audio_high_relevance_null_date.pk, + ], + }, + { + "name": "Match all query decay relevancy.", + "search_params": { + "q": "", + "order_by": "score desc", + "type": SEARCH_TYPES.ORAL_ARGUMENT, + }, + "expected_order_frontend": [ + cls.docket_low_relevance_new_date.docket_number, # 2024-12-23 1:21-bk-1241 + cls.docket_recent.docket_number, # 2024-02-23 1:21-bk-1236 + cls.docket_low_relevance.docket_number, # 2022-02-23 1:21-bk-1238 + cls.docket_high_relevance.docket_number, # 2022-02-23 1:21-bk-1237 + cls.docket_high_relevance_old_date.docket_number, # 1800-02-23 1:21-bk-1239 + cls.docket_old.docket_number, # 1732-02-23 1:21-bk-1235 + cls.docket_high_relevance_null_date.docket_number, # Null date 1:21-bk-1240 + ], + "expected_order": [ + cls.audio_low_relevance_new_date.pk, + cls.audio_recent.pk, + cls.audio_high_relevance.pk, + 
cls.audio_low_relevance.pk, + cls.audio_high_relevance_old_date.pk, + cls.audio_old.pk, + cls.audio_high_relevance_null_date.pk, + ], + "expected_order_v3": [ + cls.audio_low_relevance_new_date.pk, + cls.audio_recent.pk, + cls.audio_low_relevance.pk, + cls.audio_high_relevance.pk, + cls.audio_high_relevance_old_date.pk, + cls.audio_old.pk, + cls.audio_high_relevance_null_date.pk, + ], + }, + ] + + def test_relevancy_decay_scoring_frontend(self) -> None: + """Test relevancy decay scoring for Oral Arguments search Frontend""" + for test in self.test_cases: + with self.subTest(test["name"]): + r = async_to_sync(self._test_article_count)( + test["search_params"], + len(test["expected_order_frontend"]), + f"Failed count {test['name']}", + ) + self._assert_order_in_html( + r.content.decode(), test["expected_order_frontend"] + ) + + def test_relevancy_decay_scoring_v4_api(self) -> None: + """Test relevancy decay scoring for Oral Arguments search V4 API""" + for test in self.test_cases: + self._test_results_ordering(test, "id", version="v4") + + def test_relevancy_decay_scoring_v3_api(self) -> None: + """Test relevancy decay scoring for Oral Arguments search V3 API""" + for test in self.test_cases: + self._test_results_ordering(test, "id", version="v3") + + class OralArgumentIndexingTest( CountESTasksTestCase, ESIndexTestCase, TransactionTestCase ): diff --git a/cl/search/tests/tests_es_recap.py b/cl/search/tests/tests_es_recap.py index 3c49d7f17a..4b504ede84 100644 --- a/cl/search/tests/tests_es_recap.py +++ b/cl/search/tests/tests_es_recap.py @@ -2874,6 +2874,17 @@ def setUpTestData(cls): source=Docket.RECAP, date_filed=datetime.date(1732, 2, 23), ) + cls.rd_old = RECAPDocumentFactory( + docket_entry=DocketEntryWithParentsFactory( + docket=cls.docket_old, + entry_number=1, + description="", + ), + description="", + is_available=False, + pacer_doc_id="019036000435", + ) + cls.docket_recent = DocketFactory( case_name="Keyword Match", case_name_full="", @@ -2882,6 +2893,16 
@@ def setUpTestData(cls): source=Docket.RECAP, date_filed=datetime.date(2024, 2, 23), ) + cls.rd_recent = RECAPDocumentFactory( + docket_entry=DocketEntryWithParentsFactory( + docket=cls.docket_recent, + entry_number=1, + description="", + ), + description="", + is_available=False, + pacer_doc_id="019036000436", + ) # Different relevance with same dateFiled cls.docket_low_relevance = DocketFactory( @@ -2893,6 +2914,17 @@ def setUpTestData(cls): source=Docket.RECAP, date_filed=datetime.date(2022, 2, 23), ) + cls.rd_low_relevance = RECAPDocumentFactory( + docket_entry=DocketEntryWithParentsFactory( + docket=cls.docket_low_relevance, + entry_number=1, + description="", + ), + description="", + is_available=False, + pacer_doc_id="019036000437", + ) + cls.docket_high_relevance = DocketFactory( case_name="Highly Relevant Keywords", case_name_full="", @@ -2903,6 +2935,16 @@ def setUpTestData(cls): cause="More Highly Relevant Keywords", date_filed=datetime.date(2022, 2, 23), ) + cls.rd_high_relevance = RECAPDocumentFactory( + docket_entry=DocketEntryWithParentsFactory( + docket=cls.docket_high_relevance, + entry_number=1, + description="", + ), + description="", + is_available=False, + pacer_doc_id="01903600048", + ) # Different relevance with different dateFiled cls.docket_high_relevance_old_date = DocketFactory( @@ -2915,6 +2957,17 @@ def setUpTestData(cls): cause="More Ipsum Dolor Terms", date_filed=datetime.date(1800, 2, 23), ) + cls.rd_high_relevance_old_date = RECAPDocumentFactory( + docket_entry=DocketEntryWithParentsFactory( + docket=cls.docket_high_relevance_old_date, + entry_number=1, + description="", + ), + description="", + is_available=False, + pacer_doc_id="01903600049", + ) + cls.docket_high_relevance_null_date = DocketFactory( case_name="Ipsum Dolor Terms", case_name_full="", @@ -2925,6 +2978,17 @@ def setUpTestData(cls): cause="More Ipsum Dolor Terms", date_filed=None, ) + cls.rd_high_relevance_null_date = RECAPDocumentFactory( + 
docket_entry=DocketEntryWithParentsFactory( + docket=cls.docket_high_relevance_null_date, + entry_number=1, + description="", + ), + description="", + is_available=False, + pacer_doc_id="01903600050", + ) + cls.docket_low_relevance_new_date = DocketFactory( case_name="Ipsum Dolor Terms", case_name_full="", @@ -2934,6 +2998,17 @@ def setUpTestData(cls): source=Docket.RECAP, date_filed=datetime.date(2024, 12, 23), ) + cls.rd_low_relevance_new_date = RECAPDocumentFactory( + docket_entry=DocketEntryWithParentsFactory( + docket=cls.docket_low_relevance_new_date, + entry_number=1, + description="", + ), + description="", + is_available=False, + pacer_doc_id="01903600051", + ) + super().setUpTestData() call_command( "cl_index_parent_and_child_docs", @@ -2943,40 +3018,7 @@ def setUpTestData(cls): testing_mode=True, ) - def _assert_order_in_html( - self, decoded_content: str, expected_order: list - ) -> None: - """Assert that the expected order of fields appears correctly in the HTML content.""" - - for i in range(len(expected_order) - 1): - - print("str(expected_order[i])", str(expected_order[i])) - print("str(expected_order[i + 1])", str(expected_order[i + 1])) - - self.assertTrue( - decoded_content.index(str(expected_order[i])) - < decoded_content.index(str(expected_order[i + 1])), - f"Expected {expected_order[i]} to appear before {expected_order[i + 1]} in the HTML content.", - ) - - async def _test_article_count(self, params, expected_count, field_name): - r = await self.async_client.get("/", params) - tree = html.fromstring(r.content.decode()) - got = len(tree.xpath("//article")) - self.assertEqual( - got, - expected_count, - msg="Did not get the right number of search results in Frontend with %s " - "filter applied.\n" - "Expected: %s\n" - " Got: %s\n\n" - "Params were: %s" % (field_name, expected_count, got, params), - ) - return r - - def test_relevancy_decay_scoring(self) -> None: - """Test relevancy decay scoring for RECAP search results.""" - test_cases = [ + 
cls.test_cases = [ { "name": "Same keywords, order by score desc", "search_params": { @@ -2985,12 +3027,12 @@ def test_relevancy_decay_scoring(self) -> None: "type": SEARCH_TYPES.RECAP, }, "expected_order_frontend": [ - self.docket_recent.docket_number, # Most recent dateFiled - self.docket_old.docket_number, # Oldest dateFiled + cls.docket_recent.docket_number, # Most recent dateFiled + cls.docket_old.docket_number, # Oldest dateFiled ], "expected_order": [ - self.docket_recent.pk, # Most recent dateFiled - self.docket_old.pk, # Oldest dateFiled + cls.docket_recent.pk, # Most recent dateFiled + cls.docket_old.pk, # Oldest dateFiled ], }, { @@ -3001,13 +3043,15 @@ def test_relevancy_decay_scoring(self) -> None: "type": SEARCH_TYPES.RECAP, }, "expected_order_frontend": [ - self.docket_high_relevance.docket_number, # Most relevant by keywords - self.docket_low_relevance.docket_number, # Less relevant by keywords + cls.docket_high_relevance.docket_number, + # Most relevant by keywords + cls.docket_low_relevance.docket_number, + # Less relevant by keywords ], "expected_order": [ - self.docket_high_relevance.pk, + cls.docket_high_relevance.pk, # Most relevant by keywords - self.docket_low_relevance.pk, + cls.docket_low_relevance.pk, # Less relevant by keywords ], }, @@ -3019,14 +3063,32 @@ def test_relevancy_decay_scoring(self) -> None: "type": SEARCH_TYPES.RECAP, }, "expected_order_frontend": [ - self.docket_low_relevance_new_date.docket_number, - self.docket_high_relevance_old_date.docket_number, - self.docket_high_relevance_null_date.docket_number, + cls.docket_low_relevance_new_date.docket_number, + cls.docket_high_relevance_old_date.docket_number, + cls.docket_high_relevance_null_date.docket_number, ], "expected_order": [ - self.docket_low_relevance_new_date.pk, - self.docket_high_relevance_old_date.pk, - self.docket_high_relevance_null_date.pk, + cls.docket_low_relevance_new_date.pk, + cls.docket_high_relevance_old_date.pk, + 
cls.docket_high_relevance_null_date.pk, + ], + }, + { + "name": "Fixed main score (Filtering) different dateFiled, order by score desc", + "search_params": { + "case_name": "Ipsum Dolor Terms", + "order_by": "score desc", + "type": SEARCH_TYPES.RECAP, + }, + "expected_order_frontend": [ + cls.docket_low_relevance_new_date.docket_number, + cls.docket_high_relevance_old_date.docket_number, + cls.docket_high_relevance_null_date.docket_number, + ], + "expected_order": [ + cls.docket_low_relevance_new_date.pk, + cls.docket_high_relevance_old_date.pk, + cls.docket_high_relevance_null_date.pk, ], }, { @@ -3037,33 +3099,57 @@ def test_relevancy_decay_scoring(self) -> None: "type": SEARCH_TYPES.RECAP, }, "expected_order_frontend": [ - self.docket_low_relevance_new_date.docket_number, # 2024, 12, 23 1:21-bk-1241 - self.docket_recent.docket_number, # 2024, 2, 23 1:21-bk-1236 - self.docket_low_relevance.docket_number, # 2022, 2, 23 1:21-bk-1238 - self.docket_high_relevance.docket_number, # 2022, 2, 23 1:21-bk-1237 - self.docket_high_relevance_old_date.docket_number, # 1800, 2, 23 1:21-bk-1239 - self.docket_old.docket_number, # 1732, 2, 23 1:21-bk-1235 - self.docket_high_relevance_null_date.docket_number, # Null 1:21-bk-1240 + cls.docket_low_relevance_new_date.docket_number, + # 2024, 12, 23 1:21-bk-1241 + cls.docket_recent.docket_number, + # 2024, 2, 23 1:21-bk-1236 + cls.docket_low_relevance.docket_number, + # 2022, 2, 23 1:21-bk-1238 + cls.docket_high_relevance.docket_number, + # 2022, 2, 23 1:21-bk-1237 + cls.docket_high_relevance_old_date.docket_number, + # 1800, 2, 23 1:21-bk-1239 + cls.docket_old.docket_number, # 1732, 2, 23 1:21-bk-1235 + cls.docket_high_relevance_null_date.docket_number, + # Null 1:21-bk-1240 ], "expected_order": [ - self.docket_low_relevance_new_date.pk, + cls.docket_low_relevance_new_date.pk, # 2024, 12, 23 1:21-bk-1241 - self.docket_recent.pk, + cls.docket_recent.pk, # 2024, 2, 23 1:21-bk-1236 - self.docket_high_relevance.pk, + 
cls.docket_high_relevance.pk, # 2022, 2, 23 1:21-bk-1237 Greater PK - self.docket_low_relevance.pk, + cls.docket_low_relevance.pk, # 2022, 2, 23 1:21-bk-1238 - self.docket_high_relevance_old_date.pk, + cls.docket_high_relevance_old_date.pk, # 1800, 2, 23 1:21-bk-1239 - self.docket_old.pk, # 1732, 2, 23 1:21-bk-1235 - self.docket_high_relevance_null_date.pk, + cls.docket_old.pk, # 1732, 2, 23 1:21-bk-1235 + cls.docket_high_relevance_null_date.pk, + # Null 1:21-bk-1240 + ], + "expected_order_v3": [ + cls.docket_low_relevance_new_date.pk, + # 2024, 12, 23 1:21-bk-1241 + cls.docket_recent.pk, + # 2024, 2, 23 1:21-bk-1236 + cls.docket_low_relevance.pk, + # 2022, 2, 23 1:21-bk-1238 Indexed earlier than 1:21-bk-1237 + cls.docket_high_relevance.pk, + # 2022, 2, 23 1:21-bk-1237 + cls.docket_high_relevance_old_date.pk, + # 1800, 2, 23 1:21-bk-1239 + cls.docket_old.pk, # 1732, 2, 23 1:21-bk-1235 + cls.docket_high_relevance_null_date.pk, # Null 1:21-bk-1240 ], }, ] - for test in test_cases: + def test_relevancy_decay_scoring_frontend(self) -> None: + """Test relevancy decay scoring for RECAP search Frontend""" + + for test in self.test_cases: with self.subTest(test["name"]): r = async_to_sync(self._test_article_count)( test["search_params"], @@ -3074,11 +3160,27 @@ def test_relevancy_decay_scoring(self) -> None: r.content.decode(), test["expected_order_frontend"] ) + def test_relevancy_decay_scoring_v4_api(self) -> None: + """Test relevancy decay scoring for RECAP search V4 API""" + + search_types = [ + SEARCH_TYPES.RECAP, + SEARCH_TYPES.DOCKETS, + SEARCH_TYPES.RECAP_DOCUMENT, + ] + for search_type in search_types: + for test in self.test_cases: + test["search_params"]["type"] = search_type + self._test_results_ordering(test, "docket_id", version="v4") + + def test_relevancy_decay_scoring_v3_api(self) -> None: + """Test relevancy decay scoring for RECAP search V3 API""" + search_types = [SEARCH_TYPES.RECAP, SEARCH_TYPES.DOCKETS] + for search_type in search_types: - for test in
test_cases: + for test in self.test_cases: test["search_params"]["type"] = search_type - self._test_results_ordering(test, "docket_id") + self._test_results_ordering(test, "docket_id", version="v3") class RECAPSearchAPICommonTests(RECAPSearchTestCase): @@ -3625,17 +3727,17 @@ async def test_results_ordering(self) -> None: # API r = await self._test_api_results_count(params, 3, "order score desc") self.assertTrue( - r.content.decode().index("1:21-bk-1234") - < r.content.decode().index("12-1235"), - msg="'1:21-bk-1234' should come BEFORE '12-1235' when order_by score desc.", + r.content.decode().index("12-1235") # 2016, 8, 16 + < r.content.decode().index("1:21-bk-1234"), # 2015, 8, 16 + msg="'12-1235' should come BEFORE '1:21-bk-1234' when order_by score desc.", ) params["type"] = SEARCH_TYPES.DOCKETS r = await self._test_api_results_count(params, 2, "order") self.assertTrue( - r.content.decode().index("1:21-bk-1234") - < r.content.decode().index("12-1235"), - msg="'1:21-bk-1234' should come BEFORE '12-1235' when order_by score desc.", + r.content.decode().index("12-1235") # 2016, 8, 16 + < r.content.decode().index("1:21-bk-1234"), # 2015, 8, 16 + msg="'12-1235' should come BEFORE '1:21-bk-1234' when order_by score desc.", ) # Order by entry_date_filed desc diff --git a/cl/tests/cases.py b/cl/tests/cases.py index 659c215bbd..184555abb5 100644 --- a/cl/tests/cases.py +++ b/cl/tests/cases.py @@ -346,26 +346,63 @@ async def _test_api_fields_content( f"Parent field '{field}' does not match.", ) - def _test_results_ordering(self, test, field): + def _test_results_ordering(self, test, field, version="v4"): """Ensure dockets appear in the response in a specific order.""" with self.subTest(test=test, msg=f'{test["name"]}'): r = self.client.get( - reverse("search-list", kwargs={"version": "v4"}), + reverse("search-list", kwargs={"version": version}), test["search_params"], ) + + expected_order_key = "expected_order" + if version == "v3": + expected_order_key = ( + 
"expected_order_v3" + if "expected_order_v3" in test + else "expected_order" + ) + self.assertEqual( - len(r.data["results"]), len(test["expected_order"]) + len(r.data["results"]), len(test[expected_order_key]) ) # Note that dockets where the date_field is null are sent to the bottom # of the results actual_order = [result[field] for result in r.data["results"]] self.assertEqual( actual_order, - test["expected_order"], - msg=f'Expected order {test["expected_order"]}, but got {actual_order}', + test[expected_order_key], + msg=f"Expected order {test[expected_order_key]}, but got {actual_order}", + ) + + def _assert_order_in_html( + self, decoded_content: str, expected_order: list + ) -> None: + """Assert that the expected order of documents appears correctly in the + HTML content.""" + + for i in range(len(expected_order) - 1): + self.assertTrue( + decoded_content.index(str(expected_order[i])) + < decoded_content.index(str(expected_order[i + 1])), + f"Expected {expected_order[i]} to appear before {expected_order[i + 1]} in the HTML content.", ) + async def _test_article_count(self, params, expected_count, field_name): + r = await self.async_client.get("/", params) + tree = html.fromstring(r.content.decode()) + got = len(tree.xpath("//article")) + self.assertEqual( + got, + expected_count, + msg="Did not get the right number of search results in Frontend with %s " + "filter applied.\n" + "Expected: %s\n" + " Got: %s\n\n" + "Params were: %s" % (field_name, expected_count, got, params), + ) + return r + def _test_page_variables( self, response, test_case, current_page, search_type ):