From f32392fc9ec6b265957415e59c1780dae4da6fa6 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Tue, 10 Dec 2024 23:42:28 -0400 Subject: [PATCH] feat(lib): Removes spaces in proximity search --- cl/lib/utils.py | 3 +++ cl/search/tests/tests.py | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/cl/lib/utils.py b/cl/lib/utils.py index 592f8876d0..172298c808 100644 --- a/cl/lib/utils.py +++ b/cl/lib/utils.py @@ -239,6 +239,7 @@ def cleanup_main_query(query_string: str) -> str: - Add hyphens to district docket numbers that lack them - Ignore tokens inside phrases - Handle query punctuation correctly by mostly ignoring it + - Remove spaces between a phrase query and the tilde (~) operator - Capture "court_id:court" queries, retrieve the child courts for each court in the query, append them, and then add them back to the original query. @@ -289,6 +290,8 @@ def cleanup_main_query(query_string: str) -> str: cleaned_items.append(f'"{item}"') cleaned_query = "".join(cleaned_items) + # Remove spaces between a phrase query and the tilde (~) operator + cleaned_query = re.sub(r'(")\s*(?=~\d+)', r"\1", cleaned_query) # If it's a court_id query, parse it, append the child courts, and then # reintegrate them into the original query. final_query = modify_court_id_queries(cleaned_query) diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index 6fc929a671..76bbecf5e4 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -1156,6 +1156,14 @@ def test_query_cleanup_function(self) -> None: '"this is a test" 22cv3332', '"this is a test" docketNumber:"22-cv-3332"~1', ), + ( + '"this is a test" ~2', + '"this is a test"~2', + ), + ( + '"this is a test" ~2 and "net neutrality" ~5 and 22cv3332', + '"this is a test"~2 and "net neutrality"~5 and docketNumber:"22-cv-3332"~1', + ), ) for q, a in q_a: print("Does {q} --> {a} ? ".format(**{"q": q, "a": a}))