Skip to content

Commit

Permalink
Merge branch 'main' into 4726-feat-rip-out-solr-indexing-pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
mlissner authored Dec 11, 2024
2 parents 3a0c710 + d0666a5 commit cef871b
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 0 deletions.
3 changes: 3 additions & 0 deletions cl/lib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def cleanup_main_query(query_string: str) -> str:
- Add hyphens to district docket numbers that lack them
- Ignore tokens inside phrases
- Handle query punctuation correctly by mostly ignoring it
- Remove spaces between a phrase query and the tilde (~) operator
- Capture "court_id:court" queries, retrieve the child courts for each
court in the query, append them, and then add them back to the original
query.
Expand Down Expand Up @@ -289,6 +290,8 @@ def cleanup_main_query(query_string: str) -> str:
cleaned_items.append(f'"{item}"')

cleaned_query = "".join(cleaned_items)
# Remove spaces between a phrase query and the tilde (~) operator,
# e.g. '"this is a test" ~2' becomes '"this is a test"~2'.
cleaned_query = re.sub(r'(")\s*(?=~\d+)', r"\1", cleaned_query)
# If it's a court_id query, parse it, append the child courts, and then
# reintegrate them into the original query.
final_query = modify_court_id_queries(cleaned_query)
Expand Down
8 changes: 8 additions & 0 deletions cl/search/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -981,6 +981,14 @@ def test_query_cleanup_function(self) -> None:
'"this is a test" 22cv3332',
'"this is a test" docketNumber:"22-cv-3332"~1',
),
(
'"this is a test" ~2',
'"this is a test"~2',
),
(
'"this is a test" ~2 and "net neutrality" ~5 and 22cv3332',
'"this is a test"~2 and "net neutrality"~5 and docketNumber:"22-cv-3332"~1',
),
)
for q, a in q_a:
print("Does {q} --> {a} ? ".format(**{"q": q, "a": a}))
Expand Down

0 comments on commit cef871b

Please sign in to comment.