Merge pull request #4791 from freelawproject/4726-feat-rip-out-solr-indexing-pipeline

feat(search): Rips out SOLR indexing pipeline
mlissner authored Dec 11, 2024
2 parents d0666a5 + cef871b commit b6d2f60
Showing 73 changed files with 152 additions and 4,145 deletions.
2 changes: 0 additions & 2 deletions cl/alerts/management/commands/cl_send_alerts.py

@@ -295,8 +295,6 @@ def clean_rt_queue(self):
     def remove_stale_rt_items(self, age=2):
         """Remove anything old from the RTQ.
 
-        This helps avoid issues with solr hitting the maxboolean clause errors.
-
         :param age: How many days old should items be before we start deleting
        them?
        """
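
The docstring above describes a simple age-based cutoff. A rough sketch of that cleanup, assuming a RealTimeQueue model with a date_modified field (neither name is confirmed by this diff):

    # Hedged sketch; the model and field names here are assumptions.
    from datetime import timedelta

    from django.utils import timezone

    from cl.alerts.models import RealTimeQueue  # assumed import path

    def remove_stale_rt_items(age: int = 2) -> None:
        """Delete RTQ rows older than `age` days."""
        cutoff = timezone.now() - timedelta(days=age)
        RealTimeQueue.objects.filter(date_modified__lt=cutoff).delete()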
21 changes: 7 additions & 14 deletions cl/alerts/tasks.py

@@ -373,27 +373,20 @@ def send_alert_and_webhook(
 
 
 @app.task(ignore_result=True)
-def send_alerts_and_webhooks(
-    data: Dict[str, Union[List[Tuple], List[int]]]
-) -> List[int]:
+def send_alerts_and_webhooks(data: list[tuple[int, datetime]]) -> List[int]:
     """Send many docket alerts at one time without making numerous calls
     to the send_alert_and_webhook function.
 
-    :param data: A dict with up to two keys:
-      d_pks_to_alert: A list of tuples. Each tuple contains the docket ID, and
-      a time. The time indicates that alerts should be sent for
-      items *after* that point.
-      rds_for_solr: A list of RECAPDocument ids that need to be sent to Solr
-      to be made searchable.
-    :returns: Simply passes through the rds_for_solr list, in case it is
-    consumed by the next task. If rds_for_solr is not provided, returns an
-    empty list.
+    :param data: A list of tuples. Each tuple contains the docket ID, and
+    a time. The time indicates that alerts should be sent for
+    items *after* that point.
+    :returns: An empty list
     """
-    for args in data["d_pks_to_alert"]:
+    for args in data:
         send_alert_and_webhook(*args)
 
-    return cast(List[int], data.get("rds_for_solr", []))
+    return []
 
 
 @app.task(ignore_result=True)
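
With the dict wrapper and the rds_for_solr passthrough gone, callers now hand the task a bare list of (docket ID, cutoff time) tuples. A minimal sketch of the new call shape, with made-up IDs and cutoff:

    # Minimal sketch of the new call shape; the IDs and cutoff are made up.
    from datetime import datetime, timezone

    from cl.alerts.tasks import send_alerts_and_webhooks

    cutoff = datetime(2024, 12, 11, tzinfo=timezone.utc)

    # Each tuple pairs a docket PK with the time after which alerts fire.
    # Called eagerly here; production code would queue it, e.g. via .delay().
    send_alerts_and_webhooks([(1, cutoff), (2, cutoff)])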
2 changes: 1 addition & 1 deletion cl/alerts/tests/tests.py

@@ -1164,7 +1164,7 @@ def test_send_search_alert_webhooks_rates(self):
         ):
             # Monthly alerts cannot be run on the 29th, 30th or 31st.
             with time_machine.travel(self.mock_date, tick=False):
-                # Send Solr Alerts (Except OA)
+                # Send Alerts (Except OA)
                 call_command("cl_send_alerts", rate=rate)
                 # Send ES Alerts (Only OA for now)
                 call_command("cl_send_scheduled_alerts", rate=rate)
4 changes: 2 additions & 2 deletions cl/api/tasks.py

@@ -93,7 +93,7 @@ def send_es_search_alert_webhook(
     """Send a search alert webhook event containing search results from a
     search alert object.
 
-    :param results: The search results returned by SOLR for this alert.
+    :param results: The search results returned for this alert.
     :param webhook_pk: The webhook endpoint ID object to send the event to.
     :param alert: The search alert object.
     """
@@ -134,7 +134,7 @@ def send_search_alert_webhook_es(
     """Send a search alert webhook event containing search results from a
     search alert object.
 
-    :param results: The search results returned by SOLR for this alert.
+    :param results: The search results returned for this alert.
    :param webhook_pk: The webhook endpoint ID object to send the event to.
    :param alert_pk: The search alert ID.
    """
2 changes: 1 addition & 1 deletion cl/api/webhooks.py

@@ -166,7 +166,7 @@ def send_search_alert_webhook(
     """Send a search alert webhook event containing search results from a
     search alert object.
 
-    :param results: The search results returned by SOLR for this alert.
+    :param results: The search results returned for this alert.
     :param webhook: The webhook endpoint object to send the event to.
     :param alert: The search alert object.
     """
10 changes: 0 additions & 10 deletions cl/audio/factories.py

@@ -16,15 +16,6 @@ class Meta:
     sha1 = Faker("sha1")
     download_url = Faker("url")
 
-    @classmethod
-    def _create(cls, model_class, *args, **kwargs):
-        """Creates an instance of the model class without indexing."""
-        obj = model_class(*args, **kwargs)
-        # explicitly sets `index=False` to prevent it from being indexed in SOLR.
-        # Once Solr is removed, we can just remove this method completely.
-        obj.save(index=False)
-        return obj
-
     """
     These hooks are necessary to make this factory compatible with the
     `make_dev_command`. by delegating the file creation to the hooks, we prevent
@@ -60,7 +51,6 @@ def _after_postgeneration(cls, instance, create, results=None):
         if create and results:
             # Some post-generation hooks ran, and may have modified the instance.
             instance.save(
-                index=False,
                 update_fields=["local_path_mp3", "local_path_original_file"],
             )
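
With the _create override removed, the factory falls back to factory_boy's stock DjangoModelFactory._create, which calls the model manager's create() and hence a plain Django save(). A minimal sketch of the resulting shape, abridged to the fields this diff shows:

    # Sketch only; the real factory declares more fields and hooks.
    from factory import Faker
    from factory.django import DjangoModelFactory

    from cl.audio.models import Audio

    class AudioFactory(DjangoModelFactory):
        class Meta:
            model = Audio

        sha1 = Faker("sha1")
        download_url = Faker("url")
        # No _create override: Audio.objects.create() now runs a normal
        # Django save, since save() no longer accepts an `index` kwarg.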
105 changes: 1 addition & 104 deletions cl/audio/models.py

@@ -1,22 +1,11 @@
-from typing import Dict, List, Union
-
 import pghistory
 from django.db import models
-from django.template import loader
-from django.urls import NoReverseMatch, reverse
+from django.urls import reverse
 from model_utils import FieldTracker
 
-from cl.custom_filters.templatetags.text_filters import best_case_name
-from cl.lib.date_time import midnight_pt
 from cl.lib.model_helpers import make_upload_path
 from cl.lib.models import AbstractDateTimeModel, s3_warning_note
-from cl.lib.search_index_utils import (
-    InvalidDocumentError,
-    normalize_search_dicts,
-    null_map,
-)
 from cl.lib.storage import IncrementingAWSMediaStorage
-from cl.lib.utils import deepgetattr
 from cl.people_db.models import Person
 from cl.search.models import SOURCES, Docket
 
@@ -196,98 +185,6 @@ def __str__(self) -> str:
 
     def get_absolute_url(self) -> str:
         return reverse("view_audio_file", args=[self.pk, self.docket.slug])
 
-    def save(  # type: ignore[override]
-        self,
-        index: bool = True,
-        force_commit: bool = False,
-        *args: List,
-        **kwargs: Dict,
-    ) -> None:
-        """
-        Overrides the normal save method, but provides integration with the
-        bulk files and with Solr indexing.
-
-        :param index: Should the item be added to the Solr index?
-        :param force_commit: Should a commit be performed in solr after
-        indexing it?
-        """
-        super().save(*args, **kwargs)  # type: ignore
-        if index:
-            from cl.search.tasks import add_items_to_solr
-
-            add_items_to_solr([self.pk], "audio.Audio", force_commit)
-
-    def delete(  # type: ignore[override]
-        self,
-        *args: List,
-        **kwargs: Dict,
-    ) -> None:
-        """
-        Update the index as items are deleted.
-        """
-        id_cache = self.pk
-        super().delete(*args, **kwargs)  # type: ignore
-        from cl.search.tasks import delete_items
-
-        delete_items.delay([id_cache], "audio.Audio")
-
-    def as_search_dict(self) -> Dict[str, Union[int, List[int], str]]:
-        """Create a dict that can be ingested by Solr"""
-        # IDs
-        out = {
-            "id": self.pk,
-            "docket_id": self.docket_id,
-            "court_id": self.docket.court_id,
-        }
-
-        # Docket
-        docket = {"docketNumber": self.docket.docket_number}
-        if self.docket.date_argued is not None:
-            docket["dateArgued"] = midnight_pt(self.docket.date_argued)
-        if self.docket.date_reargued is not None:
-            docket["dateReargued"] = midnight_pt(self.docket.date_reargued)
-        if self.docket.date_reargument_denied is not None:
-            docket["dateReargumentDenied"] = midnight_pt(
-                self.docket.date_reargument_denied
-            )
-        out.update(docket)
-
-        # Court
-        out.update(
-            {
-                "court": self.docket.court.full_name,
-                "court_citation_string": self.docket.court.citation_string,
-                "court_exact": self.docket.court_id,  # For faceting
-            }
-        )
-
-        # Audio File
-        out.update(
-            {
-                "caseName": best_case_name(self),
-                "panel_ids": [judge.pk for judge in self.panel.all()],
-                "judge": self.judges,
-                "file_size_mp3": deepgetattr(
-                    self, "local_path_mp3.size", None
-                ),
-                "duration": self.duration,
-                "source": self.source,
-                "download_url": self.download_url,
-                "local_path": deepgetattr(self, "local_path_mp3.name", None),
-            }
-        )
-        try:
-            out["absolute_url"] = self.get_absolute_url()
-        except NoReverseMatch:
-            raise InvalidDocumentError(
-                f"Unable to save to index due to missing absolute_url: {self.pk}"
-            )
-
-        text_template = loader.get_template("indexes/audio_text.txt")
-        out["text"] = text_template.render({"item": self}).translate(null_map)
-
-        return normalize_search_dicts(out)
 
 
 @pghistory.track(
     pghistory.InsertEvent(), pghistory.DeleteEvent(), obj_field=None
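
Since save() and delete() revert to Django's stock implementations, call sites lose the Solr-specific keyword arguments. A hedged before-and-after sketch (the `docket` variable is assumed to exist):

    from cl.audio.models import Audio

    audio = Audio(docket=docket)  # `docket` assumed to exist

    # Before this commit, callers opted out of Solr indexing explicitly:
    #     audio.save(index=False, force_commit=False)
    # After it, the stock Django methods apply:
    audio.save()    # no add_items_to_solr call
    audio.delete()  # no delete_items task dispatched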