Merge pull request #4791 from freelawproject/4726-feat-rip-out-solr-indexing-pipeline

feat(search): Rips out SOLR indexing pipeline
mlissner authored Dec 11, 2024
2 parents d0666a5 + cef871b commit b6d2f60
Showing 73 changed files with 152 additions and 4,145 deletions.
2 changes: 0 additions & 2 deletions cl/alerts/management/commands/cl_send_alerts.py

@@ -295,8 +295,6 @@ def clean_rt_queue(self):
     def remove_stale_rt_items(self, age=2):
         """Remove anything old from the RTQ.
 
-        This helps avoid issues with solr hitting the maxboolean clause errors.
-
         :param age: How many days old should items be before we start deleting
        them?
        """
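
The docstring above describes a simple age-based cutoff. A rough sketch of that cleanup, assuming a RealTimeQueue model with a date_modified field (neither name is confirmed by this diff):

    # Hedged sketch; the model and field names here are assumptions.
    from datetime import timedelta

    from django.utils import timezone

    from cl.alerts.models import RealTimeQueue  # assumed import path

    def remove_stale_rt_items(age: int = 2) -> None:
        """Delete RTQ rows older than `age` days."""
        cutoff = timezone.now() - timedelta(days=age)
        RealTimeQueue.objects.filter(date_modified__lt=cutoff).delete()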
21 changes: 7 additions & 14 deletions cl/alerts/tasks.py

@@ -373,27 +373,20 @@ def send_alert_and_webhook(
 
 
 @app.task(ignore_result=True)
-def send_alerts_and_webhooks(
-    data: Dict[str, Union[List[Tuple], List[int]]]
-) -> List[int]:
+def send_alerts_and_webhooks(data: list[tuple[int, datetime]]) -> List[int]:
     """Send many docket alerts at one time without making numerous calls
     to the send_alert_and_webhook function.
 
-    :param data: A dict with up to two keys:
-      d_pks_to_alert: A list of tuples. Each tuple contains the docket ID, and
-      a time. The time indicates that alerts should be sent for
-      items *after* that point.
-      rds_for_solr: A list of RECAPDocument ids that need to be sent to Solr
-      to be made searchable.
-    :returns: Simply passes through the rds_for_solr list, in case it is
-    consumed by the next task. If rds_for_solr is not provided, returns an
-    empty list.
+    :param data: A list of tuples. Each tuple contains the docket ID, and
+    a time. The time indicates that alerts should be sent for
+    items *after* that point.
+    :returns: An empty list
     """
-    for args in data["d_pks_to_alert"]:
+    for args in data:
         send_alert_and_webhook(*args)
 
-    return cast(List[int], data.get("rds_for_solr", []))
+    return []
 
 
 @app.task(ignore_result=True)
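
With the dict wrapper and the rds_for_solr passthrough gone, callers now hand the task a bare list of (docket ID, cutoff time) tuples. A minimal sketch of the new call shape, with made-up IDs and cutoff:

    # Minimal sketch of the new call shape; the IDs and cutoff are made up.
    from datetime import datetime, timezone

    from cl.alerts.tasks import send_alerts_and_webhooks

    cutoff = datetime(2024, 12, 11, tzinfo=timezone.utc)

    # Each tuple pairs a docket PK with the time after which alerts fire.
    # Called eagerly here; production code would queue it, e.g. via .delay().
    send_alerts_and_webhooks([(1, cutoff), (2, cutoff)])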
2 changes: 1 addition & 1 deletion cl/alerts/tests/tests.py

@@ -1164,7 +1164,7 @@ def test_send_search_alert_webhooks_rates(self):
         ):
             # Monthly alerts cannot be run on the 29th, 30th or 31st.
             with time_machine.travel(self.mock_date, tick=False):
-                # Send Solr Alerts (Except OA)
+                # Send Alerts (Except OA)
                 call_command("cl_send_alerts", rate=rate)
                 # Send ES Alerts (Only OA for now)
                 call_command("cl_send_scheduled_alerts", rate=rate)
4 changes: 2 additions & 2 deletions cl/api/tasks.py

@@ -93,7 +93,7 @@ def send_es_search_alert_webhook(
     """Send a search alert webhook event containing search results from a
     search alert object.
 
-    :param results: The search results returned by SOLR for this alert.
+    :param results: The search results returned for this alert.
     :param webhook_pk: The webhook endpoint ID object to send the event to.
     :param alert: The search alert object.
     """
@@ -134,7 +134,7 @@ def send_search_alert_webhook_es(
     """Send a search alert webhook event containing search results from a
     search alert object.
 
-    :param results: The search results returned by SOLR for this alert.
+    :param results: The search results returned for this alert.
    :param webhook_pk: The webhook endpoint ID object to send the event to.
    :param alert_pk: The search alert ID.
    """
2 changes: 1 addition & 1 deletion cl/api/webhooks.py

@@ -166,7 +166,7 @@ def send_search_alert_webhook(
     """Send a search alert webhook event containing search results from a
     search alert object.
 
-    :param results: The search results returned by SOLR for this alert.
+    :param results: The search results returned for this alert.
     :param webhook: The webhook endpoint object to send the event to.
     :param alert: The search alert object.
     """
10 changes: 0 additions & 10 deletions cl/audio/factories.py

@@ -16,15 +16,6 @@ class Meta:
     sha1 = Faker("sha1")
     download_url = Faker("url")
 
-    @classmethod
-    def _create(cls, model_class, *args, **kwargs):
-        """Creates an instance of the model class without indexing."""
-        obj = model_class(*args, **kwargs)
-        # explicitly sets `index=False` to prevent it from being indexed in SOLR.
-        # Once Solr is removed, we can just remove this method completely.
-        obj.save(index=False)
-        return obj
-
     """
     These hooks are necessary to make this factory compatible with the
     `make_dev_command`. by delegating the file creation to the hooks, we prevent
@@ -60,7 +51,6 @@ def _after_postgeneration(cls, instance, create, results=None):
         if create and results:
             # Some post-generation hooks ran, and may have modified the instance.
             instance.save(
-                index=False,
                 update_fields=["local_path_mp3", "local_path_original_file"],
             )
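
With the _create override removed, the factory falls back to factory_boy's stock DjangoModelFactory._create, which calls the model manager's create() and hence a plain Django save(). A minimal sketch of the resulting shape, abridged to the fields this diff shows:

    # Sketch only; the real factory declares more fields and hooks.
    from factory import Faker
    from factory.django import DjangoModelFactory

    from cl.audio.models import Audio

    class AudioFactory(DjangoModelFactory):
        class Meta:
            model = Audio

        sha1 = Faker("sha1")
        download_url = Faker("url")
        # No _create override: Audio.objects.create() now runs a normal
        # Django save, since save() no longer accepts an `index` kwarg.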
105 changes: 1 addition & 104 deletions cl/audio/models.py

@@ -1,22 +1,11 @@
-from typing import Dict, List, Union
-
 import pghistory
 from django.db import models
-from django.template import loader
-from django.urls import NoReverseMatch, reverse
+from django.urls import reverse
 from model_utils import FieldTracker
 
-from cl.custom_filters.templatetags.text_filters import best_case_name
-from cl.lib.date_time import midnight_pt
 from cl.lib.model_helpers import make_upload_path
 from cl.lib.models import AbstractDateTimeModel, s3_warning_note
-from cl.lib.search_index_utils import (
-    InvalidDocumentError,
-    normalize_search_dicts,
-    null_map,
-)
 from cl.lib.storage import IncrementingAWSMediaStorage
-from cl.lib.utils import deepgetattr
 from cl.people_db.models import Person
 from cl.search.models import SOURCES, Docket
 
@@ -196,98 +185,6 @@ def __str__(self) -> str:
 
     def get_absolute_url(self) -> str:
         return reverse("view_audio_file", args=[self.pk, self.docket.slug])
 
-    def save(  # type: ignore[override]
-        self,
-        index: bool = True,
-        force_commit: bool = False,
-        *args: List,
-        **kwargs: Dict,
-    ) -> None:
-        """
-        Overrides the normal save method, but provides integration with the
-        bulk files and with Solr indexing.
-
-        :param index: Should the item be added to the Solr index?
-        :param force_commit: Should a commit be performed in solr after
-        indexing it?
-        """
-        super().save(*args, **kwargs)  # type: ignore
-        if index:
-            from cl.search.tasks import add_items_to_solr
-
-            add_items_to_solr([self.pk], "audio.Audio", force_commit)
-
-    def delete(  # type: ignore[override]
-        self,
-        *args: List,
-        **kwargs: Dict,
-    ) -> None:
-        """
-        Update the index as items are deleted.
-        """
-        id_cache = self.pk
-        super().delete(*args, **kwargs)  # type: ignore
-        from cl.search.tasks import delete_items
-
-        delete_items.delay([id_cache], "audio.Audio")
-
-    def as_search_dict(self) -> Dict[str, Union[int, List[int], str]]:
-        """Create a dict that can be ingested by Solr"""
-        # IDs
-        out = {
-            "id": self.pk,
-            "docket_id": self.docket_id,
-            "court_id": self.docket.court_id,
-        }
-
-        # Docket
-        docket = {"docketNumber": self.docket.docket_number}
-        if self.docket.date_argued is not None:
-            docket["dateArgued"] = midnight_pt(self.docket.date_argued)
-        if self.docket.date_reargued is not None:
-            docket["dateReargued"] = midnight_pt(self.docket.date_reargued)
-        if self.docket.date_reargument_denied is not None:
-            docket["dateReargumentDenied"] = midnight_pt(
-                self.docket.date_reargument_denied
-            )
-        out.update(docket)
-
-        # Court
-        out.update(
-            {
-                "court": self.docket.court.full_name,
-                "court_citation_string": self.docket.court.citation_string,
-                "court_exact": self.docket.court_id,  # For faceting
-            }
-        )
-
-        # Audio File
-        out.update(
-            {
-                "caseName": best_case_name(self),
-                "panel_ids": [judge.pk for judge in self.panel.all()],
-                "judge": self.judges,
-                "file_size_mp3": deepgetattr(
-                    self, "local_path_mp3.size", None
-                ),
-                "duration": self.duration,
-                "source": self.source,
-                "download_url": self.download_url,
-                "local_path": deepgetattr(self, "local_path_mp3.name", None),
-            }
-        )
-        try:
-            out["absolute_url"] = self.get_absolute_url()
-        except NoReverseMatch:
-            raise InvalidDocumentError(
-                f"Unable to save to index due to missing absolute_url: {self.pk}"
-            )
-
-        text_template = loader.get_template("indexes/audio_text.txt")
-        out["text"] = text_template.render({"item": self}).translate(null_map)
-
-        return normalize_search_dicts(out)
 
 
 @pghistory.track(
     pghistory.InsertEvent(), pghistory.DeleteEvent(), obj_field=None
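
Since save() and delete() revert to Django's stock implementations, call sites lose the Solr-specific keyword arguments. A hedged before-and-after sketch (the `docket` variable is assumed to exist):

    from cl.audio.models import Audio

    audio = Audio(docket=docket)  # `docket` assumed to exist

    # Before this commit, callers opted out of Solr indexing explicitly:
    #     audio.save(index=False, force_commit=False)
    # After it, the stock Django methods apply:
    audio.save()    # no add_items_to_solr call
    audio.delete()  # no delete_items task dispatched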