Skip to content

Commit

Permalink
Refactor text search functions
Browse files Browse the repository at this point in the history
Simplify text search to a single function
  • Loading branch information
matti-lamppu committed Sep 2, 2024
1 parent 2c1cc45 commit 6320482
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 57 deletions.
18 changes: 5 additions & 13 deletions api/graphql/types/allocated_time_slot/filtersets.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import django_filters
from django.contrib.postgres.search import SearchVector
from django.db import models
from graphene_django_extensions import ModelFilterSet
from graphene_django_extensions.filters import EnumMultipleChoiceFilter, IntChoiceFilter, IntMultipleChoiceFilter
Expand All @@ -8,7 +7,7 @@
from applications.enums import ApplicantTypeChoice, ApplicationSectionStatusChoice, Weekday
from applications.models import AllocatedTimeSlot
from applications.querysets.allocated_time_slot import AllocatedTimeSlotQuerySet
from common.db import raw_prefixed_query
from common.db import text_search

__all__ = [
"AllocatedTimeSlotFilterSet",
Expand Down Expand Up @@ -58,22 +57,15 @@ class Meta:
def filter_by_section_status(qs: AllocatedTimeSlotQuerySet, name: str, value: list[str]) -> models.QuerySet:
return qs.has_section_status_in(value)

@staticmethod
def filter_text_search(qs: AllocatedTimeSlotQuerySet, name: str, value: str) -> models.QuerySet:
# If this becomes slow, look into optimisation strategies here:
# https://docs.djangoproject.com/en/4.2/ref/contrib/postgres/search/#performance
vector = SearchVector(
def filter_text_search(self, qs: AllocatedTimeSlotQuerySet, name: str, value: str) -> models.QuerySet:
fields = (
"reservation_unit_option__application_section__id",
"reservation_unit_option__application_section__name",
"reservation_unit_option__application_section__application__id",
"applicant",
)
query = raw_prefixed_query(value)
return (
qs.alias(applicant=L("reservation_unit_option__application_section__application__applicant"))
.annotate(search=vector)
.filter(search=query)
)
qs = qs.alias(applicant=L("reservation_unit_option__application_section__application__applicant"))
return text_search(qs=qs, fields=fields, text=value)

@staticmethod
def order_by_allocated_time_of_week(qs: AllocatedTimeSlotQuerySet, desc: bool) -> models.QuerySet:
Expand Down
11 changes: 4 additions & 7 deletions api/graphql/types/application/filtersets.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import django_filters
from django.contrib.postgres.search import SearchVector
from django.db import models
from django.db.models import QuerySet
from graphene_django_extensions import ModelFilterSet
Expand All @@ -9,7 +8,7 @@
from applications.enums import ApplicantTypeChoice, ApplicationStatusChoice
from applications.models import Application
from applications.querysets.application import ApplicationQuerySet
from common.db import raw_prefixed_query
from common.db import text_search

__all__ = [
"ApplicationFilterSet",
Expand Down Expand Up @@ -39,11 +38,9 @@ class Meta:

@staticmethod
def filter_by_text_search(qs: ApplicationQuerySet, name: str, value: str) -> models.QuerySet:
# If this becomes slow, look into optimisation strategies here:
# https://docs.djangoproject.com/en/4.2/ref/contrib/postgres/search/#performance
vector = SearchVector("id", "application_sections__id", "application_sections__name", "applicant")
query = raw_prefixed_query(value)
return qs.alias(applicant=L("applicant")).annotate(search=vector).filter(search=query)
fields = ("id", "application_sections__id", "application_sections__name", "applicant")
qs = qs.alias(applicant=L("applicant"))
return text_search(qs=qs, fields=fields, text=value)

@staticmethod
def filter_by_status(qs: ApplicationQuerySet, name: str, value: list[str]) -> QuerySet:
Expand Down
11 changes: 4 additions & 7 deletions api/graphql/types/application_section/filtersets.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from typing import Any

import django_filters
from django.contrib.postgres.search import SearchVector
from django.db import models
from django.db.models import QuerySet
from django.db.models.functions import Lower
Expand All @@ -16,7 +15,7 @@
from applications.enums import ApplicantTypeChoice, ApplicationSectionStatusChoice, ApplicationStatusChoice, Priority
from applications.models import ApplicationSection
from applications.querysets.application_section import ApplicationSectionQuerySet
from common.db import raw_prefixed_query
from common.db import text_search

__all__ = [
"ApplicationSectionFilterSet",
Expand Down Expand Up @@ -87,11 +86,9 @@ def filter_include_preferred_order(qs: ApplicationSectionQuerySet, name: str, va

@staticmethod
def filter_text_search(qs: ApplicationSectionQuerySet, name: str, value: str) -> QuerySet:
# If this becomes slow, look into optimisation strategies here:
# https://docs.djangoproject.com/en/4.2/ref/contrib/postgres/search/#performance
vector = SearchVector("application__id", "id", "name", "applicant")
query = raw_prefixed_query(value)
return qs.alias(applicant=L("application__applicant")).annotate(search=vector).filter(search=query)
fields = ("application__id", "id", "name", "applicant")
qs = qs.alias(applicant=L("application__applicant"))
return text_search(qs=qs, fields=fields, text=value)

def filter_has_allocations(self, queryset: ApplicationSectionQuerySet, name: str, value: bool) -> QuerySet:
if value:
Expand Down
9 changes: 4 additions & 5 deletions api/graphql/types/rejected_occurrence/filtersets.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import django_filters
from django.contrib.postgres.search import SearchVector
from django.db import models
from graphene_django_extensions import ModelFilterSet
from graphene_django_extensions.filters import IntChoiceFilter, IntMultipleChoiceFilter
from lookup_property import L

from common.db import raw_prefixed_query
from common.db import text_search
from reservations.models import RejectedOccurrence
from reservations.querysets import RejectedOccurrenceQuerySet

Expand Down Expand Up @@ -79,13 +78,12 @@ class Meta:
def filter_text_search(qs: RejectedOccurrenceQuerySet, name: str, value: str) -> models.QuerySet:
# If this becomes slow, look into optimisation strategies here:
# https://docs.djangoproject.com/en/4.2/ref/contrib/postgres/search/#performance
vector = SearchVector(
fields = (
"recurring_reservation__allocated_time_slot__reservation_unit_option__application_section__id",
"recurring_reservation__allocated_time_slot__reservation_unit_option__application_section__name",
"recurring_reservation__allocated_time_slot__reservation_unit_option__application_section__application__id",
"applicant",
)
query = raw_prefixed_query(value)
applicant_ref = (
"recurring_reservation"
"__allocated_time_slot"
Expand All @@ -94,7 +92,8 @@ def filter_text_search(qs: RejectedOccurrenceQuerySet, name: str, value: str) ->
"__application"
"__applicant"
)
return qs.alias(applicant=L(applicant_ref)).annotate(search=vector).filter(search=query)
qs = qs.alias(applicant=L(applicant_ref))
return text_search(qs=qs, fields=fields, text=value)

@staticmethod
def order_by_applicant(qs: RejectedOccurrenceQuerySet, desc: bool) -> models.QuerySet:
Expand Down
16 changes: 4 additions & 12 deletions api/graphql/types/reservation/filtersets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
from typing import TYPE_CHECKING

import django_filters
from django.contrib.postgres.search import SearchRank, SearchVector
from django.db import models
from django.db.models import Case, CharField, F, Q, QuerySet, Value, When
from django.db.models.functions import Concat
from graphene_django_extensions import ModelFilterSet
from graphene_django_extensions.filters import EnumMultipleChoiceFilter, IntMultipleChoiceFilter

from api.graphql.extensions.filters import TimezoneAwareDateFilter
from common.db import raw_prefixed_query
from common.db import text_search
from merchants.enums import OrderStatusWithFree
from permissions.helpers import has_general_permission
from permissions.models import GeneralPermissionChoices, UnitPermissionChoices
Expand Down Expand Up @@ -148,8 +147,7 @@ def filter_by_reservation_unit_name(qs: QuerySet, name: str, value: str) -> Quer

return qs.filter(q).distinct()

@staticmethod
def filter_by_text_search(qs: QuerySet, name: str, value: str) -> QuerySet:
def filter_by_text_search(self, qs: QuerySet, name: str, value: str) -> QuerySet:
value = value.strip()
if not value:
return qs
Expand All @@ -159,7 +157,7 @@ def filter_by_text_search(qs: QuerySet, name: str, value: str) -> QuerySet:
return qs.filter(Q(user__email__icontains=value) | Q(reservee_email__icontains=value))

if len(value) >= 3:
vector = SearchVector(
fields = (
"pk",
"name",
"reservee_id",
Expand All @@ -172,13 +170,7 @@ def filter_by_text_search(qs: QuerySet, name: str, value: str) -> QuerySet:
"user__last_name",
"recurring_reservation__name",
)
query = raw_prefixed_query(value)
text_search_rank = SearchRank(vector, query)
return (
qs.annotate(search=vector, text_search_rank=text_search_rank)
.filter(search=query)
.order_by("-text_search_rank") # most relevant first
)
return text_search(qs=qs, fields=fields, text=value)

if value.isnumeric():
return qs.filter(pk=int(value))
Expand Down
56 changes: 46 additions & 10 deletions common/db.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from typing import Any
from collections.abc import Iterable
from typing import Any, Literal, TypeVar

from django.contrib.postgres.fields import ArrayField
from django.contrib.postgres.search import SearchQuery
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector
from django.db import models

__all__ = [
Expand All @@ -10,7 +11,7 @@
"SubqueryArray",
"SubqueryCount",
"SubquerySum",
"raw_prefixed_query",
"text_search",
]


Expand Down Expand Up @@ -161,13 +162,48 @@ class ArrayUnnest(models.Func):
arity = 1


def raw_prefixed_query(text: str) -> SearchQuery:
TQuerySet = TypeVar("TQuerySet")


def text_search(
qs: TQuerySet,
fields: Iterable[str],
text: str,
language: Literal["finnish", "english", "swedish"] = "finnish",
) -> TQuerySet:
"""
Create a query that searches for each word separately and matched partial words if match is a prefix.
Remove any whitespace and replace any single quote mark with two quote marks.
Query with postgres full text search.
https://www.postgresql.org/docs/current/datatype-textsearch.html#DATATYPE-TSQUERY
:param qs: QuerySet to filter.
:param fields: Fields to search.
:param text: Text to search for.
:param language: Language to search in.
"""
# If this becomes slow, look into optimisation strategies here:
# https://docs.djangoproject.com/en/5.1/ref/contrib/postgres/search/#performance
vector = SearchVector(*fields, config=language)

search = build_search(text)
query = SearchQuery(value=search, config=language, search_type="raw")
rank = SearchRank(vector, query)
return qs.annotate(ts_vector=vector, ts_rank=rank).filter(ts_vector=query)


def build_search(text: str) -> str:
"""
Build raw postgres full text search query from text.
Replace single quotes with two single quotes so that they are treated as whitespace in the search.
Quote search terms, and support prefix matching.
Match all search terms with the OR operator.
Ref. https://www.postgresql.org/docs/current/datatype-textsearch.html#DATATYPE-TSQUERY
"""
return SearchQuery(
" | ".join(f"""'{value.replace("'", "''")}':*""" for value in text.split(" ") if value != ""),
search_type="raw",
)
search_terms: list[str] = []
for value in text.split(" "):
if value:
value = value.replace("'", "''")
value = f"'{value}':*"
search_terms.append(value)
return " | ".join(search_terms)
8 changes: 8 additions & 0 deletions common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

from django.conf import settings
from django.db import models
from django.http import HttpRequest
from django.utils import translation
from django.utils.functional import Promise
from django.utils.translation import get_language_from_request
from modeltranslation.manager import get_translatable_fields_for_model

from users.models import User
Expand All @@ -14,6 +16,7 @@
"comma_sep_str",
"get_attr_by_language",
"get_field_to_related_field_mapping",
"get_text_search_language",
"get_translation_fields",
"with_indices",
]
Expand Down Expand Up @@ -148,6 +151,11 @@ def translate_for_user(text: Promise, user: User) -> str:
return str(text)


def get_text_search_language(request: HttpRequest) -> Literal["finnish", "english", "swedish"]:
lang_code: Literal["fi", "en", "sv"] = get_language_from_request(request)
return "swedish" if lang_code == "sv" else "english" if lang_code == "en" else "finnish"


def safe_getattr(obj: object, dotted_path: str, default: Any = None) -> Any:
"""
Examples:
Expand Down
6 changes: 3 additions & 3 deletions tests/test_graphql_api/test_application/test_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,19 +261,19 @@ def test_application__filter__by_text_search__section_name__prefix(graphql):
organisation=None,
contact_person=None,
user=None,
application_sections__name="foo",
application_sections__name="kirjastoryhmä",
)
ApplicationFactory.create_in_status_draft(
organisation=None,
contact_person=None,
user=None,
application_sections__name="bar",
application_sections__name="suunnistusryhmä",
)
graphql.login_user_based_on_type(UserType.SUPERUSER)

# when:
# - User tries to filter applications with a text search, which is only a prefix match
query = applications_query(text_search="fo")
query = applications_query(text_search="kirjasto")
response = graphql(query)

# then:
Expand Down

0 comments on commit 6320482

Please sign in to comment.