Skip to content

Commit

Permalink
Merge pull request #354 from City-of-Turku/feature/search-hyphenate-a…
Browse files Browse the repository at this point in the history
…ddresses

Feature/search hyphenate addresses
  • Loading branch information
juuso-j authored Jun 11, 2024
2 parents 3012793 + 2a4580b commit b9d8536
Show file tree
Hide file tree
Showing 13 changed files with 221 additions and 18 deletions.
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ django-modeltranslation
flake8
requests
requests_cache
git+https://github.com/City-of-Helsinki/django-munigeo@v0.2.76#egg=django-munigeo
git+https://github.com/City-of-Helsinki/django-munigeo@v0.2.84#egg=django-munigeo
pytz
django-cors-headers
django-extensions
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ django-mptt==0.13.4
# via
# -r requirements.in
# django-munigeo
django-munigeo @ git+https://github.com/City-of-Helsinki/django-munigeo@v0.2.76
django-munigeo @ git+https://github.com/City-of-Helsinki/django-munigeo@v0.2.84
# via -r requirements.in
django-polymorphic==3.1.0
# via -r requirements.in
Expand Down
18 changes: 18 additions & 0 deletions services/fixtures/exclusion_words.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
{
"model": "services.exclusionword",
"pk": 1,
"fields": {
"word": "katu",
"language_short": "fi"
}
},
{
"model": "services.exclusionword",
"pk": 2,
"fields": {
"word": "tie",
"language_short": "fi"
}
}
]
65 changes: 57 additions & 8 deletions services/management/commands/index_search_columns.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import logging
from datetime import datetime, timedelta

from django.contrib.postgres.search import SearchVector
from django.core.management.base import BaseCommand
from django.utils import timezone
from munigeo.models import Address, AdministrativeDivision

from services.models import Service, ServiceNode, Unit
from services.search.utils import hyphenate
from services.search.constants import HYPHENATE_ADDRESSES_MODIFIED_WITHIN_DAYS
from services.search.utils import get_foreign_key_attr, hyphenate

logger = logging.getLogger("search")

Expand All @@ -27,28 +30,42 @@ def get_search_column(model, lang):
return search_column


def generate_syllables(
    model, hyphenate_all_addresses=False, hyphenate_addresses_from=None
):
    """
    Generates syllables for the given model.

    Every processed row gets its ``syllables_fi`` list rebuilt from the
    columns named by ``model.get_syllable_fi_columns()`` and is then saved.
    Only words that hyphenate into more than one syllable contribute.

    For the ``Address`` model only a subset of rows is processed unless
    ``hyphenate_all_addresses`` is true: the rows whose ``modified_at`` is at
    or after ``hyphenate_addresses_from``. When no start time is given, it
    defaults to HYPHENATE_ADDRESSES_MODIFIED_WITHIN_DAYS days before the
    newest ``modified_at`` value found in the Address table.

    :param model: the model class whose rows are processed.
    :param hyphenate_all_addresses: process every Address row instead of
        only the recently modified ones. Ignored for other models.
    :param hyphenate_addresses_from: lower bound for ``modified_at`` when
        processing a subset of the Address rows.
    :return: the number of rows that were saved.
    """
    save_kwargs = {}
    if model.__name__ == "Address" and not hyphenate_all_addresses:
        # NOTE(review): presumably tells Address.save() to leave modified_at
        # untouched, so re-indexing does not mark rows as freshly modified.
        # Confirm against django-munigeo.
        save_kwargs["skip_modified_at"] = True
        if not hyphenate_addresses_from:
            try:
                newest = Address.objects.latest("modified_at")
            except Address.DoesNotExist:
                # Empty table: there is nothing to hyphenate.
                return 0
            hyphenate_addresses_from = newest.modified_at - timedelta(
                days=HYPHENATE_ADDRESSES_MODIFIED_WITHIN_DAYS
            )
        qs = model.objects.filter(modified_at__gte=hyphenate_addresses_from)
    else:
        qs = model.objects.all()

    num_populated = 0
    previous_auto_created = model._meta.auto_created
    # Disable sending of signals
    model._meta.auto_created = True
    try:
        for row in qs.iterator(chunk_size=10000):
            row.syllables_fi = []
            for column in model.get_syllable_fi_columns():
                row_content = get_foreign_key_attr(row, column)
                if not row_content:
                    continue
                # Rows might be of type str or Array, if str
                # cast to array by splitting.
                if isinstance(row_content, str):
                    row_content = row_content.split()
                for word in row_content:
                    syllables = hyphenate(word)
                    # A word that cannot be split adds nothing.
                    if len(syllables) > 1:
                        row.syllables_fi.extend(syllables)
            row.save(**save_kwargs)
            num_populated += 1
    finally:
        # Enable sending of signals, also when a row fails to save.
        model._meta.auto_created = previous_auto_created
    return num_populated
Expand Down Expand Up @@ -85,13 +102,45 @@ def index_servicenodes(lang):


class Command(BaseCommand):
def handle(self, *args, **kwargs):

def add_arguments(self, parser):
    """
    Register the command line options that control address hyphenation.

    ``--hyphenate_addresses_from`` takes an optional timestamp string and
    ``--hyphenate_all_addresses`` is a boolean flag.
    """
    from_help = (
        "Hyphenate addresses whose modified_at timestamp starts at given "
        "timestamp YYYY-MM-DDTHH:MM:SS"
    )
    all_help = "Hyphenate all addresses"

    parser.add_argument(
        "--hyphenate_addresses_from", nargs="?", type=str, help=from_help
    )
    parser.add_argument(
        "--hyphenate_all_addresses", action="store_true", help=all_help
    )

def handle(self, *args, **options):
hyphenate_all_addresses = options.get("hyphenate_all_addresses", None)
hyphenate_addresses_from = options.get("hyphenate_addresses_from", None)

if hyphenate_addresses_from:
try:
hyphenate_addresses_from = timezone.make_aware(
datetime.strptime(hyphenate_addresses_from, "%Y-%m-%dT%H:%M:%S")
)
except ValueError as err:
raise ValueError(err)

for lang in ["fi", "sv", "en"]:
key = "search_column_%s" % lang
# Only generate syllables for the finnish language
if lang == "fi":
logger.info(f"Generating syllables for language: {lang}.")
logger.info(f"Syllables generated for {generate_syllables(Unit)} Units")
num_populated = generate_syllables(
Address,
hyphenate_all_addresses=hyphenate_all_addresses,
hyphenate_addresses_from=hyphenate_addresses_from,
)
logger.info(f"Syllables generated for {num_populated} Addresses")
logger.info(
f"Syllables generated for {generate_syllables(Service)} Services"
)
Expand Down
37 changes: 37 additions & 0 deletions services/migrations/0101_exclusionword.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Generated by Django 4.1.13 on 2024-05-13 10:52

from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``ExclusionWord`` model of the ``services`` app."""

    dependencies = [("services", "0100_alter_unitconnection_section_type")]

    operations = [
        migrations.CreateModel(
            name="ExclusionWord",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "word",
                    models.CharField(max_length=100, verbose_name="Word"),
                ),
                (
                    "language_short",
                    models.CharField(max_length=2, verbose_name="Language short"),
                ),
            ],
            options={
                "verbose_name": "Exclusion word",
                "verbose_name_plural": "Exclusion words",
                # Newest rows first.
                "ordering": ["-id"],
            },
        ),
    ]
2 changes: 1 addition & 1 deletion services/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from .department import Department
from .keyword import Keyword
from .notification import Announcement, ErrorMessage
from .search_rule import ExclusionRule
from .search_rule import ExclusionRule, ExclusionWord
from .service import Service, UnitServiceDetails
from .service_mapping import ServiceMapping
from .service_node import ServiceNode
Expand Down
13 changes: 13 additions & 0 deletions services/models/search_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,16 @@ class Meta:

def __str__(self):
return "%s : %s" % (self.word, self.exclusion)


class ExclusionWord(models.Model):
    """
    A word stored together with a two-character language code.

    NOTE(review): presumably the search API rejects queries that contain
    one of these words; confirm against has_exclusion_word_in_query.
    """

    word = models.CharField(
        verbose_name=_("Word"),
        max_length=100,
    )
    language_short = models.CharField(
        verbose_name=_("Language short"),
        max_length=2,
    )

    class Meta:
        verbose_name = _("Exclusion word")
        verbose_name_plural = _("Exclusion words")
        # Newest rows first.
        ordering = ["-id"]

    def __str__(self):
        return self.word
15 changes: 13 additions & 2 deletions services/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,15 @@

from django.db import connection, reset_queries
from django.db.models import Count
from django.utils.decorators import method_decorator
from django.views.decorators.cache import cache_page
from drf_spectacular.utils import extend_schema, OpenApiParameter
from munigeo import api as munigeo_api
from munigeo.models import Address, AdministrativeDivision
from rest_framework import serializers
from rest_framework import serializers, status
from rest_framework.exceptions import ParseError
from rest_framework.generics import GenericAPIView
from rest_framework.response import Response

from services.api import (
TranslatedModelSerializer,
Expand Down Expand Up @@ -60,6 +63,7 @@
get_search_exclusions,
get_service_node_results,
get_trigram_results,
has_exclusion_word_in_query,
set_address_fields,
set_service_node_unit_count,
set_service_unit_count,
Expand Down Expand Up @@ -318,6 +322,7 @@ def to_representation(self, obj):
class SearchViewSet(GenericAPIView):
queryset = Unit.objects.all()

@method_decorator(cache_page(60 * 60))
def get(self, request):
model_limits = {}
show_only_address = False
Expand All @@ -331,7 +336,6 @@ def get(self, request):
raise ParseError("Supply search terms with 'q=' ' or input=' '")

if not re.match(r"^[\w\såäö.'+&|-]+$", q_val):

raise ParseError(
"Invalid search terms, only letters, numbers, spaces and .'+-&| allowed."
)
Expand Down Expand Up @@ -447,6 +451,13 @@ def get(self, request):
search_query_str += f"& {q}:*"
else:
search_query_str = f"{q}:*"

if has_exclusion_word_in_query(q_vals, language_short):
return Response(
f"Search query {q_vals} would return too many results",
status=status.HTTP_400_BAD_REQUEST,
)

search_fn = "to_tsquery"
if use_websearch:
exclusions = get_search_exclusions(q)
Expand Down
4 changes: 3 additions & 1 deletion services/search/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
"Address",
)
QUERY_PARAM_TYPE_NAMES = [m.lower() for m in SEARCHABLE_MODEL_TYPE_NAMES]
# None will slice to the end of list, e.g. no limit.
# None will slice to the end of list, i.e., no limit.
DEFAULT_MODEL_LIMIT_VALUE = None
# The limit value for the SQL query that searches the search_view. "NULL" = no limit
DEFAULT_SEARCH_SQL_LIMIT_VALUE = "NULL"
DEFAULT_TRIGRAM_THRESHOLD = 0.15
DEFAULT_RANK_THRESHOLD = 1

HYPHENATE_ADDRESSES_MODIFIED_WITHIN_DAYS = 7
17 changes: 17 additions & 0 deletions services/search/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from services.models import (
Department,
ExclusionRule,
ExclusionWord,
Service,
ServiceNode,
Unit,
Expand Down Expand Up @@ -277,6 +278,15 @@ def addresses(streets, municipality):
number=1,
full_name="Tarkk'ampujankatu 1",
)
Address.objects.create(
municipality_id=municipality.id,
location=Point(60.44879002342721, 22.283629416961055),
id=7,
street_id=46,
number=1,
full_name="Kellonsoittajankatu 1",
)
generate_syllables(Address)
Address.objects.update(search_column_fi=get_search_column(Address, "fi"))
return Address.objects.all()

Expand Down Expand Up @@ -314,10 +324,17 @@ def streets():
Street.objects.create(id=43, name="Markulantie", municipality_id="turku")
Street.objects.create(id=44, name="Yliopistonkatu", municipality_id="turku")
Street.objects.create(id=45, name="Tarkk'ampujankatu", municipality_id="turku")
Street.objects.create(id=46, name="Kellonsoittajankatu", municipality_id="turku")
return Street.objects.all()


@pytest.fixture
def exclusion_rules():
    """Create a single exclusion rule and return every ExclusionRule row."""
    rules = ExclusionRule.objects
    rules.create(id=1, word="tekojää", exclusion="-nurmi")
    return rules.all()


@pytest.fixture
def exclusion_words():
    """Create a single Finnish exclusion word and return every ExclusionWord row."""
    words = ExclusionWord.objects
    words.create(id=1, word="katu", language_short="fi")
    return words.all()
17 changes: 17 additions & 0 deletions services/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def test_search(
accessibility_shortcoming,
municipality,
exclusion_rules,
exclusion_words,
):
# Search for "museo" in entities: units, services and servicenodes
url = reverse("search") + "?q=museo&type=unit,service,servicenode"
Expand Down Expand Up @@ -121,6 +122,22 @@ def test_search(
assert kurrapolku["location"]["type"] == "Point"
assert kurrapolku["location"]["coordinates"][0] == 60.479032
assert kurrapolku["location"]["coordinates"][1] == 22.25417
# Test search with excluded word
url = reverse("search") + "?q=katu"
response = api_client.get(url)
assert response.status_code == 400
url = reverse("search") + "?q=Katu"
response = api_client.get(url)
assert response.status_code == 400
url = reverse("search") + "?q=koti katu"
response = api_client.get(url)
assert response.status_code == 400
# Test search with 'kello'
url = reverse("search") + "?q=kello&type=address"
response = api_client.get(url)
results = response.json()["results"]
assert len(results) == 1
assert results[0]["name"]["fi"] == "Kellonsoittajankatu 1"
# Test address search with apostrophe in query
url = reverse("search") + "?q=tarkk'ampujankatu&type=address"
response = api_client.get(url)
Expand Down
Loading

0 comments on commit b9d8536

Please sign in to comment.