Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/search hyphenate addresses #354

Merged
merged 20 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ django-modeltranslation
flake8
requests
requests_cache
git+https://github.com/City-of-Helsinki/django-munigeo@v0.2.76#egg=django-munigeo
git+https://github.com/City-of-Helsinki/django-munigeo@v0.2.83#egg=django-munigeo
pytz
django-cors-headers
django-extensions
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ django-mptt==0.13.4
# via
# -r requirements.in
# django-munigeo
django-munigeo @ git+https://github.com/City-of-Helsinki/django-munigeo@v0.2.76
django-munigeo @ git+https://github.com/City-of-Helsinki/django-munigeo@v0.2.83
# via -r requirements.in
django-polymorphic==3.1.0
# via -r requirements.in
Expand Down
18 changes: 18 additions & 0 deletions services/fixtures/exclusion_words.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[
{
"model": "services.exclusionword",
"pk": 1,
"fields": {
"word": "katu",
"language_short": "fi"
}
},
{
"model": "services.exclusionword",
"pk": 2,
"fields": {
"word": "tie",
"language_short": "fi"
}
}
]
64 changes: 56 additions & 8 deletions services/management/commands/index_search_columns.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import logging
from datetime import datetime, timedelta

from django.contrib.postgres.search import SearchVector
from django.core.management.base import BaseCommand
from django.utils import timezone
from munigeo.models import Address, AdministrativeDivision

from services.models import Service, ServiceNode, Unit
from services.search.utils import hyphenate
from services.search.constants import HYPHENATE_ADDRESSES_MODIFIED_WITHIN_DAYS
from services.search.utils import get_foreign_key_attr, hyphenate

logger = logging.getLogger("search")

Expand All @@ -27,28 +30,42 @@ def get_search_column(model, lang):
return search_column


def generate_syllables(model):
def generate_syllables(
model, hyphenate_all_addresses=False, hyphenate_addresses_from=None
):
"""
Generates syllables for the given model.
"""
# Disable sending of signals
model._meta.auto_created = True
save_kwargs = {}
num_populated = 0
for row in model.objects.all():
if model.__name__ == "Address" and not hyphenate_all_addresses:
save_kwargs["skip_modified_at"] = True
if not hyphenate_addresses_from:
hyphenate_addresses_from = Address.objects.latest(
"modified_at"
).modified_at - timedelta(days=HYPHENATE_ADDRESSES_MODIFIED_WITHIN_DAYS)
qs = model.objects.filter(modified_at__gte=hyphenate_addresses_from)
else:
qs = model.objects.all()
for row in qs:
row.syllables_fi = []
for column in model.get_syllable_fi_columns():
row_content = getattr(row, column, None)
row_content = get_foreign_key_attr(row, column)
if row_content:
# Rows might be of type str or Array, if str
# cast to array by splitting.
if isinstance(row_content, str):
row_content = row_content.split()
for word in row_content:
syllables = hyphenate(word)
for s in syllables:
row.syllables_fi.append(s)
row.save()
if len(syllables) > 1:
for s in syllables:
row.syllables_fi.append(s)
row.save(**save_kwargs)
num_populated += 1

# Enable sending of signals
model._meta.auto_created = False
return num_populated
Expand Down Expand Up @@ -85,13 +102,44 @@ def index_servicenodes(lang):


class Command(BaseCommand):
def handle(self, *args, **kwargs):

def add_arguments(self, parser):
parser.add_argument(
"--hyphenate_addresses_from",
nargs="?",
type=str,
help="Hyphenate addresses whose modified_at timestamp starts at given timestamp YYYY-MM-DDTHH:MM:SS",
)

parser.add_argument(
"--hyphenate_all_addresses",
action="store_true",
help="Hyphenate all addresses",
)

def handle(self, *args, **options):
hyphenate_all_addresses = options.get("hyphenate_all_addresses", None)
hyphenate_addresses_from = options.get("hyphenate_addresses_from", None)

if hyphenate_addresses_from:
try:
hyphenate_addresses_from = timezone.make_aware(
datetime.strptime(hyphenate_addresses_from, "%Y-%m-%dT%H:%M:%S")
)
except ValueError as err:
raise ValueError(err)
for lang in ["fi", "sv", "en"]:
key = "search_column_%s" % lang
# Only generate syllables for the finnish language
if lang == "fi":
logger.info(f"Generating syllables for language: {lang}.")
logger.info(f"Syllables generated for {generate_syllables(Unit)} Units")
num_populated = generate_syllables(
Address,
hyphenate_all_addresses=hyphenate_all_addresses,
hyphenate_addresses_from=hyphenate_addresses_from,
)
logger.info(f"Syllables generated for {num_populated} Addresses")
logger.info(
f"Syllables generated for {generate_syllables(Service)} Services"
)
Expand Down
37 changes: 37 additions & 0 deletions services/migrations/0101_exclusionword.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Generated by Django 4.1.13 on 2024-05-13 10:52

from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``ExclusionWord`` model in the ``services`` app."""

    # Must be applied after the preceding ``services`` migration.
    dependencies = [
        ("services", "0100_alter_unitconnection_section_type"),
    ]

    operations = [
        migrations.CreateModel(
            name="ExclusionWord",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                # The word itself, at most 100 characters.
                ("word", models.CharField(max_length=100, verbose_name="Word")),
                (
                    # Two-character language code of the word (the fixture
                    # and tests in this change use "fi").
                    "language_short",
                    models.CharField(max_length=2, verbose_name="Language short"),
                ),
            ],
            options={
                "verbose_name": "Exclusion word",
                "verbose_name_plural": "Exclusion words",
                # Default ordering: highest id first.
                "ordering": ["-id"],
            },
        ),
    ]
2 changes: 1 addition & 1 deletion services/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from .department import Department
from .keyword import Keyword
from .notification import Announcement, ErrorMessage
from .search_rule import ExclusionRule
from .search_rule import ExclusionRule, ExclusionWord
from .service import Service, UnitServiceDetails
from .service_mapping import ServiceMapping
from .service_node import ServiceNode
Expand Down
13 changes: 13 additions & 0 deletions services/models/search_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,16 @@ class Meta:

def __str__(self):
return "%s : %s" % (self.word, self.exclusion)


class ExclusionWord(models.Model):
    """A word, tied to a language, that the search should refuse to query.

    The search API view calls ``has_exclusion_word_in_query`` with the query
    terms and the language, and answers with HTTP 400 when it returns true.
    NOTE(review): the exact matching rules (case handling, whole-word vs.
    substring) live in that helper and are not visible here -- confirm there.
    """

    # The excluded word, at most 100 characters.
    word = models.CharField(max_length=100, verbose_name=_("Word"))
    # Two-character language code the word applies to, e.g. "fi".
    language_short = models.CharField(max_length=2, verbose_name=_("Language short"))

    class Meta:
        # Default ordering: highest id first.
        ordering = ["-id"]
        verbose_name = _("Exclusion word")
        verbose_name_plural = _("Exclusion words")

    def __str__(self):
        return self.word
12 changes: 10 additions & 2 deletions services/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@
from drf_spectacular.utils import extend_schema, OpenApiParameter
from munigeo import api as munigeo_api
from munigeo.models import Address, AdministrativeDivision
from rest_framework import serializers
from rest_framework import serializers, status
from rest_framework.exceptions import ParseError
from rest_framework.generics import GenericAPIView
from rest_framework.response import Response

from services.api import (
TranslatedModelSerializer,
Expand Down Expand Up @@ -60,6 +61,7 @@
get_search_exclusions,
get_service_node_results,
get_trigram_results,
has_exclusion_word_in_query,
set_address_fields,
set_service_node_unit_count,
set_service_unit_count,
Expand Down Expand Up @@ -331,7 +333,6 @@ def get(self, request):
raise ParseError("Supply search terms with 'q=' ' or input=' '")

if not re.match(r"^[\w\såäö.'+&|-]+$", q_val):

raise ParseError(
"Invalid search terms, only letters, numbers, spaces and .'+-&| allowed."
)
Expand Down Expand Up @@ -447,6 +448,13 @@ def get(self, request):
search_query_str += f"& {q}:*"
else:
search_query_str = f"{q}:*"

if has_exclusion_word_in_query(q_vals, language_short):
return Response(
f"Search query {q_vals} would return too many results",
status=status.HTTP_400_BAD_REQUEST,
)

search_fn = "to_tsquery"
if use_websearch:
exclusions = get_search_exclusions(q)
Expand Down
4 changes: 3 additions & 1 deletion services/search/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
"Address",
)
QUERY_PARAM_TYPE_NAMES = [m.lower() for m in SEARCHABLE_MODEL_TYPE_NAMES]
# None will slice to the end of list, e.g. no limit.
# None will slice to the end of list, i.e., no limit.
DEFAULT_MODEL_LIMIT_VALUE = None
# The limit value for the search query that searches the search_view. "NULL" = no limit
DEFAULT_SEARCH_SQL_LIMIT_VALUE = "NULL"
DEFAULT_TRIGRAM_THRESHOLD = 0.15
DEFAULT_RANK_THRESHOLD = 1

HYPHENATE_ADDRESSES_MODIFIED_WITHIN_DAYS = 7
17 changes: 17 additions & 0 deletions services/search/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from services.models import (
Department,
ExclusionRule,
ExclusionWord,
Service,
ServiceNode,
Unit,
Expand Down Expand Up @@ -277,6 +278,15 @@ def addresses(streets, municipality):
number=1,
full_name="Tarkk'ampujankatu 1",
)
Address.objects.create(
municipality_id=municipality.id,
location=Point(60.44879002342721, 22.283629416961055),
id=7,
street_id=46,
number=1,
full_name="Kellonsoittajankatu 1",
)
generate_syllables(Address)
Address.objects.update(search_column_fi=get_search_column(Address, "fi"))
return Address.objects.all()

Expand Down Expand Up @@ -314,10 +324,17 @@ def streets():
Street.objects.create(id=43, name="Markulantie", municipality_id="turku")
Street.objects.create(id=44, name="Yliopistonkatu", municipality_id="turku")
Street.objects.create(id=45, name="Tarkk'ampujankatu", municipality_id="turku")
Street.objects.create(id=46, name="Kellonsoittajankatu", municipality_id="turku")
return Street.objects.all()


@pytest.fixture
def exclusion_rules():
    """Create a single exclusion rule and return a queryset of all rules.

    The rule maps the word "tekojää" to the exclusion "-nurmi".
    """
    rules = ExclusionRule.objects
    rules.create(id=1, word="tekojää", exclusion="-nurmi")
    return rules.all()


@pytest.fixture
def exclusion_words():
    """Create the Finnish exclusion word "katu" and return all exclusion words."""
    words = ExclusionWord.objects
    words.create(id=1, word="katu", language_short="fi")
    return words.all()
17 changes: 17 additions & 0 deletions services/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def test_search(
accessibility_shortcoming,
municipality,
exclusion_rules,
exclusion_words,
):
# Search for "museo" in entities: units, services and servicenodes
url = reverse("search") + "?q=museo&type=unit,service,servicenode"
Expand Down Expand Up @@ -121,6 +122,22 @@ def test_search(
assert kurrapolku["location"]["type"] == "Point"
assert kurrapolku["location"]["coordinates"][0] == 60.479032
assert kurrapolku["location"]["coordinates"][1] == 22.25417
# Test search with excluded word
url = reverse("search") + "?q=katu"
response = api_client.get(url)
assert response.status_code == 400
url = reverse("search") + "?q=Katu"
response = api_client.get(url)
assert response.status_code == 400
url = reverse("search") + "?q=koti katu"
response = api_client.get(url)
assert response.status_code == 400
# Test search with 'kello'
url = reverse("search") + "?q=kello&type=address"
response = api_client.get(url)
results = response.json()["results"]
assert len(results) == 1
assert results[0]["name"]["fi"] == "Kellonsoittajankatu 1"
# Test address search with apostrophe in query
url = reverse("search") + "?q=tarkk'ampujankatu&type=address"
response = api_client.get(url)
Expand Down
Loading
Loading