From f4b80f0bc47efd76ea20863da519645d7fe498ab Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 08:28:50 +0000 Subject: [PATCH 01/47] Bump black from 22.6.0 to 24.3.0 Bumps [black](https://github.com/psf/black) from 22.6.0 to 24.3.0. - [Release notes](https://github.com/psf/black/releases) - [Changelog](https://github.com/psf/black/blob/main/CHANGES.md) - [Commits](https://github.com/psf/black/compare/22.6.0...24.3.0) --- updated-dependencies: - dependency-name: black dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements-dev.txt | 14 +++++++++++++- requirements.txt | 12 +++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index e823f0278..bbe45dff4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,7 +8,7 @@ asttokens==2.0.5 # via stack-data backcall==0.2.0 # via ipython -black==22.6.0 +black==24.3.0 # via # -c requirements.txt # ipython @@ -32,6 +32,10 @@ mypy-extensions==0.4.3 # via # -c requirements.txt # black +packaging==24.0 + # via + # -c requirements.txt + # black parso==0.8.2 # via # -c requirements.txt @@ -64,10 +68,18 @@ six==1.16.0 # asttokens stack-data==0.2.0 # via ipython +tomli==1.2.1 + # via + # -c requirements.txt + # black traitlets==5.1.0 # via # ipython # matplotlib-inline +typing-extensions==4.11.0 + # via + # -c requirements.txt + # black wcwidth==0.2.5 # via # -c requirements.txt diff --git a/requirements.txt b/requirements.txt index b3cf0e537..37cbd7014 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,7 +20,7 @@ attrs==21.2.0 # requests-cache billiard==3.6.4.0 # via celery -black==22.6.0 +black==24.3.0 # via -r requirements.in cattrs==1.8.0 # via requests-cache @@ -138,8 +138,10 @@ numpy==1.23.0 # via # -r requirements.in # pandas -packaging==21.0 - # via pytest +packaging==24.0 + # via + # black + # pytest pandas==2.0.1 # via -r requirements.in parso==0.8.2 @@ -172,8 +174,6 @@ pyflakes==2.3.1 # via flake8 pykml==0.2.0 # via -r requirements.in -pyparsing==2.4.7 - # via packaging pyrsistent==0.19.3 # via jsonschema pyshp==2.3.1 @@ -240,6 +240,8 @@ tomli==1.2.1 # pep517 tqdm==4.62.3 # via -r requirements.in +typing-extensions==4.11.0 + # via black tzdata==2022.1 # via # django-celery-beat From 2c3d535b735b5e0025a3e4d42de9d8ec9c64a54d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 08:46:15 +0000 Subject: [PATCH 02/47] Bump idna from 3.2 to 3.7 Bumps [idna](https://github.com/kjd/idna) from 3.2 to 3.7. - [Release notes](https://github.com/kjd/idna/releases) - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) - [Commits](https://github.com/kjd/idna/compare/v3.2...v3.7) --- updated-dependencies: - dependency-name: idna dependency-type: indirect ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index b3cf0e537..cc4f3a98c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -108,7 +108,7 @@ flake8==3.9.2 # pep8-naming flake8-polyfill==1.0.2 # via pep8-naming -idna==3.2 +idna==3.7 # via requests inflection==0.5.1 # via drf-spectacular From 8979957d4b7a790bab109dc4564d9e4bb558eeee Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:59:46 +0300 Subject: [PATCH 03/47] Format --- .../management/commands/weather_observation_utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/environment_data/management/commands/weather_observation_utils.py b/environment_data/management/commands/weather_observation_utils.py index 3bde67721..b5d510f50 100644 --- a/environment_data/management/commands/weather_observation_utils.py +++ b/environment_data/management/commands/weather_observation_utils.py @@ -45,9 +45,9 @@ def get_dataframe(stations, from_year=START_YEAR, from_month=1, initial_import=F if not initial_import and from_year == current_date_time.year: params["startTime"] = f"{from_year}-{from_month}-01T00:00Z" else: - params[ - "startTime" - ] = f"{start_date_time.year}-{start_date_time.month}-01T00:00Z" + params["startTime"] = ( + f"{start_date_time.year}-{start_date_time.month}-01T00:00Z" + ) if current_date_time - relativedelta(months=1) < start_date_time: params["endTime"] = current_date_time.strftime(TIME_FORMAT) else: @@ -56,9 +56,9 @@ def get_dataframe(stations, from_year=START_YEAR, from_month=1, initial_import=F + relativedelta(months=1) - relativedelta(hours=1) ) - params[ - "endTime" - ] = f"{tmp_time.year}-{tmp_time.month}-{tmp_time.day}T23:00Z" + params["endTime"] = ( + f"{tmp_time.year}-{tmp_time.month}-{tmp_time.day}T23:00Z" + ) response = REQUEST_SESSION.get(DATA_URL, params=params) logger.info(f"Requested data from: {response.url}") From b0b58d63b3ce58a8deea540c0e6d4b771db46f51 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:00:34 +0300 Subject: [PATCH 04/47] Format --- eco_counter/tests/test_import_counter_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eco_counter/tests/test_import_counter_data.py b/eco_counter/tests/test_import_counter_data.py index bad84144e..3ed15b6da 100644 --- a/eco_counter/tests/test_import_counter_data.py +++ b/eco_counter/tests/test_import_counter_data.py @@ -6,6 +6,7 @@ The main purpose of these tests are to verify that the importer imports and calculates the data correctly. 
""" + import calendar from datetime import datetime, timedelta from io import StringIO From cd63af04882500dc2ce249e807565246fd5fdf1b Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:02:15 +0300 Subject: [PATCH 05/47] Format --- mobility_data/importers/bicycle_stands.py | 1 + mobility_data/importers/bike_service_stations.py | 6 +++--- mobility_data/importers/marinas.py | 1 + mobility_data/importers/share_car_parking_places.py | 6 +++--- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/mobility_data/importers/bicycle_stands.py b/mobility_data/importers/bicycle_stands.py index a1b3b4ec4..df59b8c2b 100644 --- a/mobility_data/importers/bicycle_stands.py +++ b/mobility_data/importers/bicycle_stands.py @@ -2,6 +2,7 @@ Note, bicycle stands are not imorter via the wfs importer as it needs logic to derive if the stand is hull lockable or covered. """ + import logging import os diff --git a/mobility_data/importers/bike_service_stations.py b/mobility_data/importers/bike_service_stations.py index 3dff7e082..a50fcabf9 100644 --- a/mobility_data/importers/bike_service_stations.py +++ b/mobility_data/importers/bike_service_stations.py @@ -50,9 +50,9 @@ def __init__(self, feature): # If no swedish address, retrieve it from the database. if language == "sv": street_name, number = addresses[0].split(" ") - self.address[ - language - ] = f"{get_street_name_translations(street_name, municipality)[language]} number" + self.address[language] = ( + f"{get_street_name_translations(street_name, municipality)[language]} number" + ) # Source data does not contain English addresses, assign the Finnsh else: self.address[language] = addresses[0] diff --git a/mobility_data/importers/marinas.py b/mobility_data/importers/marinas.py index a2be3283a..1ae436181 100644 --- a/mobility_data/importers/marinas.py +++ b/mobility_data/importers/marinas.py @@ -3,6 +3,7 @@ Note, wfs importer is not used as the berths data is separately assigned to the marina mobile units. 
""" + import logging from django.conf import settings diff --git a/mobility_data/importers/share_car_parking_places.py b/mobility_data/importers/share_car_parking_places.py index bed241ed1..25f7898cc 100644 --- a/mobility_data/importers/share_car_parking_places.py +++ b/mobility_data/importers/share_car_parking_places.py @@ -47,9 +47,9 @@ def __init__(self, feature): street_name["en"] = street_name["fi"] self.extra[self.RESTRICTION_FIELD] = {} for i, language in enumerate(LANGUAGES): - self.name[ - language - ] = f"{self.CAR_PARKING_NAME[language]}, {street_name[language]}" + self.name[language] = ( + f"{self.CAR_PARKING_NAME[language]}, {street_name[language]}" + ) self.address[language] = street_name[language] self.extra[self.RESTRICTION_FIELD][language] = restrictions[i].strip() From 4562cb2ce76ed47e8598e9e093fe356511b7da60 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:04:09 +0300 Subject: [PATCH 06/47] Format --- mobility_data/tests/test_import_accessories.py | 1 + mobility_data/tests/test_import_payment_zones.py | 1 + mobility_data/tests/test_import_speed_limits.py | 1 + 3 files changed, 3 insertions(+) diff --git a/mobility_data/tests/test_import_accessories.py b/mobility_data/tests/test_import_accessories.py index 3e89549cc..2cefd5de8 100644 --- a/mobility_data/tests/test_import_accessories.py +++ b/mobility_data/tests/test_import_accessories.py @@ -7,6 +7,7 @@ has been removed from the test input data, as it causes GDAL DataSource to fail when loading data. """ + from unittest.mock import patch import pytest diff --git a/mobility_data/tests/test_import_payment_zones.py b/mobility_data/tests/test_import_payment_zones.py index 5f9e7f6fc..17150ef8a 100644 --- a/mobility_data/tests/test_import_payment_zones.py +++ b/mobility_data/tests/test_import_payment_zones.py @@ -6,6 +6,7 @@ has been removed from the test input data, as it causes GDAL DataSource to fail when loading data. """ + from unittest.mock import patch import pytest diff --git a/mobility_data/tests/test_import_speed_limits.py b/mobility_data/tests/test_import_speed_limits.py index 28f3d8771..43d924ace 100644 --- a/mobility_data/tests/test_import_speed_limits.py +++ b/mobility_data/tests/test_import_speed_limits.py @@ -8,6 +8,7 @@ has been removed from the test input data, as it causes GDAL DataSource to fail when loading data. 
""" + import pytest from django.conf import settings From da189e5c3830e3611b08f6822d5fe8fcb8675acb Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:04:58 +0300 Subject: [PATCH 07/47] Format --- services/api.py | 12 ++++++------ services/content_metrics.py | 1 + services/search/api.py | 7 ++++--- .../utils/accessibility_shortcoming_calculator.py | 8 +++++--- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/services/api.py b/services/api.py index 5fd368141..9f57361ee 100644 --- a/services/api.py +++ b/services/api.py @@ -134,9 +134,9 @@ def to_internal_value(self, data): value = obj[language] # "musiikkiklubit" if language == settings.LANGUAGES[0][0]: # default language extra_fields[field_name] = value # { "name": "musiikkiklubit" } - extra_fields[ - "{}_{}".format(field_name, language) - ] = value # { "name_fi": "musiikkiklubit" } + extra_fields["{}_{}".format(field_name, language)] = ( + value # { "name_fi": "musiikkiklubit" } + ) del data[field_name] # delete original translated fields # handle other than translated fields @@ -733,9 +733,9 @@ def to_representation(self, obj): if "accessibility_shortcoming_count" in getattr( self, "keep_fields", ["accessibility_shortcoming_count"] ): - ret[ - "accessibility_shortcoming_count" - ] = shortcomings.accessibility_shortcoming_count + ret["accessibility_shortcoming_count"] = ( + shortcomings.accessibility_shortcoming_count + ) if "request" not in self.context: return ret diff --git a/services/content_metrics.py b/services/content_metrics.py index af43aba7a..e7607cc26 100644 --- a/services/content_metrics.py +++ b/services/content_metrics.py @@ -5,6 +5,7 @@ with either long field contents or a large amount of related objects. """ + from django.db.models import Case, Count, IntegerField, Sum, When from django.db.models.functions import Length diff --git a/services/search/api.py b/services/search/api.py index 10408ef68..800df98c2 100644 --- a/services/search/api.py +++ b/services/search/api.py @@ -17,6 +17,7 @@ - The search_columns can be manually updated with the index_search_columns and emptied with the empty_search_columns management script. 
""" + import logging import re from itertools import chain @@ -127,9 +128,9 @@ def to_representation(self, obj): shortcomings = obj.accessibility_shortcomings except UnitAccessibilityShortcomings.DoesNotExist: shortcomings = UnitAccessibilityShortcomings() - representation[ - "accessibility_shortcoming_count" - ] = shortcomings.accessibility_shortcoming_count + representation["accessibility_shortcoming_count"] = ( + shortcomings.accessibility_shortcoming_count + ) representation["contract_type"] = UnitSerializer.get_contract_type( self, obj ) diff --git a/services/utils/accessibility_shortcoming_calculator.py b/services/utils/accessibility_shortcoming_calculator.py index 202d20b6c..1c587c639 100644 --- a/services/utils/accessibility_shortcoming_calculator.py +++ b/services/utils/accessibility_shortcoming_calculator.py @@ -131,9 +131,11 @@ def _calculate_shortcomings(self, rule, properties, messages, profile_id): "{}: {} {}".format( rule["id"], rule["operator"], - "{}recorded".format("" if message_recorded else "not ") - if not is_ok - else "passed", + ( + "{}recorded".format("" if message_recorded else "not ") + if not is_ok + else "passed" + ), ) ) return is_ok, message_recorded From 8acb2eaad0f235461d405daebf81eea865d7fb89 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:05:32 +0300 Subject: [PATCH 08/47] Format --- smbackend/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smbackend/settings.py b/smbackend/settings.py index 59bfb9b55..9263363aa 100644 --- a/smbackend/settings.py +++ b/smbackend/settings.py @@ -223,7 +223,7 @@ def gettext(s): 991, # health stations 1097, # basic education 2125, # pre school education - 869 # municipal day care + 869, # municipal day care # 25344, # recycling # 25480, # public libraries ], From ff96f71159884c163d23a86ad874562d92ff20bc Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:06:12 +0300 Subject: [PATCH 09/47] Format --- smbackend_turku/importers/divisions.py | 1 + smbackend_turku/importers/geo_search.py | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/smbackend_turku/importers/divisions.py b/smbackend_turku/importers/divisions.py index 88ed163b4..f53b0d685 100644 --- a/smbackend_turku/importers/divisions.py +++ b/smbackend_turku/importers/divisions.py @@ -4,6 +4,7 @@ and modified to fit the WFS server of Turku. 
""" + import os import re from datetime import datetime diff --git a/smbackend_turku/importers/geo_search.py b/smbackend_turku/importers/geo_search.py index cef01c379..d22f74e91 100644 --- a/smbackend_turku/importers/geo_search.py +++ b/smbackend_turku/importers/geo_search.py @@ -205,9 +205,9 @@ def save_page(self, results, municipality): for result in results: postal_code = result["postal_code_area"]["postal_code"] if postal_code not in self.postal_code_areas_cache: - self.postal_code_areas_cache[ - postal_code - ] = self.get_or_create_postal_code_area(postal_code, result) + self.postal_code_areas_cache[postal_code] = ( + self.get_or_create_postal_code_area(postal_code, result) + ) ( street_name_fi, @@ -353,9 +353,9 @@ def enrich_page(self, results, municipality): postal_code = result["postal_code_area"]["postal_code"] if postal_code not in self.postal_code_areas_cache: - self.postal_code_areas_cache[ - postal_code - ] = self.get_or_create_postal_code_area(postal_code, result) + self.postal_code_areas_cache[postal_code] = ( + self.get_or_create_postal_code_area(postal_code, result) + ) # name_sv is not added as there might be a swedish translation street_entry = { "name": street_name_fi, From 7fc02c37bdfdbfd4afd76215683b382819cc3526 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 11:09:05 +0300 Subject: [PATCH 10/47] Format --- mobility_data/management/commands/import_mobility_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mobility_data/management/commands/import_mobility_data.py b/mobility_data/management/commands/import_mobility_data.py index bba7c77a9..018d5a7be 100644 --- a/mobility_data/management/commands/import_mobility_data.py +++ b/mobility_data/management/commands/import_mobility_data.py @@ -1,6 +1,7 @@ """ Imports all mobility data sources. """ + import logging from django.core import management From 4784e4eb8349706b1cac7108a3a2808a42905c32 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 12:13:51 +0300 Subject: [PATCH 11/47] Add API documentation --- services/search/api.py | 125 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/services/search/api.py b/services/search/api.py index 800df98c2..9ace36f43 100644 --- a/services/search/api.py +++ b/services/search/api.py @@ -24,6 +24,7 @@ from django.db import connection, reset_queries from django.db.models import Count +from drf_spectacular.utils import extend_schema, OpenApiParameter from munigeo import api as munigeo_api from munigeo.models import Address, AdministrativeDivision from rest_framework import serializers @@ -182,6 +183,130 @@ def to_representation(self, obj): return representation +@extend_schema( + parameters=[ + OpenApiParameter( + name="q", + location=OpenApiParameter.QUERY, + description="The query string used for searching. Searches the search_columns for the given models. Commas " + "between words are interpreted as 'and' operator. Words ending with the '|' sign are interpreted as 'or' " + "operator.", + required=False, + type=str, + ), + OpenApiParameter( + name="type", + location=OpenApiParameter.QUERY, + description="Comma separated list of types to search for. Valid values are: unit, service, servicenode, " + "address, administrativedivision. 
If not given defaults to all.", + required=False, + type=str, + ), + OpenApiParameter( + name="use_trigram", + location=OpenApiParameter.QUERY, + description="Comma separated list of types that will include trigram results in search if no results are " + "found. Valid values are: unit, service, servicenode, address, administrativedivision. If not given " + "trigram will not be used.", + required=False, + type=str, + ), + OpenApiParameter( + name="trigram_threshold", + location=OpenApiParameter.QUERY, + description="Threshold value for trigram search. If not given defaults to 0.15.", + required=False, + type=float, + ), + OpenApiParameter( + name="rank_threshold", + location=OpenApiParameter.QUERY, + description="Include results with search rank greater than or equal to the value. If not given defaults to " + "0.", + required=False, + type=float, + ), + OpenApiParameter( + name="use_websearch", + location=OpenApiParameter.QUERY, + description="Use websearch_to_tsquery instead of to_tsquery if exlusion rules are defined for the search.", + required=False, + type=bool, + ), + OpenApiParameter( + name="geometry", + location=OpenApiParameter.QUERY, + description="Display geometry of the search result. If not given defaults to false.", + required=False, + type=bool, + ), + OpenApiParameter( + name="order_units_by_num_services", + location=OpenApiParameter.QUERY, + description="Order units by number of services. If not given defaults to true.", + required=False, + type=bool, + ), + OpenApiParameter( + name="include", + location=OpenApiParameter.QUERY, + description="Comma separated list of fields to include in the response. Format: entity.field, e.g., " + "unit.connections.", + required=False, + type=str, + ), + OpenApiParameter( + name="sql_query_limit", + location=OpenApiParameter.QUERY, + description="Limit the number of results in the search query.", + required=False, + type=int, + ), + OpenApiParameter( + name="unit_limit", + location=OpenApiParameter.QUERY, + description="Limit the number of units in the search results.", + required=False, + type=int, + ), + OpenApiParameter( + name="service_limit", + location=OpenApiParameter.QUERY, + description="Limit the number of services in the search results.", + required=False, + type=int, + ), + OpenApiParameter( + name="servicenode_limit", + location=OpenApiParameter.QUERY, + description="Limit the number of service nodes in the search results.", + required=False, + type=int, + ), + OpenApiParameter( + name="administrativedivision_limit", + location=OpenApiParameter.QUERY, + description="Limit the number of administrative divisions in the search results.", + required=False, + type=int, + ), + OpenApiParameter( + name="address_limit", + location=OpenApiParameter.QUERY, + description="Limit the number of addresses in the search results.", + required=False, + type=int, + ), + OpenApiParameter( + name="language", + location=OpenApiParameter.QUERY, + description="The language to be used in the search. If not given defaults to Finnish. 
Format: fi, sv, en.", + required=False, + type=str, + ), + ], + description="Search for units, services, service nodes, addresses and administrative divisions.", +) class SearchViewSet(GenericAPIView): queryset = Unit.objects.all() From 1f6cd79e4eb56f3d88bdf90d5226aa9b152af542 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 12:21:58 +0300 Subject: [PATCH 12/47] Parametrize and validate input value in search query Parametrize the input value in the search query to enhance security by separating user input from the SQL query. Special characters in query input often raised syntax error in tsquery. --- services/search/api.py | 9 ++++-- services/search/tests/test_api.py | 48 +++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/services/search/api.py b/services/search/api.py index 9ace36f43..e24918c5a 100644 --- a/services/search/api.py +++ b/services/search/api.py @@ -322,6 +322,11 @@ def get(self, request): if not q_val: raise ParseError("Supply search terms with 'q=' ' or input=' '") + if not re.match(r"^[\w\såäö&|-]+$", q_val): + raise ParseError( + "Invalid search terms, only letters, numbers, spaces and -&| allowed." + ) + types_str = ",".join([elem for elem in QUERY_PARAM_TYPE_NAMES]) types = params.get("type", types_str).split(",") if "use_trigram" in self.request.query_params: @@ -424,13 +429,13 @@ def get(self, request): # and by rankig gives better results, e.g. extra fields weight is counted. sql = f""" SELECT id, type_name, name_{language_short}, ts_rank_cd(search_column_{language_short}, search_query) - AS rank FROM search_view, {search_fn}('{config_language}','{search_query_str}') search_query + AS rank FROM search_view, {search_fn}('{config_language}', %s) search_query WHERE search_query @@ search_column_{language_short} ORDER BY rank DESC LIMIT {sql_query_limit}; """ cursor = connection.cursor() - cursor.execute(sql) + cursor.execute(sql, [search_query_str]) # Note, fetchall() consumes the results and once called returns None. 
all_results = cursor.fetchall() all_ids = get_all_ids_from_sql_results(all_results) diff --git a/services/search/tests/test_api.py b/services/search/tests/test_api.py index d79c0cbb2..72cfb4b67 100644 --- a/services/search/tests/test_api.py +++ b/services/search/tests/test_api.py @@ -153,3 +153,51 @@ def test_search( ) response = api_client.get(url) assert len(response.json()["results"]) == 4 + + +@pytest.mark.django_db +def test_search_input_query_validation(api_client): + # Test that | is allowed in query + url = reverse("search") + "?q=halli|museo" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that & is allowed in query + url = reverse("search") + "?q=halli&museo" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that - is allowed in query + url = reverse("search") + "?q=linja-auto" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that " " is allowed in query + url = reverse("search") + "?q=Keskustakirjasto Oodi" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that "ääkköset" are allowed in query + url = reverse("search") + "?q=lääkäri" + response = api_client.get(url) + assert response.status_code == 200 + url = reverse("search") + "?q=röntgen" + response = api_client.get(url) + assert response.status_code == 200 + url = reverse("search") + "?q=åbo" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that numbers are allowed in query + url = reverse("search") + "?q=123" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that special characters are not allowed in query + url = reverse("search") + "?q=halli(" + response = api_client.get(url) + assert response.status_code == 400 + assert ( + response.json()["detail"] + == "Invalid search terms, only letters, numbers, spaces and -&| allowed." + ) From 57f38739d82079c6d21e2b5a5c55772a38701455 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 12:55:45 +0300 Subject: [PATCH 13/47] Accept plus sign in search validation --- services/search/api.py | 4 ++-- services/search/tests/test_api.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/services/search/api.py b/services/search/api.py index e24918c5a..07a36d4af 100644 --- a/services/search/api.py +++ b/services/search/api.py @@ -322,9 +322,9 @@ def get(self, request): if not q_val: raise ParseError("Supply search terms with 'q=' ' or input=' '") - if not re.match(r"^[\w\såäö&|-]+$", q_val): + if not re.match(r"^[\w\såäö+&|-]+$", q_val): raise ParseError( - "Invalid search terms, only letters, numbers, spaces and -&| allowed." + "Invalid search terms, only letters, numbers, spaces and +-&| allowed." 
) types_str = ",".join([elem for elem in QUERY_PARAM_TYPE_NAMES]) diff --git a/services/search/tests/test_api.py b/services/search/tests/test_api.py index 72cfb4b67..c44954974 100644 --- a/services/search/tests/test_api.py +++ b/services/search/tests/test_api.py @@ -177,6 +177,11 @@ def test_search_input_query_validation(api_client): response = api_client.get(url) assert response.status_code == 200 + # Test that + is allowed in query + url = reverse("search") + "?q=Keskustakirjasto+Oodi" + response = api_client.get(url) + assert response.status_code == 200 + # Test that "ääkköset" are allowed in query url = reverse("search") + "?q=lääkäri" response = api_client.get(url) @@ -199,5 +204,5 @@ def test_search_input_query_validation(api_client): assert response.status_code == 400 assert ( response.json()["detail"] - == "Invalid search terms, only letters, numbers, spaces and -&| allowed." + == "Invalid search terms, only letters, numbers, spaces and +-&| allowed." ) From 33af9665f4f71cd76f293e571c6845ba5a9b812c Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 13:02:07 +0300 Subject: [PATCH 14/47] Catch potential tsquery errors more gracefully --- services/search/api.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/search/api.py b/services/search/api.py index 07a36d4af..89356ecdd 100644 --- a/services/search/api.py +++ b/services/search/api.py @@ -435,7 +435,11 @@ def get(self, request): """ cursor = connection.cursor() - cursor.execute(sql, [search_query_str]) + try: + cursor.execute(sql, [search_query_str]) + except Exception as e: + logger.error(f"Error in search query: {e}") + raise ParseError("Search query failed.") # Note, fetchall() consumes the results and once called returns None. all_results = cursor.fetchall() all_ids = get_all_ids_from_sql_results(all_results) From 004675b62dd48c80b1376b770117896ab9d55673 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 14:34:58 +0300 Subject: [PATCH 15/47] Filter by rank and hyphenate only compound words --- services/search/api.py | 22 ++++++++++++++++------ services/search/tests/test_api.py | 3 ++- services/search/utils.py | 22 +++++++++++++--------- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/services/search/api.py b/services/search/api.py index 89356ecdd..c7fa60bfb 100644 --- a/services/search/api.py +++ b/services/search/api.py @@ -47,6 +47,7 @@ from .constants import ( DEFAULT_MODEL_LIMIT_VALUE, + DEFAULT_RANK_THRESHOLD, DEFAULT_SEARCH_SQL_LIMIT_VALUE, DEFAULT_SRS, DEFAULT_TRIGRAM_THRESHOLD, @@ -340,10 +341,18 @@ def get(self, request): try: trigram_threshold = float(params.get("trigram_threshold")) except ValueError: - raise ParseError("'trigram_threshold' need to be of type float.") + raise ParseError("'trigram_threshold' needs to be of type float.") else: trigram_threshold = DEFAULT_TRIGRAM_THRESHOLD + if "rank_threshold" in params: + try: + rank_threshold = float(params.get("rank_threshold")) + except ValueError: + raise ParseError("'rank_threshold' needs to be of type float.") + else: + rank_threshold = DEFAULT_RANK_THRESHOLD + if "use_websearch" in params: try: use_websearch = strtobool(params["use_websearch"]) @@ -428,12 +437,13 @@ def get(self, request): # This is ~100 times faster than using Djangos SearchRank and allows searching using wildard "|*" # and by rankig gives better results, e.g. extra fields weight is counted. 
sql = f""" - SELECT id, type_name, name_{language_short}, ts_rank_cd(search_column_{language_short}, search_query) - AS rank FROM search_view, {search_fn}('{config_language}', %s) search_query - WHERE search_query @@ search_column_{language_short} - ORDER BY rank DESC LIMIT {sql_query_limit}; + SELECT * from ( + SELECT id, type_name, name_{language_short}, ts_rank_cd(search_column_{language_short}, search_query) + AS rank FROM search_view, {search_fn}('{config_language}','{search_query_str}') search_query + WHERE search_query @@ search_column_{language_short} + ORDER BY rank DESC LIMIT {sql_query_limit} + ) AS sub_query where sub_query.rank >= {rank_threshold}; """ - cursor = connection.cursor() try: cursor.execute(sql, [search_query_str]) diff --git a/services/search/tests/test_api.py b/services/search/tests/test_api.py index c44954974..ee828c45c 100644 --- a/services/search/tests/test_api.py +++ b/services/search/tests/test_api.py @@ -79,7 +79,8 @@ def test_search( assert museum_service_node["unit_count"]["municipality"]["turku"] == 1 assert museum_service_node["unit_count"]["total"] == 1 # Test that unit "Impivara" is retrieved from service Uimahalli - url = reverse("search") + "?q=uimahalli&type=unit" + url = reverse("search") + "?q=uimahalli&type=unit&rank_threshold=0" + response = api_client.get(url) results = response.json()["results"] assert results[0]["name"]["fi"] == "Impivaara" diff --git a/services/search/utils.py b/services/search/utils.py index e8611b990..36434e433 100644 --- a/services/search/utils.py +++ b/services/search/utils.py @@ -5,7 +5,6 @@ from services.models import ExclusionRule, ServiceNode, ServiceNodeUnitCount, Unit from services.search.constants import ( DEFAULT_TRIGRAM_THRESHOLD, - LENGTH_OF_HYPHENATED_WORDS, SEARCHABLE_MODEL_TYPE_NAMES, ) @@ -13,17 +12,22 @@ voikko.setNoUglyHyphenation(True) +def is_compound_word(word): + result = voikko.analyze(word) + if len(result) == 0: + return False + return True if result[0]["WORDBASES"].count("+") > 1 else False + + def hyphenate(word): """ - Returns a list of syllables of the word if word length - is >= LENGTH_OF_HYPHENATE_WORDS + Returns a list of syllables of the word if it is a compound word. """ - word_length = len(word) - if word_length >= LENGTH_OF_HYPHENATED_WORDS: - # By Setting the value to word_length, voikko returns - # the words that are in the compound word, if the word is - # not a compound word it returns the syllables as normal. - voikko.setMinHyphenatedWordLength(word_length) + word = word.strip() + if is_compound_word(word): + # By Setting the setMinHyphenatedWordLength to word_length, + # voikko returns the words that are in the compound word + voikko.setMinHyphenatedWordLength(len(word)) syllables = voikko.hyphenate(word) return syllables.split("-") else: From e23b7ba1a6154c726cfa0197fe3609fab81d98ab Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Tue, 16 Apr 2024 15:19:00 +0300 Subject: [PATCH 16/47] Fix service search results order Fix a bug in `services_qs` ordering in search api and add tests for it. 
--- services/search/api.py | 3 ++- services/search/constants.py | 3 +-- services/search/tests/conftest.py | 44 ++++++++++++++++++++++--------- services/search/tests/test_api.py | 19 +++++++++++++ 4 files changed, 54 insertions(+), 15 deletions(-) diff --git a/services/search/api.py b/services/search/api.py index c7fa60bfb..20847d143 100644 --- a/services/search/api.py +++ b/services/search/api.py @@ -473,8 +473,9 @@ def get(self, request): ) services_qs = services_qs.annotate(num_units=Count("units")).order_by( - "-units__count" + "-num_units" ) + # order_by() call makes duplicate rows appear distinct. This is solved by # fetching the ids and filtering a new queryset using them ids = list(services_qs.values_list("id", flat=True)) diff --git a/services/search/constants.py b/services/search/constants.py index 5bb0e4e87..967e37a78 100644 --- a/services/search/constants.py +++ b/services/search/constants.py @@ -16,5 +16,4 @@ # The limit value for the search query that search the search_view. "NULL" = no limit DEFAULT_SEARCH_SQL_LIMIT_VALUE = "NULL" DEFAULT_TRIGRAM_THRESHOLD = 0.15 -# If word length is greater or equal then hyphenate word. -LENGTH_OF_HYPHENATED_WORDS = 8 +DEFAULT_RANK_THRESHOLD = 1 diff --git a/services/search/tests/conftest.py b/services/search/tests/conftest.py index 3d7bd4cde..b11cadb58 100644 --- a/services/search/tests/conftest.py +++ b/services/search/tests/conftest.py @@ -36,7 +36,6 @@ def api_client(): return APIClient() -@pytest.mark.django_db @pytest.fixture def units( services, @@ -106,15 +105,35 @@ def units( ) unit.services.add(5) unit.save() + unit = Unit.objects.create( + id=6, + name="Jäähalli", + last_modified_time=now(), + municipality=municipality, + department=department, + ) + # Add service Halli + unit.services.add(6) + unit.save() + + unit = Unit.objects.create( + id=7, + name="Palloiluhalli", + last_modified_time=now(), + municipality=municipality, + department=department, + ) + # Add service Halli + unit.services.add(6) + unit.save() update_service_root_service_nodes() update_service_counts() update_service_node_counts() generate_syllables(Unit) Unit.objects.update(search_column_fi=get_search_column(Unit, "fi")) - return Unit.objects.all() + return Unit.objects.all().order_by("id") -@pytest.mark.django_db @pytest.fixture def department(municipality): return Department.objects.create( @@ -125,7 +144,6 @@ def department(municipality): ) -@pytest.mark.django_db @pytest.fixture def accessibility_shortcoming(units): unit = Unit.objects.get(name="Biologinen museo") @@ -134,7 +152,6 @@ def accessibility_shortcoming(units): ) -@pytest.mark.django_db @pytest.fixture def services(): Service.objects.create( @@ -167,12 +184,21 @@ def services(): name_sv="konstisbanor", last_modified_time=now(), ) + Service.objects.create( + id=6, + name="Halli", + last_modified_time=now(), + ) + Service.objects.create( + id=7, + name="Hallinto", + last_modified_time=now(), + ) generate_syllables(Service) Service.objects.update(search_column_fi=get_search_column(Service, "fi")) return Service.objects.all() -@pytest.mark.django_db @pytest.fixture def service_nodes(services): leisure = ServiceNode.objects.create( @@ -196,7 +222,6 @@ def service_nodes(services): return ServiceNode.objects.all() -@pytest.mark.django_db @pytest.fixture def addresses(streets, municipality): Address.objects.create( @@ -248,7 +273,6 @@ def addresses(streets, municipality): return Address.objects.all() -@pytest.mark.django_db @pytest.fixture def municipality(): return Municipality.objects.create( @@ -256,7 
+280,6 @@ def municipality(): ) -@pytest.mark.django_db @pytest.fixture def administrative_division_type(): return AdministrativeDivisionType.objects.get_or_create( @@ -264,7 +287,6 @@ def administrative_division_type(): ) -@pytest.mark.django_db @pytest.fixture def administrative_division(administrative_division_type): adm_div = AdministrativeDivision.objects.get_or_create( @@ -276,7 +298,6 @@ def administrative_division(administrative_division_type): return adm_div -@pytest.mark.django_db @pytest.fixture def streets(): Street.objects.create( @@ -287,7 +308,6 @@ def streets(): return Street.objects.all() -@pytest.mark.django_db @pytest.fixture def exclusion_rules(): ExclusionRule.objects.create(id=1, word="tekojää", exclusion="-nurmi") diff --git a/services/search/tests/test_api.py b/services/search/tests/test_api.py index ee828c45c..46fa2e781 100644 --- a/services/search/tests/test_api.py +++ b/services/search/tests/test_api.py @@ -207,3 +207,22 @@ def test_search_input_query_validation(api_client): response.json()["detail"] == "Invalid search terms, only letters, numbers, spaces and +-&| allowed." ) + + +@pytest.mark.django_db +def test_search_service_order(api_client, units, services): + """ + Test that services are ordered descending by unit count. + """ + url = reverse("search") + "?q=halli&type=service" + response = api_client.get(url) + results = response.json()["results"] + assert len(results) == 3 + assert results[0]["name"]["fi"] == "Halli" + assert results[0]["unit_count"]["total"] == 2 + + assert results[1]["name"]["fi"] == "Uimahalli" + assert results[1]["unit_count"]["total"] == 1 + + assert results[2]["name"]["fi"] == "Hallinto" + assert results[2]["unit_count"]["total"] == 0 From 4146554e46865634cd7a37ff9ce0a989870b6bdc Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Wed, 17 Apr 2024 10:31:17 +0300 Subject: [PATCH 17/47] Accept dot and apostrophe in search validation Accept dot and apostrophe in search validation. Do not strip apostrophes from search query to enable searching for words with apostrophes. --- services/search/api.py | 10 +++++----- services/search/tests/conftest.py | 9 +++++++++ services/search/tests/test_api.py | 18 +++++++++++++++++- services/search/utils.py | 7 +++---- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/services/search/api.py b/services/search/api.py index 20847d143..68343fc29 100644 --- a/services/search/api.py +++ b/services/search/api.py @@ -323,9 +323,10 @@ def get(self, request): if not q_val: raise ParseError("Supply search terms with 'q=' ' or input=' '") - if not re.match(r"^[\w\såäö+&|-]+$", q_val): + if not re.match(r"^[\w\såäö.'+&|-]+$", q_val): + raise ParseError( - "Invalid search terms, only letters, numbers, spaces and +-&| allowed." + "Invalid search terms, only letters, numbers, spaces and .'+-&| allowed." ) types_str = ",".join([elem for elem in QUERY_PARAM_TYPE_NAMES]) @@ -417,10 +418,9 @@ def get(self, request): # Build conditional query string that is used in the SQL query. # split by "," or whitespace q_vals = re.split(r",\s+|\s+", q_val) - q_vals = [s.strip().replace("'", "") for s in q_vals] for q in q_vals: if search_query_str: - # if ends with "|"" make it a or + # if ends with "|" make it a or if q[-1] == "|": search_query_str += f"| {q[:-1]}:*" # else make it an and. 
@@ -439,7 +439,7 @@ def get(self, request): sql = f""" SELECT * from ( SELECT id, type_name, name_{language_short}, ts_rank_cd(search_column_{language_short}, search_query) - AS rank FROM search_view, {search_fn}('{config_language}','{search_query_str}') search_query + AS rank FROM search_view, {search_fn}('{config_language}', %s) search_query WHERE search_query @@ search_column_{language_short} ORDER BY rank DESC LIMIT {sql_query_limit} ) AS sub_query where sub_query.rank >= {rank_threshold}; diff --git a/services/search/tests/conftest.py b/services/search/tests/conftest.py index b11cadb58..020e1a5d8 100644 --- a/services/search/tests/conftest.py +++ b/services/search/tests/conftest.py @@ -269,6 +269,14 @@ def addresses(streets, municipality): number=33, full_name="Yliopistonkatu 33", ) + Address.objects.create( + municipality_id=municipality.id, + location=Point(60.1612283, 24.9478104), + id=6, + street_id=45, + number=1, + full_name="Tarkk'ampujankatu 1", + ) Address.objects.update(search_column_fi=get_search_column(Address, "fi")) return Address.objects.all() @@ -305,6 +313,7 @@ def streets(): ) Street.objects.create(id=43, name="Markulantie", municipality_id="turku") Street.objects.create(id=44, name="Yliopistonkatu", municipality_id="turku") + Street.objects.create(id=45, name="Tarkk'ampujankatu", municipality_id="turku") return Street.objects.all() diff --git a/services/search/tests/test_api.py b/services/search/tests/test_api.py index 46fa2e781..99d8b2c3d 100644 --- a/services/search/tests/test_api.py +++ b/services/search/tests/test_api.py @@ -121,6 +121,12 @@ def test_search( assert kurrapolku["location"]["type"] == "Point" assert kurrapolku["location"]["coordinates"][0] == 60.479032 assert kurrapolku["location"]["coordinates"][1] == 22.25417 + # Test address search with apostrophe in query + url = reverse("search") + "?q=tarkk'ampujankatu&type=address" + response = api_client.get(url) + results = response.json()["results"] + assert len(results) == 1 + assert results[0]["name"]["fi"] == "Tarkk'ampujankatu 1" # Test that addresses are sorted by naturalsort. url = reverse("search") + "?q=yliopistonkatu&type=address" response = api_client.get(url) @@ -199,13 +205,23 @@ def test_search_input_query_validation(api_client): response = api_client.get(url) assert response.status_code == 200 + # Test that . is allowed in query + url = reverse("search") + "?q=halli.museo" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that ' is allowed in query + url = reverse("search") + "?q=halli's" + response = api_client.get(url) + assert response.status_code == 200 + # Test that special characters are not allowed in query url = reverse("search") + "?q=halli(" response = api_client.get(url) assert response.status_code == 400 assert ( response.json()["detail"] - == "Invalid search terms, only letters, numbers, spaces and +-&| allowed." + == "Invalid search terms, only letters, numbers, spaces and .'+-&| allowed." 
) diff --git a/services/search/utils.py b/services/search/utils.py index 36434e433..494742998 100644 --- a/services/search/utils.py +++ b/services/search/utils.py @@ -186,15 +186,14 @@ def get_preserved_order(ids): def get_trigram_results( model, model_name, field, q_val, threshold=DEFAULT_TRIGRAM_THRESHOLD ): - sql = f"""SELECT id, similarity({field}, '{q_val}') AS sml + sql = f"""SELECT id, similarity({field}, %s) AS sml FROM {model_name} - WHERE similarity({field}, '{q_val}') >= {threshold} + WHERE similarity({field},%s) >= {threshold} ORDER BY sml DESC; """ cursor = connection.cursor() - cursor.execute(sql) + cursor.execute(sql, [q_val, q_val]) all_results = cursor.fetchall() - ids = [row[0] for row in all_results] objs = model.objects.filter(id__in=ids) return objs From 1145aa7ab69b65872fb54b270a6678fb2c8f7eb4 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Wed, 17 Apr 2024 11:05:06 +0300 Subject: [PATCH 18/47] Add option to disable provider_type ordering in search --- services/search/api.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/services/search/api.py b/services/search/api.py index 68343fc29..39046fe67 100644 --- a/services/search/api.py +++ b/services/search/api.py @@ -248,6 +248,13 @@ def to_representation(self, obj): required=False, type=bool, ), + OpenApiParameter( + name="order_units_by_provider_type", + location=OpenApiParameter.QUERY, + description="Order units by provider type. If not given defaults to true.", + required=False, + type=bool, + ), OpenApiParameter( name="include", location=OpenApiParameter.QUERY, @@ -314,7 +321,7 @@ class SearchViewSet(GenericAPIView): def get(self, request): model_limits = {} show_only_address = False - units_order_list = ["provider_type"] + units_order_list = [] for model in list(QUERY_PARAM_TYPE_NAMES): model_limits[model] = DEFAULT_MODEL_LIMIT_VALUE @@ -383,6 +390,18 @@ def get(self, request): if order_units_by_num_services: units_order_list.append("-num_services") + if "order_units_by_provider_type" in params: + try: + order_units_by_provider_type = strtobool( + params["order_units_by_provider_type"] + ) + except ValueError: + raise ParseError("'order_units_by_provider_type' needs to be a boolean") + else: + order_units_by_provider_type = True + + if order_units_by_provider_type: + units_order_list.append("provider_type") if "include" in params: include_fields = params["include"].split(",") else: @@ -515,9 +534,12 @@ def get(self, request): services = self.request.query_params["service"].strip().split(",") if services[0]: units_qs = units_qs.filter(services__in=services) - units_qs = units_qs.annotate(num_services=Count("services")).order_by( - *units_order_list - ) + + if units_order_list: + units_qs = units_qs.annotate(num_services=Count("services")).order_by( + *units_order_list + ) + units_qs = units_qs[: model_limits["unit"]] else: units_qs = Unit.objects.none() From dc7fa11812bd85fec6355cdc911f5816cb98df7d Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Wed, 17 Apr 2024 11:30:56 +0300 Subject: [PATCH 19/47] Display search API documentation --- smbackend/settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/smbackend/settings.py b/smbackend/settings.py index 9263363aa..14ada4f3d 100644 --- a/smbackend/settings.py +++ b/smbackend/settings.py @@ -357,6 +357,7 @@ def gettext(s): "/environment_data/api/v1/data/", "/exceptional_situations/api/v1/situation/", 
"/exceptional_situations/api/v1/situation_type/", + "/api/v2/search", ] From 7df0e8c7e8ccf1ccf70f2699245b93189b1878ab Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Wed, 17 Apr 2024 11:39:46 +0300 Subject: [PATCH 20/47] Catch similarity errors more gracefully --- services/search/utils.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/services/search/utils.py b/services/search/utils.py index 494742998..42def8bf3 100644 --- a/services/search/utils.py +++ b/services/search/utils.py @@ -1,6 +1,9 @@ +import logging + import libvoikko from django.db import connection from django.db.models import Case, When +from rest_framework.exceptions import ParseError from services.models import ExclusionRule, ServiceNode, ServiceNodeUnitCount, Unit from services.search.constants import ( @@ -8,6 +11,7 @@ SEARCHABLE_MODEL_TYPE_NAMES, ) +logger = logging.getLogger("search") voikko = libvoikko.Voikko("fi") voikko.setNoUglyHyphenation(True) @@ -188,11 +192,15 @@ def get_trigram_results( ): sql = f"""SELECT id, similarity({field}, %s) AS sml FROM {model_name} - WHERE similarity({field},%s) >= {threshold} + WHERE similarity({field}, %s) >= {threshold} ORDER BY sml DESC; """ cursor = connection.cursor() - cursor.execute(sql, [q_val, q_val]) + try: + cursor.execute(sql, [q_val, q_val]) + except Exception as e: + logger.error(f"Error in similarity query: {e}") + raise ParseError("Similariy query failed.") all_results = cursor.fetchall() ids = [row[0] for row in all_results] objs = model.objects.filter(id__in=ids) From 8b8b5bfdee068a91b85fa7206b9ec6323ce24d04 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Thu, 18 Apr 2024 09:10:16 +0300 Subject: [PATCH 21/47] Assert response status code --- eco_counter/management/commands/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/eco_counter/management/commands/utils.py b/eco_counter/management/commands/utils.py index e358db319..285c4f45e 100644 --- a/eco_counter/management/commands/utils.py +++ b/eco_counter/management/commands/utils.py @@ -258,6 +258,11 @@ def get_traffic_counter_csv(start_year=2015): def get_lam_dataframe(csv_url): response = requests.get(csv_url, headers=LAM_STATION_USER_HEADER) + assert ( + response.status_code == 200 + ), "Fetching LAM data from {} , status code {}".format( + settings.ECO_COUNTER_STATIONS_URL, response.status_code + ) string_data = response.content csv_data = pd.read_csv(io.StringIO(string_data.decode("utf-8")), delimiter=";") return csv_data From 73c98fcd2264da874b9a83383da263c0b47b0c5c Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Thu, 18 Apr 2024 09:11:06 +0300 Subject: [PATCH 22/47] Run initial import only if data is found --- .../commands/import_counter_data.py | 153 +++++++++++------- 1 file changed, 95 insertions(+), 58 deletions(-) diff --git a/eco_counter/management/commands/import_counter_data.py b/eco_counter/management/commands/import_counter_data.py index b1ad9207c..8f4063818 100644 --- a/eco_counter/management/commands/import_counter_data.py +++ b/eco_counter/management/commands/import_counter_data.py @@ -387,7 +387,7 @@ def save_telraam_data(start_time): ) -def handle_initial_import(initial_import_counters): +def handle_initial_impor2t(initial_import_counters): delete_tables(csv_data_sources=initial_import_counters) for counter in initial_import_counters: ImportState.objects.filter(csv_data_source=counter).delete() @@ -400,51 +400,62 @@ def 
handle_initial_import(initial_import_counters): save_stations(counter) -def import_data(counters): - for counter in counters: - logger.info(f"Importing/counting data for {counter}...") - import_state = ImportState.objects.filter(csv_data_source=counter).first() - if not import_state: - logger.error( - "ImportState instance not found, try importing with the '--init' argument." - ) - break - if import_state.current_year_number and import_state.current_month_number: - start_time = "{year}-{month}-1T00:00".format( - year=import_state.current_year_number, - month=import_state.current_month_number, - ) - else: - start_month = ( - TELRAAM_COUNTER_START_MONTH if counter == TELRAAM_COUNTER else "01" - ) - start_time = f"{COUNTER_START_YEARS[counter]}-{start_month}-01" +def handle_initial_import(counter): + logger.info(f"Deleting tables for: {counter}") + delete_tables(csv_data_sources=[counter]) + ImportState.objects.filter(csv_data_source=counter).delete() + import_state = ImportState.objects.create(csv_data_source=counter) + logger.info(f"Retrieving stations for {counter}.") + # As Telraam counters are dynamic, create after CSV data is processed + if counter == TELRAAM_COUNTER: + Station.objects.filter(csv_data_source=counter).delete() + else: + save_stations(counter) + return import_state - start_time = dateutil.parser.parse(start_time) - start_time = TIMEZONE.localize(start_time) - # The timeformat for the input data is : 2020-03-01T00:00 - # Convert starting time to input datas timeformat - start_time_string = start_time.strftime("%Y-%m-%dT%H:%M") - match counter: - # case COUNTERS.TELRAAM_COUNTER: - # Telraam counters are handled differently due to their dynamic nature - case COUNTERS.LAM_COUNTER: - csv_data = get_lam_counter_csv(start_time.date()) - case COUNTERS.ECO_COUNTER: - csv_data = get_eco_counter_csv() - case COUNTERS.TRAFFIC_COUNTER: - if import_state.current_year_number: - start_year = import_state.current_year_number - else: - start_year = TRAFFIC_COUNTER_START_YEAR - csv_data = get_traffic_counter_csv(start_year=start_year) - if counter == TELRAAM_COUNTER: - save_telraam_data(start_time) - else: - start_index = csv_data.index[ - csv_data[INDEX_COLUMN_NAME] == start_time_string - ].values[0] +def get_start_time(counter, import_state): + if import_state.current_year_number and import_state.current_month_number: + start_time = "{year}-{month}-1T00:00".format( + year=import_state.current_year_number, + month=import_state.current_month_number, + ) + else: + start_month = ( + TELRAAM_COUNTER_START_MONTH if counter == TELRAAM_COUNTER else "01" + ) + start_time = f"{COUNTER_START_YEARS[counter]}-{start_month}-01" + + start_time = dateutil.parser.parse(start_time) + start_time = TIMEZONE.localize(start_time) + # The timeformat for the input data is : 2020-03-01T00:00 + # Convert starting time to input datas timeformat + return start_time + + +def get_csv_data(counter, import_state, start_time, verbose=True): + match counter: + # case COUNTERS.TELRAAM_COUNTER: + # Telraam counters are handled differently due to their dynamic nature + case COUNTERS.LAM_COUNTER: + csv_data = get_lam_counter_csv(start_time.date()) + case COUNTERS.ECO_COUNTER: + csv_data = get_eco_counter_csv() + case COUNTERS.TRAFFIC_COUNTER: + if import_state.current_year_number: + start_year = import_state.current_year_number + else: + start_year = TRAFFIC_COUNTER_START_YEAR + csv_data = get_traffic_counter_csv(start_year=start_year) + + if counter == TELRAAM_COUNTER: + save_telraam_data(start_time) + else: + 
start_time_string = start_time.strftime("%Y-%m-%dT%H:%M") + start_index = csv_data.index[ + csv_data[INDEX_COLUMN_NAME] == start_time_string + ].values[0] + if verbose: # As LAM data is fetched with a timespan, no index data is available, instead # show time. if counter == LAM_COUNTER: @@ -452,19 +463,45 @@ def import_data(counters): else: logger.info(f"Starting saving observations at index:{start_index}") - csv_data = csv_data[start_index:] - save_observations( - csv_data, - start_time, - csv_data_source=counter, + csv_data = csv_data[start_index:] + return csv_data + + +def import_data(counters, initial_import=False): + for counter in counters: + logger.info(f"Importing/counting data for {counter}...") + import_state = ImportState.objects.filter(csv_data_source=counter).first() + # Before deleting state and data, check that data is available. + if import_state and initial_import: + start_time = get_start_time(counter, import_state) + csv_data = get_csv_data(counter, import_state, start_time, verbose=False) + if len(csv_data) == 0: + logger.info( + "No data to retrieve, skipping initial import. Use --force-init to discard." + ) + continue + if initial_import: + handle_initial_import(counter) + + if not import_state: + logger.error( + "ImportState instance not found, try importing with the '--init' argument." ) - # Try to free some memory - del csv_data - gc.collect() + break + start_time = get_start_time(counter, import_state) + csv_data = get_csv_data(counter, import_state, start_time) + save_observations( + csv_data, + start_time, + csv_data_source=counter, + ) + # Try to free some memory + del csv_data + gc.collect() -def add_additional_data_to_stations(csv_data_source): +def add_additional_data_to_stations(csv_data_source): logger.info(f"Updating {csv_data_source} stations informations...") for station in Station.objects.filter(csv_data_source=csv_data_source): station.data_from_date = get_data_from_date(station) @@ -504,6 +541,8 @@ def add_arguments(self, parser): def handle(self, *args, **options): initial_import_counters = None start_time = None + initial_import = False + if options["initial_import"]: if len(options["initial_import"]) == 0: raise CommandError( @@ -511,9 +550,7 @@ def handle(self, *args, **options): ) else: initial_import_counters = options["initial_import"] - check_counters_argument(initial_import_counters) - logger.info(f"Deleting tables for: {initial_import_counters}") - handle_initial_import(initial_import_counters) + initial_import = True if options["test_counter"]: logger.info("Testing eco_counter importer.") @@ -536,7 +573,7 @@ def handle(self, *args, **options): if not initial_import_counters: # run with counters argument counters = options["counters"] - check_counters_argument(counters) else: counters = initial_import_counters - import_data(counters) + check_counters_argument(counters) + import_data(counters, initial_import) From 3b443db9e721bbb9ad5570a78eef6f7c37f43273 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Thu, 18 Apr 2024 11:33:36 +0300 Subject: [PATCH 23/47] Add force_initial_import_counter_data task --- eco_counter/tasks.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/eco_counter/tasks.py b/eco_counter/tasks.py index af525b84a..b15b4c82f 100644 --- a/eco_counter/tasks.py +++ b/eco_counter/tasks.py @@ -13,6 +13,11 @@ def initial_import_counter_data(args, name="initial_import_counter_data"): management.call_command("import_counter_data", "--init", args) +@shared_task_email +def 
force_initial_import_counter_data(args, name="force_initial_import_counter_data"): + management.call_command("import_counter_data", "--force", "--init", args) + + @shared_task_email def delete_counter_data(args, name="delete_counter_data"): management.call_command("delete_counter_data", "--counters", args) From 923d2f269e605a9f7e07e17d39fa39f7c48f2ae2 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Thu, 18 Apr 2024 11:41:11 +0300 Subject: [PATCH 24/47] Add --force option --- .../commands/import_counter_data.py | 33 ++++++++----------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/eco_counter/management/commands/import_counter_data.py b/eco_counter/management/commands/import_counter_data.py index 8f4063818..1227c73a7 100644 --- a/eco_counter/management/commands/import_counter_data.py +++ b/eco_counter/management/commands/import_counter_data.py @@ -387,19 +387,6 @@ def save_telraam_data(start_time): ) -def handle_initial_impor2t(initial_import_counters): - delete_tables(csv_data_sources=initial_import_counters) - for counter in initial_import_counters: - ImportState.objects.filter(csv_data_source=counter).delete() - ImportState.objects.create(csv_data_source=counter) - logger.info(f"Retrieving stations for {counter}.") - # As Telraam counters are dynamic, create after CSV data is processed - if counter == TELRAAM_COUNTER: - Station.objects.filter(csv_data_source=counter).delete() - else: - save_stations(counter) - - def handle_initial_import(counter): logger.info(f"Deleting tables for: {counter}") delete_tables(csv_data_sources=[counter]) @@ -467,30 +454,33 @@ def get_csv_data(counter, import_state, start_time, verbose=True): return csv_data -def import_data(counters, initial_import=False): +def import_data(counters, initial_import=False, force=False): for counter in counters: logger.info(f"Importing/counting data for {counter}...") import_state = ImportState.objects.filter(csv_data_source=counter).first() + # Before deleting state and data, check that data is available. - if import_state and initial_import: + if not force and import_state and initial_import: start_time = get_start_time(counter, import_state) csv_data = get_csv_data(counter, import_state, start_time, verbose=False) if len(csv_data) == 0: logger.info( - "No data to retrieve, skipping initial import. Use --force-init to discard." + "No data to retrieve, skipping initial import. Use --force to discard." ) continue + if initial_import: handle_initial_import(counter) + import_state = ImportState.objects.filter(csv_data_source=counter).first() if not import_state: logger.error( "ImportState instance not found, try importing with the '--init' argument." 
) break + start_time = get_start_time(counter, import_state) csv_data = get_csv_data(counter, import_state, start_time) - save_observations( csv_data, start_time, @@ -537,12 +527,17 @@ def add_arguments(self, parser): default=False, help=f"Import specific counter(s) data, choices are: {COUNTER_CHOICES_STR}.", ) + parser.add_argument( + "--force", + action="store_true", + help="Force the initial import and discard data check", + ) def handle(self, *args, **options): initial_import_counters = None start_time = None initial_import = False - + force = options.get("force", False) if options["initial_import"]: if len(options["initial_import"]) == 0: raise CommandError( @@ -576,4 +571,4 @@ def handle(self, *args, **options): else: counters = initial_import_counters check_counters_argument(counters) - import_data(counters, initial_import) + import_data(counters, initial_import, force) From 52072775aaad439743fa7efc19a2346a129b1229 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Thu, 18 Apr 2024 12:52:08 +0300 Subject: [PATCH 25/47] Handle telraam importing in import_data function --- .../commands/import_counter_data.py | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/eco_counter/management/commands/import_counter_data.py b/eco_counter/management/commands/import_counter_data.py index 1227c73a7..7b36e6ada 100644 --- a/eco_counter/management/commands/import_counter_data.py +++ b/eco_counter/management/commands/import_counter_data.py @@ -435,23 +435,19 @@ def get_csv_data(counter, import_state, start_time, verbose=True): start_year = TRAFFIC_COUNTER_START_YEAR csv_data = get_traffic_counter_csv(start_year=start_year) - if counter == TELRAAM_COUNTER: - save_telraam_data(start_time) - else: - start_time_string = start_time.strftime("%Y-%m-%dT%H:%M") - start_index = csv_data.index[ - csv_data[INDEX_COLUMN_NAME] == start_time_string - ].values[0] - if verbose: - # As LAM data is fetched with a timespan, no index data is available, instead - # show time. - if counter == LAM_COUNTER: - logger.info(f"Starting saving observations at time:{start_time}") - else: - logger.info(f"Starting saving observations at index:{start_index}") + start_time_string = start_time.strftime("%Y-%m-%dT%H:%M") + start_index = csv_data.index[ + csv_data[INDEX_COLUMN_NAME] == start_time_string + ].values[0] + if verbose: + # As LAM data is fetched with a timespan, no index data is available, instead display start_time. 
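+        # (LAM refers to the automatic road-traffic measurement stations, whose
+        # data is requested from the API as a time range.)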
+ if counter == LAM_COUNTER: + logger.info(f"Starting saving observations at time:{start_time}") + else: + logger.info(f"Starting saving observations at index:{start_index}") - csv_data = csv_data[start_index:] - return csv_data + csv_data = csv_data[start_index:] + return csv_data def import_data(counters, initial_import=False, force=False): @@ -480,15 +476,19 @@ def import_data(counters, initial_import=False, force=False): break start_time = get_start_time(counter, import_state) - csv_data = get_csv_data(counter, import_state, start_time) - save_observations( - csv_data, - start_time, - csv_data_source=counter, - ) - # Try to free some memory - del csv_data - gc.collect() + + if counter == TELRAAM_COUNTER: + save_telraam_data(start_time) + else: + csv_data = get_csv_data(counter, import_state, start_time) + save_observations( + csv_data, + start_time, + csv_data_source=counter, + ) + # Try to free some memory + del csv_data + gc.collect() def add_additional_data_to_stations(csv_data_source): From 6e0a5d1ef4324c708fffd91781577096ab6a9cf5 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Fri, 19 Apr 2024 14:25:52 +0300 Subject: [PATCH 26/47] Use django-filterset to filter parameters --- environment_data/api/utils.py | 182 ++++++++++++++++++++++++---------- environment_data/api/views.py | 136 +++++++++++-------------- 2 files changed, 185 insertions(+), 133 deletions(-) diff --git a/environment_data/api/utils.py b/environment_data/api/utils.py index 4c78d2206..2b69b104b 100644 --- a/environment_data/api/utils.py +++ b/environment_data/api/utils.py @@ -1,55 +1,127 @@ -from datetime import datetime - -from rest_framework.exceptions import ParseError - -from .constants import DATA_TYPES, DATETIME_FORMATS, DAY, HOUR, MONTH, WEEK, YEAR - - -def validate_timestamp(timestamp_str, data_type): - time_format = DATETIME_FORMATS[data_type] - try: - datetime.strptime(timestamp_str, time_format) - except ValueError: - return f"{timestamp_str} invalid format date format, valid format for type {data_type} is {time_format}" - return None - - -def get_start_and_end_and_year(filters, data_type): - start = filters.get("start", None) - end = filters.get("end", None) - year = filters.get("year", None) - - if not start or not end: - raise ParseError("Supply both 'start' and 'end' parameters") - - if YEAR not in data_type and not year: - raise ParseError("Supply 'year' parameter") - - res1 = None - res2 = None - match data_type: - case DATA_TYPES.DAY: - res1 = validate_timestamp(start, DAY) - res2 = validate_timestamp(end, DAY) - case DATA_TYPES.HOUR: - res1 = validate_timestamp(start, HOUR) - res2 = validate_timestamp(end, HOUR) - case DATA_TYPES.WEEK: - res1 = validate_timestamp(start, WEEK) - res2 = validate_timestamp(end, WEEK) - case DATA_TYPES.MONTH: - res1 = validate_timestamp(start, MONTH) - res2 = validate_timestamp(end, MONTH) - case DATA_TYPES.YEAR: - res1 = validate_timestamp(start, YEAR) - res2 = validate_timestamp(end, YEAR) - - if res1: - raise ParseError(res1) - if res2: - raise ParseError(res2) - - if HOUR in data_type or DAY in data_type: - start = f"{year}-{start}" - end = f"{year}-{end}" - return start, end, year +import django_filters + +from environment_data.models import ( + DayData, + HourData, + MonthData, + Station, + WeekData, + YearData, +) + + +class StationFilterSet(django_filters.FilterSet): + geo_id = django_filters.NumberFilter(field_name="geo_id", lookup_expr="exact") + name = django_filters.CharFilter(lookup_expr="icontains") + + class Meta: 
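+        # data_type is generated from Meta.fields as an exact-match filter;
+        # geo_id and name use the explicit filters declared above.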
+ model = Station + fields = {"data_type": ["exact"]} + + +class BaseFilterSet(django_filters.FilterSet): + + station_id = django_filters.NumberFilter(field_name="station") + + class Meta: + fields = {"station": ["exact"]} + + def get_date(self, year_number, month_and_day): + return f"{year_number}-{month_and_day}" + + +class YearDataFilterSet(django_filters.FilterSet): + station_id = django_filters.NumberFilter(field_name="station") + start = django_filters.NumberFilter( + field_name="year__year_number", lookup_expr="gte" + ) + end = django_filters.NumberFilter(field_name="year__year_number", lookup_expr="lte") + + class Meta: + model = YearData + fields = {"station": ["exact"]} + + +class MonthDataFilterSet(BaseFilterSet): + def filter_year(self, queryset, field, year): + return queryset.filter(month__year__year_number=year) + + year = django_filters.NumberFilter(method="filter_year") + start = django_filters.NumberFilter( + field_name="month__month_number", lookup_expr="gte" + ) + end = django_filters.NumberFilter( + field_name="month__month_number", lookup_expr="lte" + ) + + class Meta: + model = MonthData + fields = BaseFilterSet.Meta.fields + + +class WeekDataFilterSet(BaseFilterSet): + def filter_year(self, queryset, field, year): + return queryset.filter(week__years__year_number=year) + + year = django_filters.NumberFilter(method="filter_year") + start = django_filters.NumberFilter( + field_name="week__week_number", lookup_expr="gte" + ) + end = django_filters.NumberFilter(field_name="week__week_number", lookup_expr="lte") + + class Meta: + model = WeekData + fields = BaseFilterSet.Meta.fields + + +class DateDataFilterSet(BaseFilterSet): + DATE_MODEL_NAME = None + YEAR_LOOKUP = None + + def filter_year(self, queryset, field, year): + return queryset.filter(**{f"{self.DATE_MODEL_NAME}__year__year_number": year}) + + def filter_start(self, queryset, field, start): + first = queryset.first() + if first: + lookup = first + if self.YEAR_LOOKUP: + lookup = getattr(first, self.YEAR_LOOKUP) + date = self.get_date(lookup.day.year.year_number, start) + return queryset.filter(**{f"{self.DATE_MODEL_NAME}__date__gte": date}) + else: + return queryset.none() + + def filter_end(self, queryset, field, end): + first = queryset.first() + if first: + lookup = first + if self.YEAR_LOOKUP: + lookup = getattr(first, self.YEAR_LOOKUP) + date = self.get_date(lookup.day.year.year_number, end) + return queryset.filter(**{f"{self.DATE_MODEL_NAME}__date__lte": date}) + else: + return queryset.none() + + year = django_filters.NumberFilter(method="filter_year") + start = django_filters.CharFilter(method="filter_start") + end = django_filters.CharFilter(method="filter_end") + + +class DayDataFilterSet(DateDataFilterSet): + + DATE_MODEL_NAME = "day" + + class Meta: + model = DayData + fields = BaseFilterSet.Meta.fields + + +class HourDataFilterSet(DateDataFilterSet): + + DATE_MODEL_NAME = "hour__day" + YEAR_LOOKUP = "hour" + + class Meta: + model = HourData + fields = BaseFilterSet.Meta.fields diff --git a/environment_data/api/views.py b/environment_data/api/views.py index c52a89ce6..da4dc8766 100644 --- a/environment_data/api/views.py +++ b/environment_data/api/views.py @@ -1,12 +1,12 @@ from django.utils.decorators import method_decorator from django.views.decorators.cache import cache_page +from django_filters.rest_framework import DjangoFilterBackend from drf_spectacular.utils import extend_schema, extend_schema_view -from rest_framework import status, viewsets -from rest_framework.response import Response 
+from rest_framework import viewsets +from rest_framework.exceptions import ValidationError from environment_data.api.constants import ( DATA_TYPES, - DATETIME_FORMATS, ENVIRONMENT_DATA_PARAMS, ENVIRONMENT_STATION_PARAMS, ) @@ -19,7 +19,7 @@ WeekDataSerializer, YearDataSerializer, ) -from environment_data.constants import DATA_TYPES_LIST, VALID_DATA_TYPE_CHOICES +from environment_data.constants import DATA_TYPES_LIST from environment_data.models import ( DayData, HourData, @@ -30,7 +30,14 @@ YearData, ) -from .utils import get_start_and_end_and_year +from .utils import ( + DayDataFilterSet, + HourDataFilterSet, + MonthDataFilterSet, + StationFilterSet, + WeekDataFilterSet, + YearDataFilterSet, +) @extend_schema_view( @@ -42,25 +49,12 @@ class StationViewSet(viewsets.ReadOnlyModelViewSet): queryset = Station.objects.all() serializer_class = StationSerializer + filter_backends = [DjangoFilterBackend] + filterset_class = StationFilterSet @method_decorator(cache_page(60 * 60)) def list(self, request, *args, **kwargs): - queryset = self.queryset - filters = self.request.query_params - data_type = filters.get("data_type", None) - if data_type: - data_type = str(data_type).upper() - if data_type not in DATA_TYPES_LIST: - return Response( - f"Invalid data type, valid types are: {VALID_DATA_TYPE_CHOICES}", - status=status.HTTP_400_BAD_REQUEST, - ) - - queryset = queryset.filter(data_type=data_type) - - page = self.paginate_queryset(queryset) - serializer = self.serializer_class(page, many=True) - return self.get_paginated_response(serializer.data) + return super().list(request, *args, **kwargs) @extend_schema_view( @@ -82,78 +76,64 @@ class ParameterViewSet(viewsets.ReadOnlyModelViewSet): ) ) class DataViewSet(viewsets.GenericViewSet): - queryset = YearData.objects.all() - def list(self, request, *args, **kwargs): - filters = self.request.query_params - station_id = filters.get("station_id", None) - if not station_id: - return Response( - "Supply 'station_id' parameter.", status=status.HTTP_400_BAD_REQUEST - ) - else: - try: - station = Station.objects.get(id=station_id) - except Station.DoesNotExist: - return Response( - f"Station with id {station_id} not found.", - status=status.HTTP_400_BAD_REQUEST, - ) + queryset = [] + serializer_class = None - data_type = filters.get("type", None) - if not data_type: - return Response( - "Supply 'type' parameter", status=status.HTTP_400_BAD_REQUEST - ) - else: - data_type = data_type.lower() + def get_serializer_class(self): + data_type = self.request.query_params.get("type", "").lower() + match data_type: + case DATA_TYPES.HOUR: + return HourDataSerializer + case DATA_TYPES.DAY: + return DayDataSerializer + case DATA_TYPES.WEEK: + return WeekDataSerializer + case DATA_TYPES.MONTH: + return MonthDataSerializer + case DATA_TYPES.YEAR: + return YearDataSerializer + case _: + raise ValidationError( + f"Provide a valid 'type' parameter. 
Valid types are: {', '.join([f for f in DATA_TYPES_LIST])}", + ) - start, end, year = get_start_and_end_and_year(filters, data_type) + def get_queryset(self): + params = self.request.query_params + data_type = params.get("type", "").lower() + queryset = YearData.objects.all() match data_type: case DATA_TYPES.HOUR: - queryset = HourData.objects.filter( - station=station, - hour__day__year__year_number=year, - hour__day__date__gte=start, - hour__day__date__lte=end, + filter_set = HourDataFilterSet( + data=params, queryset=HourData.objects.all() ) - serializer_class = HourDataSerializer case DATA_TYPES.DAY: - queryset = DayData.objects.filter( - station=station, - day__date__gte=start, - day__date__lte=end, - day__year__year_number=year, + filter_set = DayDataFilterSet( + data=params, queryset=DayData.objects.all() ) - serializer_class = DayDataSerializer case DATA_TYPES.WEEK: - serializer_class = WeekDataSerializer - queryset = WeekData.objects.filter( - week__years__year_number=year, - station=station, - week__week_number__gte=start, - week__week_number__lte=end, + filter_set = WeekDataFilterSet( + data=params, queryset=WeekData.objects.all() ) case DATA_TYPES.MONTH: - serializer_class = MonthDataSerializer - queryset = MonthData.objects.filter( - month__year__year_number=year, - station=station, - month__month_number__gte=start, - month__month_number__lte=end, + filter_set = MonthDataFilterSet( + data=params, queryset=MonthData.objects.all() ) case DATA_TYPES.YEAR: - serializer_class = YearDataSerializer - queryset = YearData.objects.filter( - station=station, - year__year_number__gte=start, - year__year_number__lte=end, + filter_set = YearDataFilterSet( + data=params, queryset=YearData.objects.all() ) case _: - return Response( - f"Provide a valid 'type' parameters. Valid types are: {', '.join([f for f in DATETIME_FORMATS])}", - status=status.HTTP_400_BAD_REQUEST, + raise ValidationError( + f"Provide a valid 'type' parameter. 
Valid types are: {', '.join([f for f in DATA_TYPES_LIST])}", ) + if filter_set and filter_set.is_valid(): + return filter_set.qs + else: + return queryset.none() + + def list(self, request, *args, **kwargs): + queryset = self.get_queryset() page = self.paginate_queryset(queryset) - serializer = serializer_class(page, many=True) + serializer = self.get_serializer_class()(page, many=True) return self.get_paginated_response(serializer.data) From 3337254b8e2a394240749f85a430a5df1f142207 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Fri, 19 Apr 2024 14:27:00 +0300 Subject: [PATCH 27/47] Add tests and fixtures --- environment_data/tests/conftest.py | 25 ++++++++++++++++-- environment_data/tests/test_api.py | 41 ++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/environment_data/tests/conftest.py b/environment_data/tests/conftest.py index 6dad1c53b..fe65f6c48 100644 --- a/environment_data/tests/conftest.py +++ b/environment_data/tests/conftest.py @@ -49,6 +49,8 @@ def stations(parameters): @pytest.fixture def measurements(parameters): Measurement.objects.create(id=1, parameter=Parameter.objects.get(id=1), value=1.5) + Measurement.objects.create(id=2, parameter=Parameter.objects.get(id=2), value=2) + return Measurement.objects.all() @@ -58,7 +60,6 @@ def parameters(): Parameter.objects.create(id=1, name="AQINDEX_PT1H_avg") Parameter.objects.create(id=2, name="NO2_PT1H_avg") Parameter.objects.create(id=3, name="WS_PT1H_avg") - return Parameter.objects.all() @@ -66,6 +67,7 @@ def parameters(): @pytest.fixture def years(): Year.objects.create(id=1, year_number=2023) + Year.objects.create(id=2, year_number=2022) return Year.objects.all() @@ -73,6 +75,7 @@ def years(): @pytest.fixture def months(years): Month.objects.create(month_number=1, year=years[0]) + Month.objects.create(month_number=1, year=years[1]) return Month.objects.all() @@ -81,6 +84,8 @@ def months(years): def weeks(years): week = Week.objects.create(week_number=1) week.years.add(years[0]) + week = Week.objects.create(week_number=1) + week.years.add(years[1]) return Week.objects.all() @@ -93,6 +98,12 @@ def days(years, months, weeks): month=months[0], week=weeks[0], ) + Day.objects.create( + date=parser.parse("2022-01-01 00:00:00"), + year=years[1], + month=months[1], + week=weeks[1], + ) return Day.objects.all() @@ -100,6 +111,7 @@ def days(years, months, weeks): @pytest.fixture def hours(days): Hour.objects.create(day=days[0], hour_number=0) + Hour.objects.create(day=days[1], hour_number=0) return Hour.objects.all() @@ -108,6 +120,8 @@ def hours(days): def year_datas(stations, years, measurements): year_data = YearData.objects.create(station=stations[0], year=years[0]) year_data.measurements.add(measurements[0]) + year_data = YearData.objects.create(station=stations[0], year=years[1]) + year_data.measurements.add(measurements[1]) return YearData.objects.all() @@ -116,6 +130,8 @@ def year_datas(stations, years, measurements): def month_datas(stations, months, measurements): month_data = MonthData.objects.create(station=stations[0], month=months[0]) month_data.measurements.add(measurements[0]) + month_data = MonthData.objects.create(station=stations[0], month=months[1]) + month_data.measurements.add(measurements[1]) return MonthData.objects.all() @@ -124,14 +140,17 @@ def month_datas(stations, months, measurements): def week_datas(stations, weeks, measurements): week_data = WeekData.objects.create(station=stations[0], week=weeks[0]) 
week_data.measurements.add(measurements[0]) + week_data = WeekData.objects.create(station=stations[0], week=weeks[1]) + week_data.measurements.add(measurements[1]) return WeekData.objects.all() -@pytest.mark.django_db @pytest.fixture def day_datas(stations, days, measurements): day_data = DayData.objects.create(station=stations[0], day=days[0]) day_data.measurements.add(measurements[0]) + day_data = DayData.objects.create(station=stations[0], day=days[1]) + day_data.measurements.add(measurements[1]) return DayData.objects.all() @@ -140,4 +159,6 @@ def day_datas(stations, days, measurements): def hour_datas(stations, hours, measurements): hour_data = HourData.objects.create(station=stations[0], hour=hours[0]) hour_data.measurements.add(measurements[0]) + hour_data = HourData.objects.create(station=stations[0], hour=hours[1]) + hour_data.measurements.add(measurements[1]) return HourData.objects.all() diff --git a/environment_data/tests/test_api.py b/environment_data/tests/test_api.py index 6635fc5c0..aecb46962 100644 --- a/environment_data/tests/test_api.py +++ b/environment_data/tests/test_api.py @@ -48,6 +48,7 @@ def test_day_data(api_client, day_datas, parameters): ) response = api_client.get(url) assert response.status_code == 200 + assert len(response.json()["results"]) == 1 json_data = response.json()["results"][0] assert len(json_data["measurements"]) == 1 assert json_data["measurements"][0]["value"] == 1.5 @@ -55,6 +56,17 @@ def test_day_data(api_client, day_datas, parameters): assert json_data["date"] == "2023-01-01" +@pytest.mark.django_db +def test_day_data_non_existing_year(api_client, day_datas, parameters): + url = ( + reverse("environment_data:data-list") + + "?year=2020&start=01-01&end=02-01&station_id=1&type=day" + ) + response = api_client.get(url) + assert response.status_code == 200 + assert len(response.json()["results"]) == 0 + + @pytest.mark.django_db def test_week_data(api_client, week_datas, parameters): url = ( @@ -63,6 +75,7 @@ def test_week_data(api_client, week_datas, parameters): ) response = api_client.get(url) assert response.status_code == 200 + assert len(response.json()["results"]) == 1 json_data = response.json()["results"][0] assert len(json_data["measurements"]) == 1 assert json_data["measurements"][0]["value"] == 1.5 @@ -78,11 +91,38 @@ def test_month_data(api_client, month_datas, parameters): ) response = api_client.get(url) assert response.status_code == 200 + assert len(response.json()["results"]) == 1 json_data = response.json()["results"][0] assert len(json_data["measurements"]) == 1 assert json_data["measurements"][0]["value"] == 1.5 assert json_data["measurements"][0]["parameter"] == parameters[0].name assert json_data["month_number"] == 1 + url = ( + reverse("environment_data:data-list") + + "?year=2023&start=1&end=1&station_id=411&type=month" + ) + response = api_client.get(url) + assert len(response.json()["results"]) == 0 + + +@pytest.mark.django_db +def test_month_data_non_existing_year(api_client, month_datas, parameters): + url = ( + reverse("environment_data:data-list") + + "?year=2020&start=1&end=1&station_id=411&type=month" + ) + response = api_client.get(url) + assert len(response.json()["results"]) == 0 + + +@pytest.mark.django_db +def test_month_data_chars_in_arguments(api_client, month_datas, parameters): + url = ( + reverse("environment_data:data-list") + + "?year=foo&start=abc&end=dce&station_id=foobar&type=month" + ) + response = api_client.get(url) + assert len(response.json()["results"]) == 0 @pytest.mark.django_db @@ -94,6 
+134,7 @@ def test_year_data(api_client, year_datas, parameters):
     response = api_client.get(url)
     assert response.status_code == 200
     json_data = response.json()["results"][0]
+    assert len(response.json()["results"]) == 1
     assert len(json_data["measurements"]) == 1
     assert json_data["measurements"][0]["value"] == 1.5
     assert json_data["measurements"][0]["parameter"] == parameters[0].name

From b0909a7b4576fecbadfe496290bdc2627272c4e5 Mon Sep 17 00:00:00 2001
From: juuso-j <68938778+juuso-j@users.noreply.github.com>
Date: Tue, 23 Apr 2024 09:05:06 +0300
Subject: [PATCH 28/47] Handle multiple release_time formats

---
 .../commands/import_traffic_situations.py     | 48 ++++++++++++-------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/exceptional_situations/management/commands/import_traffic_situations.py b/exceptional_situations/management/commands/import_traffic_situations.py
index 80e23c37b..0ddcc7c7e 100644
--- a/exceptional_situations/management/commands/import_traffic_situations.py
+++ b/exceptional_situations/management/commands/import_traffic_situations.py
@@ -34,7 +34,8 @@
     "?inactiveHours=0&includeAreaGeometry=true&situationType=TRAFFIC_ANNOUNCEMENT"
 )
 URLS = [ROAD_WORK_URL, TRAFFIC_ANNOUNCEMENT_URL]
-DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
+DATETIME_FORMATS = ["%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"]
+
 SOUTHWEST_FINLAND_POLYGON = Polygon(
     SOUTHWEST_FINLAND_BOUNDARY, srid=SOUTHWEST_FINLAND_BOUNDARY_SRID
 )
@@ -44,19 +45,21 @@ class Command(BaseCommand):
     def get_geos_geometry(self, feature_data):
         return GEOSGeometry(str(feature_data["geometry"]), srid=PROJECTION_SRID)

-    def create_location(self, geometry, announcement_data):
+    def create_location(self, geometry, announcement_data, announcement):
         location = None
         details = announcement_data["locationDetails"].get("roadAddressLocation", None)
-        details.update(announcement_data.get("location", None))
+        if details:
+            details.update(announcement_data.get("location", None))
         filter = {
             "geometry": geometry,
             "location": location,
             "details": details,
+            "announcement": announcement,
         }
         situation_location = SituationLocation.objects.create(**filter)
         return situation_location

-    def create_announcement(self, announcement_data, situation_location):
+    def create_announcement(self, announcement_data):
         title = announcement_data.get("title", "")
         description = announcement_data["location"].get("description", "")
         additional_info = {}
@@ -81,7 +84,6 @@ def create_announcement(self, announcement_data, situation_location):
         if end_time:
             end_time = parser.parse(end_time)
         filter = {
-            "location": situation_location,
             "title": title,
             "description": description,
             "additional_info": additional_info,
@@ -110,11 +112,21 @@ def handle(self, *args, **options):
             if not properties:
                 continue
             situation_id = properties.get("situationId", None)
-            release_time = properties.get("releaseTime", None)
-            release_time = datetime.strptime(release_time, DATETIME_FORMAT).replace(
-                microsecond=0
-            )
-            release_time = timezone.make_aware(release_time, timezone.utc)
+            release_time_str = properties.get("releaseTime", None)
+            if release_time_str:
+                for format_str in DATETIME_FORMATS:
+                    try:
+                        release_time = datetime.strptime(
+                            release_time_str, format_str
+                        )
+                    except ValueError:
+                        pass
+                    else:
+                        break
+
+            if release_time.microsecond != 0:
+                release_time = release_time.replace(microsecond=0)
+            release_time = timezone.make_aware(release_time, timezone.utc)

             type_name = properties.get("situationType", None)
             sub_type_name = properties.get("trafficAnnouncementType", None)
@@ -127,18 +139,18 @@ def 
handle(self, *args, **options): "situation_id": situation_id, "situation_type": situation_type, } - situation, _ = Situation.objects.get_or_create(**filter) + situation, created = Situation.objects.get_or_create(**filter) situation.release_time = release_time situation.save() - - SituationAnnouncement.objects.filter(situation=situation).delete() - situation.announcements.clear() + if not created: + SituationAnnouncement.objects.filter(situation=situation).delete() + situation.announcements.clear() for announcement_data in properties.get("announcements", []): - situation_location = self.create_location( - geometry, announcement_data - ) situation_announcement = self.create_announcement( - deepcopy(announcement_data), situation_location + deepcopy(announcement_data) + ) + self.create_location( + geometry, announcement_data, situation_announcement ) situation.announcements.add(situation_announcement) num_imported += 1 From a254ca06bb5021fe3d03a4eac1ff874f55451434 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Thu, 2 May 2024 12:59:01 +0300 Subject: [PATCH 29/47] Move get_csv_file_name to utils.py and rename to get_full_csv_file_name --- mobility_data/importers/charging_stations.py | 12 ++---------- mobility_data/importers/utils.py | 7 +++++++ mobility_data/tests/test_import_charging_stations.py | 8 ++++---- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/mobility_data/importers/charging_stations.py b/mobility_data/importers/charging_stations.py index 21f80a9ad..555fe50b9 100644 --- a/mobility_data/importers/charging_stations.py +++ b/mobility_data/importers/charging_stations.py @@ -6,10 +6,9 @@ from munigeo.models import Municipality from .utils import ( - get_file_name_from_data_source, + get_full_csv_file_name, get_municipality_name, get_postal_code, - get_root_dir, get_street_name_translations, LANGUAGES, MobileUnitDataBase, @@ -111,16 +110,9 @@ def get_number_of_rows(file_name): return number_of_rows -def get_csv_file_name(): - file_name = get_file_name_from_data_source(CONTENT_TYPE_NAME) - if file_name: - return file_name - return f"{get_root_dir()}/mobility_data/data/{SOURCE_DATA_FILE_NAME}" - - def get_charging_station_objects(): # Store the imported stations to dict, the index is the key. 
-    file_name = get_csv_file_name()
+    file_name = get_full_csv_file_name(SOURCE_DATA_FILE_NAME, CONTENT_TYPE_NAME)
     charging_stations = {}
     column_mappings = {}
     number_of_rows = get_number_of_rows(file_name)
diff --git a/mobility_data/importers/utils.py b/mobility_data/importers/utils.py
index 2a0a88ab5..b991b7267 100644
--- a/mobility_data/importers/utils.py
+++ b/mobility_data/importers/utils.py
@@ -388,3 +388,10 @@ def create_mobile_units_as_unit_references(service_id, content_type):
             obj.unit_id = unit.id
             objects.append(obj)
     save_to_database(objects, content_type)
+
+
+def get_full_csv_file_name(csv_file_name, content_type_name):
+    file_name = get_file_name_from_data_source(content_type_name)
+    if file_name:
+        return file_name
+    return f"{get_root_dir()}/mobility_data/data/{csv_file_name}"
diff --git a/mobility_data/tests/test_import_charging_stations.py b/mobility_data/tests/test_import_charging_stations.py
index 8b0a99ae7..4e2cef58c 100644
--- a/mobility_data/tests/test_import_charging_stations.py
+++ b/mobility_data/tests/test_import_charging_stations.py
@@ -14,9 +14,9 @@


 @pytest.mark.django_db
-@patch("mobility_data.importers.charging_stations.get_csv_file_name")
+@patch("mobility_data.importers.charging_stations.get_full_csv_file_name")
 def test_import_charging_stations(
-    get_csv_file_name_mock,
+    get_full_csv_file_name_mock,
     municipalities,
     administrative_division_type,
     administrative_division,
@@ -30,7 +30,7 @@ def test_import_charging_stations(
     )

     file_name = f"{get_root_dir()}/mobility_data/tests/data/charging_stations.csv"
-    get_csv_file_name_mock.return_value = file_name
+    get_full_csv_file_name_mock.return_value = file_name
     content_type = get_or_create_content_type_from_config(CONTENT_TYPE_NAME)
     objects = get_charging_station_objects()
     num_created, num_deleted = save_to_database(objects, content_type)
@@ -79,7 +79,7 @@ def test_import_charging_stations(
         == f"{CHARGING_STATION_SERVICE_NAMES['en']}, Ratapihankatu 53"
     )
     # Test that duplicates are not created
-    get_csv_file_name_mock.return_vale = file_name
+    get_full_csv_file_name_mock.return_value = file_name
     content_type = get_or_create_content_type_from_config(CONTENT_TYPE_NAME)
     objects = get_charging_station_objects()
     num_created, num_deleted = save_to_database(objects, content_type)

From ebd1e06e65e6388293923bc3f2f01f2632d86473 Mon Sep 17 00:00:00 2001
From: juuso-j <68938778+juuso-j@users.noreply.github.com>
Date: Thu, 2 May 2024 15:12:43 +0300
Subject: [PATCH 30/47] Add parking garage importer

---
 mobility_data/README.md                      |  5 +
 .../importers/data/content_types.yml         |  5 +
 mobility_data/importers/parking_garages.py   | 91 +++++++++++++++++++
 .../commands/import_parking_garages.py       | 24 +++++
 4 files changed, 125 insertions(+)
 create mode 100644 mobility_data/importers/parking_garages.py
 create mode 100644 mobility_data/management/commands/import_parking_garages.py

diff --git a/mobility_data/README.md b/mobility_data/README.md
index 36398ce48..b0b6c1db9 100644
--- a/mobility_data/README.md
+++ b/mobility_data/README.md
@@ -26,6 +26,11 @@ To import data type:
 ```
 ./manage.py import_charging_stations
 ```
+### Parking garages
+To import data type:
+```
+./manage.py import_parking_garages
+```
 ### Culture Routes
To import data type:
 ```
diff --git a/mobility_data/importers/data/content_types.yml b/mobility_data/importers/data/content_types.yml
index c3b76eb5f..a40a6dbe8 100644
--- a/mobility_data/importers/data/content_types.yml
+++ b/mobility_data/importers/data/content_types.yml
@@ -17,6 +17,11 @@ content_types:
       sv: Elladningsstation 
för bilar en: Car e-charging point + - content_type_name: ParkingGarage + name: + fi: Parkkihalli + sv: Parkeringsgarage + en: Parking garage - content_type_name: NoStaffParking name: fi: Yleiset pysäköintialueet diff --git a/mobility_data/importers/parking_garages.py b/mobility_data/importers/parking_garages.py new file mode 100644 index 000000000..50edacd5e --- /dev/null +++ b/mobility_data/importers/parking_garages.py @@ -0,0 +1,91 @@ +import csv +import logging + +from django.conf import settings +from django.contrib.gis.geos import Point +from munigeo.models import Municipality + +from .utils import ( + get_full_csv_file_name, + get_municipality_name, + get_street_name_translations, + LANGUAGES, + MobileUnitDataBase, + split_string_at_digit, +) + +logger = logging.getLogger("mobility_data") +SOURCE_DATA_SRID = 3877 + +CONTENT_TYPE_NAME = "ParkingGarage" +SOURCE_DATA_FILE_NAME = "parkkihallit.csv" +COLUMN_MAPPINGS = { + "name": 0, + "address": 1, + "N": 2, + "E": 3, + "parking_spaces": 4, + "disabled_spaces": 5, + "charging_stations": 6, + "services_fi": 7, + "services_sv": 8, + "services_en": 9, + "notes_fi": 10, + "notes_sv": 11, + "notes_en": 12, +} + + +class ParkingGarage(MobileUnitDataBase): + + def __init__(self, values): + super().__init__() + x = float(values[COLUMN_MAPPINGS["E"]]) + y = float(values[COLUMN_MAPPINGS["N"]]) + self.geometry = Point(x, y, srid=SOURCE_DATA_SRID) + self.geometry.transform(settings.DEFAULT_SRID) + try: + self.municipality = Municipality.objects.get( + name=get_municipality_name(self.geometry) + ) + except Municipality.DoesNotExist: + self.municipality = None + address = values[COLUMN_MAPPINGS["address"]] + street_name, street_number = split_string_at_digit(address) + # As the source data contains only Finnish street names, we need to get the translations + translated_street_names = get_street_name_translations( + street_name.strip(), self.municipality + ) + self.extra["services"] = {} + self.extra["notes"] = {} + for lang in LANGUAGES: + self.name[lang] = values[COLUMN_MAPPINGS["name"]] + self.address[lang] = f"{translated_street_names[lang]} {street_number}" + self.extra["services"][lang] = values[COLUMN_MAPPINGS[f"services_{lang}"]] + self.extra["notes"][lang] = values[COLUMN_MAPPINGS[f"notes_{lang}"]] + + try: + parking_spaces = int(values[COLUMN_MAPPINGS["parking_spaces"]]) + except ValueError: + parking_spaces = None + self.extra["parking_spaces"] = parking_spaces + + try: + disabled_spaces = int(values[COLUMN_MAPPINGS["disabled_spaces"]]) + except ValueError: + disabled_spaces = None + self.extra["disabled_spaces"] = disabled_spaces + self.extra["charging_stations"] = values[COLUMN_MAPPINGS["charging_stations"]] + + +def get_parking_garage_objects(): + file_name = get_full_csv_file_name(SOURCE_DATA_FILE_NAME, CONTENT_TYPE_NAME) + parking_garages = [] + with open(file_name, encoding="utf-8-sig") as csv_file: + csv_reader = csv.reader(csv_file, delimiter=";") + for i, row in enumerate(csv_reader): + # Discard header row + if i > 0: + parking_garages.append(ParkingGarage(row)) + + return parking_garages diff --git a/mobility_data/management/commands/import_parking_garages.py b/mobility_data/management/commands/import_parking_garages.py new file mode 100644 index 000000000..ef2fd889f --- /dev/null +++ b/mobility_data/management/commands/import_parking_garages.py @@ -0,0 +1,24 @@ +import logging + +from django.core.management import BaseCommand + +from mobility_data.importers.parking_garages import ( + CONTENT_TYPE_NAME, + 
get_parking_garage_objects, +) +from mobility_data.importers.utils import ( + get_or_create_content_type_from_config, + log_imported_message, + save_to_database, +) + +logger = logging.getLogger("mobility_data") + + +class Command(BaseCommand): + def handle(self, *args, **options): + logger.info("Importing parking garages...") + objects = get_parking_garage_objects() + content_type = get_or_create_content_type_from_config(CONTENT_TYPE_NAME) + num_created, num_deleted = save_to_database(objects, content_type) + log_imported_message(logger, content_type, num_created, num_deleted) From f579b7e87942e08e736154d1006cb827853627f4 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Thu, 2 May 2024 15:15:02 +0300 Subject: [PATCH 31/47] Add ParkingGarage data type Allows uploading a new source data file for this data type --- mobility_data/constants.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mobility_data/constants.py b/mobility_data/constants.py index 8d5a03033..3fdb2edc8 100644 --- a/mobility_data/constants.py +++ b/mobility_data/constants.py @@ -12,6 +12,7 @@ from mobility_data.importers.loading_unloading_places import ( CONTENT_TYPE_NAME as LOADING_UNLOADING_PLACE, ) +from mobility_data.importers.parking_garages import CONTENT_TYPE_NAME as PARKING_GARAGE from mobility_data.importers.parking_machines import ( CONTENT_TYPE_NAME as PARKING_MACHINE, ) @@ -55,4 +56,8 @@ "importer_name": "parking_machines", "to_services_list": False, }, + PARKING_GARAGE: { + "importer_name": "parking_garages", + "to_services_list": False, + }, } From aec0a02dccb58e7004526994855729512a33ad39 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Thu, 2 May 2024 15:29:42 +0300 Subject: [PATCH 32/47] Add split_string_at_digit function --- mobility_data/importers/utils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/mobility_data/importers/utils.py b/mobility_data/importers/utils.py index b991b7267..d0d24565a 100644 --- a/mobility_data/importers/utils.py +++ b/mobility_data/importers/utils.py @@ -395,3 +395,16 @@ def get_full_csv_file_name(csv_file_name, content_type_name): if file_name: return file_name return f"{get_root_dir()}/mobility_data/data/{csv_file_name}" + + +def split_string_at_digit(s): + # Split the string at the position of the first digit + match = re.search(r"\d", s) + if match: + index = match.start() + return ( + s[:index], + s[index:], + ) + else: + return s, "" From c6b830921617b753c21c450834419dc633279fa1 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Fri, 3 May 2024 08:41:46 +0300 Subject: [PATCH 33/47] Initial source parking garages source data --- mobility_data/data/parkkihallit.csv | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 mobility_data/data/parkkihallit.csv diff --git a/mobility_data/data/parkkihallit.csv b/mobility_data/data/parkkihallit.csv new file mode 100644 index 000000000..c634066f3 --- /dev/null +++ b/mobility_data/data/parkkihallit.csv @@ -0,0 +1,21 @@ +Nimi;Osoite;N;E;Pysäköintipaikat;Invapaikat;Sähkölatauspaikat;Palvelut;Palvelut (ru);Palvelut (eng);Huom;Huom (ru);Huom (eng);Linkki sivuille +Auriga;Juhana Herttuan puistokatu 21;6703305;23457449;330;2;2 x Type 2 22 kW;Apuvirta, hissi, liikkumisesteisen pysäköintipaikka, sähköauton latauspiste;Elbilsladdning, hiss, parkeringsplats för funktionshindrade, startkablar;Disabled parking, elevator, EV charging, jump 
leads;;;;https://www.aimopark.fi/kaupungit/turku/auriga-/ +Hansakortteli;Kristiinankatu 11;6704479;23459433;162;2;2 x Type 2 11kW, 2 x CSS 90kW;Apuvirta, hissi, kameratunnistus, kengänkiillotin, liikkumisesteisen pysäköintipaikka, ostoskärryt, rengaspumppu, sateenvarjon lainaus, sähköauton latauspiste, videovalvonta, yövartiointi;Elbilsladdning, hiss, kameraparkering, kameraövervakat, kundvagnar, nattlig övervakning, paraply, parkeringsplats för funktionshindrade, skoputs, startkablar, tryckluft;Automatic number-plate recognition (ANPR), CCTV, disabled parking, elevator, EV charging, jump leads, shoe polisher, shopping trolley, surveillance at night, tyre pump, umbrella hire;Parkkihallin omalla sivulla ja aimoparkin latausverkostokartalla eriävää tietoa latauspisteistä;Information angående elbilsladdning på parkeringshusets hemsida strider mot informationen på aimoparks laddningsnätverk karta;Information regarding EV charging on the parking garage's website conflicts with the information on aimopark's charging network map;https://www.aimopark.fi/kaupungit/turku/hansakortteli/ +Harppuunaparkki;Vallihaudankatu 1;6702966;23457475;294;2;4 x Type 2 22 kW;Sähköauton latauspiste;Elbilsladdning;EV charging;Parkkihallin omalla sivulla ja aimoparkin latausverkostokartalla eriävää tietoa latauspisteistä;Information angående elbilsladdning på parkeringshusets hemsida strider mot informationen på aimoparks laddningsnätverk karta;Information regarding EV charging on the parking garage's website conflicts with the information on aimopark's charging network map;https://www.aimopark.fi/kaupungit/turku/harppuunaparkki/ +Hesburger Kupittaa;Lemminkäisenkatu 13;6703912;23461509;48;1; ;Apuvirta, hissi, liikkumisesteisen pysäköintipaikka, sähköauton latauspiste;Elbilsladdning, hiss, parkeringsplats för funktionshindrade, startkablar;Disabled parking, elevator, EV charging, jump leads;;;;https://www.aimopark.fi/fi-fi/cities/turku/hesburger-kupittaa/ +Itäharjun kenttä;Karjakatu 37;6704308;23461903;300;;;Apuvirta;Startkablar;Jump leads;;;;https://www.aimopark.fi/kaupungit/turku/itaharjun-kentta/ +Julia;Brahenkatu 3;6704636;23459940;260;2;2 x Type 2 11 kW;Apuvirta, defibrillaattori, hissi, info, puhelimen kuuluvuusalue, sähköauton latauspiste, videovalvonta, yövartiointi;Elbilsladdning, hiss, hjärtstartare, information, kameraövervakat, mobiltäckning, nattlig övervakning, startkablar;Automated external defibrillator (AED), CCTV, elevator, EV charging, information, jump leads, mobile phone coverage, surveillance at night;;;;https://www.aimopark.fi/kaupungit/turku/julia/ +Kivikukkaro;Yliopistonkatu 29;6704478;23459294;216;3;2 x Type 2 22kW;Apuvirta, julkinen liikenne, korttimaksu, käteismaksu, mobiilimaksu, pyöräparkki, sähköauton latauspiste, videovalvonta, yövartiointi;Cykelparkering, elbilsladdning, kameraövervakat, kollektivtrafik, kortbetalning, mobilbetalning, myntbetalning, nattlig övervakning, startkablar;Bicycle parking, CCTV, coin payment, credit card payment, EV charging, jump leads, mobile payment, public transportation, surveillance at night;;;;https://www.aimopark.fi/kaupungit/turku/kivikukkaro/ +Kupittaanpuisto;Lenkkipolku;6704227;23461020;86;;;;;;;;;https://www.aimopark.fi/kaupungit/turku/kupittaanpuisto/ +Louhi;Läntinen Pitkäkatu 12 B;6704994;23459306;608;10;10 x Type 2 22 kW, 2 x CSS 60kW;Apuvirta, autopesula, defibrillaattori, hissi, info, kameratunnistus, kengänkiilltoin, liikkumistesteisen pysäköintipaikka, ostoskärryt, sateenvarjon lainaus, sähköauton latauspiste, videovalvonta;Biltvätt, 
elbilsladdning, hiss, hjärtstartare, information, kameraparkering, kameraövervakat, kundvagnar, paraply, parkeringsplats för funktionshindrade, skoputs, startkablar;Automated external defibrillator (AED), automatic number plate recognition, carwash, CCTV, disabled parking, elevator, EV charging, information, jump leads, shoe polisher, shopping trolley, umbrella hire;Parkkihallin omalla sivulla ja aimoparkin latausverkostokartalla eriävää tietoa latauspisteistä;Information angående elbilsladdning på parkeringshusets hemsida strider mot informationen på aimoparks laddningsnätverk karta;Information regarding EV charging on the parking garage's website conflicts with the information on aimopark's charging network map;https://www.aimopark.fi/kaupungit/turku/louhi/ +ParkCity;Joukahaisenkatu 8;6704372;23461311;990;;20 x Type 2 11 kW;Autovuokraamo, hissi, julkinen liikenne, kameratunnistus, pyöräparkki, sähköauton latauspiste, videovalvonta;Biluthyring, cykelparkering, elbilsladdning, hiss, kameraparkering, kameraövervakat, kollektivtrafik;Automatic number-plate recognition (ANPR), bicycle parking, car rental, CCTV, elevator, EV charging, public transportation;;;;https://www.aimopark.fi/kaupungit/turku/parkcity/ +Pharmacity;Lemminkäisenkatu 9;6704134;23461240;220;;;Apuvirta;Startkablar;Jump leads;;;;https://www.aimopark.fi/kaupungit/turku/pharmacity/ +P-Centrum;Kristiinankatu 8;6704432;23459438;88;;;;;;;;;https://www.europark.fi/pysakointi/p-centrum/ +P-Puutori;Brahenkatu 13;6704995;23459698;100;;;;;;;;;https://www.p-puutori.fi/ +Savitehtaankadun pysäköintitalo;Savitehtaankatu 7;6704694;23461306;100;;;;;;;;; +Scandic Plaza;Yliopistonkatu 29;6704504;23459333;42;;1 x Type 2 22 kW;Sähköauton latauspiste;Elbilsladdning;EV charging;;;;https://www.aimopark.fi/kaupungit/turku/scandic-plaza/ +Tahkonaukio;Lemminkäisenkatu 9 D;6704136;23461236;120;;;;;;;;;https://www.aimopark.fi/kaupungit/turku/tahkonaukio/ +Toriparkki;;6704748;23459727;620; ;20;Autopesula, sähköauton latauspiste;Biltvätt, elbilsladdning ;Carwash, EV charging;;;;https://www.turuntoriparkki.fi/ +Turun teknologiakiinteistöt;Tykistökatu 6;6704371;23461176;800;;;;;;;;;https://www.aimopark.fi/kaupungit/turku/teknologiakiinteistot/ +Trivium;Lemminkäisenkatu 32;6703957;23461524;461;2;4 x Type 2 22 kW;;;;;;;https://www.aimopark.fi/kaupungit/turku/trivium/ +Wiklund;Brahenkatu 8;6704702;23459855;;4;;;;;;;;https://www.europark.fi/pysakointi/p-wiklund-turku/ \ No newline at end of file From 37103a7b861fcf63a1f9c2a9a4da85ac77920d7b Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Fri, 3 May 2024 09:13:35 +0300 Subject: [PATCH 34/47] Add task that imports parking garages --- mobility_data/tasks.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mobility_data/tasks.py b/mobility_data/tasks.py index 8d4fef6fa..c59023c98 100644 --- a/mobility_data/tasks.py +++ b/mobility_data/tasks.py @@ -168,6 +168,11 @@ def import_street_area_information(name="import_street_area_information"): management.call_command("import_wfs", "StreetAreaInformation") +@shared_task_email +def import_parking_garages(name="import_parking_garages"): + management.call_command("import_parking_garages") + + @shared_task_email def delete_obsolete_data(name="delete_obsolete_data"): MobileUnit.objects.filter(content_types__isnull=True).delete() From d912edb77f61683a66dff5736d17e34cf3323692 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Fri, 3 May 2024 09:17:47 +0300 Subject: [PATCH 35/47] Add parking 
garage importer tests --- mobility_data/tests/conftest.py | 7 ++ .../tests/data/parkkihallit_fixtures.csv | 3 + .../tests/test_import_parking_garages.py | 69 +++++++++++++++++++ 3 files changed, 79 insertions(+) create mode 100644 mobility_data/tests/data/parkkihallit_fixtures.csv create mode 100644 mobility_data/tests/test_import_parking_garages.py diff --git a/mobility_data/tests/conftest.py b/mobility_data/tests/conftest.py index 7cbb8c523..c29e64fcc 100644 --- a/mobility_data/tests/conftest.py +++ b/mobility_data/tests/conftest.py @@ -235,6 +235,13 @@ def streets(): name_sv="Bangårdsgatan", municipality_id="turku", ) + Street.objects.create( + name="Juhana Herttuan puistokatu", + name_fi="Juhana Herttuan puistokatu", + name_sv="Hertig Johans parkgata", + name_en="Juhana Herttuan puistokatu", + municipality_id="turku", + ) return Street.objects.all() diff --git a/mobility_data/tests/data/parkkihallit_fixtures.csv b/mobility_data/tests/data/parkkihallit_fixtures.csv new file mode 100644 index 000000000..1811d4590 --- /dev/null +++ b/mobility_data/tests/data/parkkihallit_fixtures.csv @@ -0,0 +1,3 @@ +Nimi;Osoite;N;E;Pysäköintipaikat;Invapaikat;Sähkölatauspaikat;Palvelut;Palvelut (ru);Palvelut (eng);Huom;Huom (ru);Huom (eng);Linkki sivuille +Auriga;Juhana Herttuan puistokatu 21;6703305;23457449;330;2;2 x Type 2 22 kW;Apuvirta, hissi, liikkumisesteisen pysäköintipaikka, sähköauton latauspiste;Elbilsladdning, hiss, parkeringsplats för funktionshindrade, startkablar;Disabled parking, elevator, EV charging, jump leads;;;;https://www.aimopark.fi/kaupungit/turku/auriga-/ +Hansakortteli;Kristiinankatu 11;6704479;23459433;162;2;2 x Type 2 11kW, 2 x CSS 90kW;Apuvirta, hissi, kameratunnistus, kengänkiillotin, liikkumisesteisen pysäköintipaikka, ostoskärryt, rengaspumppu, sateenvarjon lainaus, sähköauton latauspiste, videovalvonta, yövartiointi;Elbilsladdning, hiss, kameraparkering, kameraövervakat, kundvagnar, nattlig övervakning, paraply, parkeringsplats för funktionshindrade, skoputs, startkablar, tryckluft;Automatic number-plate recognition (ANPR), CCTV, disabled parking, elevator, EV charging, jump leads, shoe polisher, shopping trolley, surveillance at night, tyre pump, umbrella hire;Parkkihallin omalla sivulla ja aimoparkin latausverkostokartalla eriävää tietoa latauspisteistä;Information angående elbilsladdning på parkeringshusets hemsida strider mot informationen på aimoparks laddningsnätverk karta;Information regarding EV charging on the parking garage's website conflicts with the information on aimopark's charging network map;https://www.aimopark.fi/kaupungit/turku/hansakortteli/ \ No newline at end of file diff --git a/mobility_data/tests/test_import_parking_garages.py b/mobility_data/tests/test_import_parking_garages.py new file mode 100644 index 000000000..5e0149c79 --- /dev/null +++ b/mobility_data/tests/test_import_parking_garages.py @@ -0,0 +1,69 @@ +from unittest.mock import patch + +import pytest + +from mobility_data.importers.utils import ( + get_content_type_config, + get_or_create_content_type_from_config, + get_root_dir, + save_to_database, +) +from mobility_data.models import ContentType, MobileUnit + + +@pytest.mark.django_db +@patch("mobility_data.importers.parking_garages.get_full_csv_file_name") +def test_import_parking_garages( + get_full_csv_file_name_mock, + municipalities, + administrative_division_type, + administrative_division, + administrative_division_geometry, + streets, + address, +): + from mobility_data.importers.parking_garages import ( + CONTENT_TYPE_NAME, 
+        get_parking_garage_objects,
+    )

+    file_name = f"{get_root_dir()}/mobility_data/tests/data/parkkihallit_fixtures.csv"
+    get_full_csv_file_name_mock.return_value = file_name
+    content_type = get_or_create_content_type_from_config(CONTENT_TYPE_NAME)
+    objects = get_parking_garage_objects()
+    num_created, num_deleted = save_to_database(objects, content_type)
+    assert num_created == 2
+    assert num_deleted == 0
+    assert ContentType.objects.filter(type_name=CONTENT_TYPE_NAME).count() == 1
+    assert (
+        MobileUnit.objects.filter(content_types__type_name=CONTENT_TYPE_NAME).count()
+        == 2
+    )
+    config = get_content_type_config(CONTENT_TYPE_NAME)
+    content_type = ContentType.objects.get(type_name=CONTENT_TYPE_NAME)
+    assert content_type.name_fi == config["name"]["fi"]
+    assert content_type.name_sv == config["name"]["sv"]
+    assert content_type.name_en == config["name"]["en"]

+    auriga = MobileUnit.objects.get(name="Auriga")
+    assert auriga.name_sv == "Auriga"
+    assert auriga.name_en == "Auriga"
+    assert auriga.address_fi == "Juhana Herttuan puistokatu 21"
+    assert auriga.address_sv == "Hertig Johans parkgata 21"
+    assert auriga.address_en == "Juhana Herttuan puistokatu 21"
+    assert auriga.municipality.name == "Turku"
+    assert auriga.extra["parking_spaces"] == 330
+    assert auriga.extra["disabled_spaces"] == 2
+    assert auriga.extra["charging_stations"] == "2 x Type 2 22 kW"
+    assert (
+        auriga.extra["services"]["fi"]
+        == "Apuvirta, hissi, liikkumisesteisen pysäköintipaikka, sähköauton latauspiste"
+    )
+    assert (
+        auriga.extra["services"]["sv"]
+        == "Elbilsladdning, hiss, parkeringsplats för funktionshindrade, startkablar"
+    )
+    assert (
+        auriga.extra["services"]["en"]
+        == "Disabled parking, elevator, EV charging, jump leads"
+    )

From b5cc7d2c66c4ceb8090bf350db2047c38cf87c66 Mon Sep 17 00:00:00 2001
From: juuso-j <68938778+juuso-j@users.noreply.github.com>
Date: Fri, 3 May 2024 10:21:06 +0300
Subject: [PATCH 36/47] Mock get_full_csv_file_name

---
 smbackend_turku/tests/test_charging_stations.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/smbackend_turku/tests/test_charging_stations.py b/smbackend_turku/tests/test_charging_stations.py
index 618e83c17..04859eae7 100644
--- a/smbackend_turku/tests/test_charging_stations.py
+++ b/smbackend_turku/tests/test_charging_stations.py
@@ -12,9 +12,9 @@


 @pytest.mark.django_db
-@patch("mobility_data.importers.charging_stations.get_csv_file_name")
+@patch("mobility_data.importers.charging_stations.get_full_csv_file_name")
 def test_charging_stations_import(
-    get_csv_file_name_mock,
+    get_full_csv_file_name_mock,
     municipality,
     administrative_division,
     administrative_division_type,
@@ -32,7 +32,7 @@ def test_charging_stations_import(
         id=42, name="Vapaa-aika", last_modified_time=datetime.now(utc_timezone)
     )
     file_name = f"{settings.BASE_DIR}/mobility_data/tests/data/charging_stations.csv"
-    get_csv_file_name_mock.return_value = file_name
+    get_full_csv_file_name_mock.return_value = file_name
     import_charging_stations(
         logger=logger,
         config=config,

From e08853c8cf9d8fe35b39a802feca03e40142e941 Mon Sep 17 00:00:00 2001
From: juuso-j <68938778+juuso-j@users.noreply.github.com>
Date: Fri, 3 May 2024 10:25:10 +0300
Subject: [PATCH 37/47] Rename split_string_at_digit to split_string_at_first_digit

---
 mobility_data/importers/parking_garages.py | 4 ++--
 mobility_data/importers/utils.py           | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/mobility_data/importers/parking_garages.py b/mobility_data/importers/parking_garages.py
index 50edacd5e..567e084d6 
100644 --- a/mobility_data/importers/parking_garages.py +++ b/mobility_data/importers/parking_garages.py @@ -11,7 +11,7 @@ get_street_name_translations, LANGUAGES, MobileUnitDataBase, - split_string_at_digit, + split_string_at_first_digit, ) logger = logging.getLogger("mobility_data") @@ -51,7 +51,7 @@ def __init__(self, values): except Municipality.DoesNotExist: self.municipality = None address = values[COLUMN_MAPPINGS["address"]] - street_name, street_number = split_string_at_digit(address) + street_name, street_number = split_string_at_first_digit(address) # As the source data contains only Finnish street names, we need to get the translations translated_street_names = get_street_name_translations( street_name.strip(), self.municipality diff --git a/mobility_data/importers/utils.py b/mobility_data/importers/utils.py index d0d24565a..131b8a9d3 100644 --- a/mobility_data/importers/utils.py +++ b/mobility_data/importers/utils.py @@ -397,8 +397,7 @@ def get_full_csv_file_name(csv_file_name, content_type_name): return f"{get_root_dir()}/mobility_data/data/{csv_file_name}" -def split_string_at_digit(s): - # Split the string at the position of the first digit +def split_string_at_first_digit(s): match = re.search(r"\d", s) if match: index = match.start() From 79249247accbf72377728cffc54e170a9ccb1c58 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 16 May 2024 05:36:14 +0000 Subject: [PATCH 38/47] Bump sqlparse from 0.4.4 to 0.5.0 Bumps [sqlparse](https://github.com/andialbrecht/sqlparse) from 0.4.4 to 0.5.0. - [Changelog](https://github.com/andialbrecht/sqlparse/blob/master/CHANGELOG) - [Commits](https://github.com/andialbrecht/sqlparse/compare/0.4.4...0.5.0) --- updated-dependencies: - dependency-name: sqlparse dependency-type: indirect ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d73186327..3cb5a9011 100644 --- a/requirements.txt +++ b/requirements.txt @@ -228,7 +228,7 @@ six==1.16.0 # python-dateutil # requests-mock # url-normalize -sqlparse==0.4.4 +sqlparse==0.5.0 # via django toml==0.10.2 # via From ac680008bacab6964d2730e6a26963b320bd38af Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Wed, 22 May 2024 09:42:27 +0300 Subject: [PATCH 39/47] Update info about importing parking machines --- mobility_data/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mobility_data/README.md b/mobility_data/README.md index b0b6c1db9..0db5ac0aa 100644 --- a/mobility_data/README.md +++ b/mobility_data/README.md @@ -170,7 +170,6 @@ To import data type: ./manage.py import_wfs BarbecuePlace ``` - ### Playgrounds ``` ./manage.py import_wfs PlayGround @@ -190,7 +189,7 @@ Imports the outdoor gym devices from the services.unit model. 
i.e., sets referen ### Parking machines ``` -./manage.py import_parking_machines +./manage.py import_wfs ParkingMachine ``` ### School and kindergarten accessibility areas From 56bc7da9dcb090adadcd12168727cc700025741a Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Wed, 22 May 2024 09:42:58 +0300 Subject: [PATCH 40/47] Import parking machines with the WFS importer --- mobility_data/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mobility_data/tasks.py b/mobility_data/tasks.py index c59023c98..70e5e9918 100644 --- a/mobility_data/tasks.py +++ b/mobility_data/tasks.py @@ -155,7 +155,7 @@ def import_wfs(args=None, name="import_wfs"): @shared_task_email def import_parking_machines(name="import_parking_machines"): - management.call_command("import_parking_machines") + management.call_command("import_wfs", "ParkingMachine") @shared_task_email From bb47ff6c9096315e67d9bf58076cb8ba454c8eae Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Wed, 22 May 2024 09:43:34 +0300 Subject: [PATCH 41/47] Fix faulty assertion --- mobility_data/tests/test_api.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mobility_data/tests/test_api.py b/mobility_data/tests/test_api.py index 34b95695d..084c82d22 100644 --- a/mobility_data/tests/test_api.py +++ b/mobility_data/tests/test_api.py @@ -1,6 +1,4 @@ import pytest -from django.conf import settings -from django.contrib.gis.geos import Point from rest_framework.reverse import reverse @@ -47,8 +45,8 @@ def test_mobile_unit(api_client, mobile_units, content_types, unit): assert result["extra"]["test_string"] == "4242" assert result["extra"]["test_int"] == 4242 assert result["extra"]["test_float"] == 42.42 - assert result["geometry"] == Point( - 235404.6706163187, 6694437.919005549, srid=settings.DEFAULT_SRID + assert ( + result["geometry"] == "SRID=3067;POINT (235404.67061631865 6694437.919005549)" ) url = reverse( "mobility_data:mobile_units-detail", From c6e4429336801574251e42cb04b502a49bfbfff5 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Wed, 22 May 2024 09:44:11 +0300 Subject: [PATCH 42/47] Add configuration for parking machines --- .../importers/data/wfs_importer_config.yml | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/mobility_data/importers/data/wfs_importer_config.yml b/mobility_data/importers/data/wfs_importer_config.yml index 9a0bf5f76..46b99e769 100644 --- a/mobility_data/importers/data/wfs_importer_config.yml +++ b/mobility_data/importers/data/wfs_importer_config.yml @@ -1,4 +1,26 @@ features: + - content_type_name: ParkingMachine + wfs_layer: GIS:Pysakointiautomaatit + translate_fi_address_field: Osoite + translate_fi_address_municipality_id: turku + extra_fields: + maksutapa_fi: + wfs_field: Maksutapa + maksutapa_sv: + wfs_field: Maksutapa_sv + maksutapa_en: + wfs_field: Maksutapa_en + maksuvyohyke: + wfs_field: Maksuvyohyke + taksa: + wfs_field: Taksa + muu_tieto_fi: + wfs_field: Muu_tieto + muu_tieto_sv: + wfs_field: Muu_tieto_sv + muu_tieto_en: + wfs_field: Muu_tieto_en + - content_type_name: StreetAreaInformation wfs_layer: GIS:Katualueet max_features: 100000 From 35443cfba648a1108e839c95001080bba1352592 Mon Sep 17 00:00:00 2001 From: juuso-j <68938778+juuso-j@users.noreply.github.com> Date: Wed, 22 May 2024 10:12:21 +0300 Subject: [PATCH 43/47] Add feature to store address with translations --- mobility_data/importers/wfs.py | 33 
From 35443cfba648a1108e839c95001080bba1352592 Mon Sep 17 00:00:00 2001
From: juuso-j <68938778+juuso-j@users.noreply.github.com>
Date: Wed, 22 May 2024 10:12:21 +0300
Subject: [PATCH 43/47] Add feature to store address with translations

---
 mobility_data/importers/wfs.py | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/mobility_data/importers/wfs.py b/mobility_data/importers/wfs.py
index 666a46f4a..506b4de66 100644
--- a/mobility_data/importers/wfs.py
+++ b/mobility_data/importers/wfs.py
@@ -11,10 +11,13 @@
 from mobility_data.importers.utils import (
     delete_mobile_units,
     get_or_create_content_type_from_config,
+    get_street_name_translations,
+    LANGUAGES,
     locates_in_turku,
     log_imported_message,
     MobileUnitDataBase,
     save_to_database,
+    split_string_at_first_digit,
 )
 
 DEFAULT_SOURCE_DATA_SRID = 3877
@@ -40,6 +43,7 @@ def __init__(self):
         super().__init__()
 
     def add_feature(self, feature, config):
+        municipality = None
         create_multipolygon = False
         if "create_multipolygon" in config:
             create_multipolygon = config["create_multipolygon"]
@@ -97,7 +101,6 @@ def add_feature(self, feature, config):
                 self.municipality = Municipality.objects.filter(
                     id=municipality_id
                 ).first()
-
         if "fields" in config:
             for attr, field in config["fields"].items():
                 for lang, field_name in field.items():
@@ -105,6 +108,25 @@ def add_feature(self, feature, config):
                     if getattr(self, attr)[lang] is None:
                         getattr(self, attr)[lang] = feature[field_name].as_string()
 
+        if "translate_fi_address_municipality_id" in config:
+            municipality = Municipality.objects.filter(
+                id=config["translate_fi_address_municipality_id"].lower()
+            ).first()
+
+        if "translate_fi_address_field" in config:
+            address = feature[config["translate_fi_address_field"]].as_string()
+            if not address[0].isdigit():
+                street_name, street_number = split_string_at_first_digit(address)
+            else:
+                street_name = address
+                street_number = ""
+            muni = municipality if municipality else self.municipality
+            translated_street_names = get_street_name_translations(
+                street_name.strip(), muni
+            )
+            for lang in LANGUAGES:
+                self.address[lang] = f"{translated_street_names[lang]} {street_number}"
+
         if "extra_fields" in config:
             for field, attr in config["extra_fields"].items():
                 val = None
@@ -168,9 +190,12 @@ def import_wfs_feature(config, data_file=None):
     assert len(ds) == 1
     layer = ds[0]
     for feature in layer:
-        object = MobilityData()
-        if object.add_feature(feature, config):
-            objects.append(object)
+        try:
+            object = MobilityData()
+            if object.add_feature(feature, config):
+                objects.append(object)
+        except Exception as e:
+            logger.warning(f"Discarding feature {feature}, cause: {e}")
     content_type = get_or_create_content_type_from_config(config["content_type_name"])
     num_created, num_deleted = save_to_database(objects, content_type)
     log_imported_message(logger, content_type, num_created, num_deleted)
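To make the new address handling concrete: when `translate_fi_address_field` is set, the importer splits the Finnish address at the first digit, looks up the street name translations for the configured municipality, and stores one address per language. A rough standalone sketch of that flow, with a hypothetical in-memory lookup standing in for `get_street_name_translations` (the real importer queries the database):

```
import re

LANGUAGES = ["fi", "sv", "en"]

# Hypothetical translation data, for illustration only.
STREET_NAMES = {
    "Linnankatu": {"fi": "Linnankatu", "sv": "Slottsgatan", "en": "Linnankatu"},
}


def split_string_at_first_digit(s):
    # Same splitting idea as the helper renamed earlier in this series.
    match = re.search(r"\d", s)
    return (s[: match.start()], s[match.start() :]) if match else (s, "")


def translate_address(address):
    # Mirror the importer's branching: split only when the string does not start with a digit.
    if not address[0].isdigit():
        street_name, street_number = split_string_at_first_digit(address)
    else:
        street_name, street_number = address, ""
    street_name = street_name.strip()
    translated = STREET_NAMES.get(street_name, {lang: street_name for lang in LANGUAGES})
    return {lang: f"{translated[lang]} {street_number}".strip() for lang in LANGUAGES}


print(translate_address("Linnankatu 23"))
# -> {'fi': 'Linnankatu 23', 'sv': 'Slottsgatan 23', 'en': 'Linnankatu 23'}
```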
From 1fa7c6672ba9b8266b9a6101822578d2c6a288e5 Mon Sep 17 00:00:00 2001
From: juuso-j <68938778+juuso-j@users.noreply.github.com>
Date: Wed, 22 May 2024 10:14:47 +0300
Subject: [PATCH 44/47] Add translate_fi_address_field and
 translate_fi_address_municipality_id

---
 .../importers/data/wfs_importer_config_example.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/mobility_data/importers/data/wfs_importer_config_example.yml b/mobility_data/importers/data/wfs_importer_config_example.yml
index ea98129bf..4ab3ea159 100644
--- a/mobility_data/importers/data/wfs_importer_config_example.yml
+++ b/mobility_data/importers/data/wfs_importer_config_example.yml
@@ -16,6 +16,13 @@ features:
     municipality: muni_field
     # Optional, if set, include only if geometry is inside the boundarys of Turku, default=False
     locates_in_turku: True
+    # Optional. Add the field from which the Finnish address is fetched
+    # and get the Swedish and the English translations to it.
+    # Suitable if only a Finnish address is available in the source data.
+    translate_fi_address_field: field_with_finnish_address
+    # Required if "translate_fi_address_field" is used.
+    # The id of the municipality from which to look up the address translations.
+    translate_fi_address_municipality_id: turku
     # Optional, include only if 'field_name' contains the given string.
     include:
       field_name: this_must_be_in_field_name

From 100336e38d434d9737bbd44a7b1419c8a6a94ec2 Mon Sep 17 00:00:00 2001
From: juuso-j <68938778+juuso-j@users.noreply.github.com>
Date: Wed, 22 May 2024 10:16:27 +0300
Subject: [PATCH 45/47] Add comment that the importer is deprecated

---
 .../management/commands/import_parking_machines.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/mobility_data/management/commands/import_parking_machines.py b/mobility_data/management/commands/import_parking_machines.py
index 97a032f87..39a764b6d 100644
--- a/mobility_data/management/commands/import_parking_machines.py
+++ b/mobility_data/management/commands/import_parking_machines.py
@@ -1,3 +1,9 @@
+"""
+Deprecated: parking machines will in the future be imported with the WFS importer.
+All code related to this importer can be removed once the WFS import
+feature is in the production environment.
+"""
+
 import logging
 
 from django.core.management import BaseCommand

From 9ecf9bb198dedd9a14a5af24472e666e2e779532 Mon Sep 17 00:00:00 2001
From: juuso-j <68938778+juuso-j@users.noreply.github.com>
Date: Wed, 22 May 2024 10:25:22 +0300
Subject: [PATCH 46/47] Fix comment for translate_fi_address_field

---
 mobility_data/importers/data/wfs_importer_config_example.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mobility_data/importers/data/wfs_importer_config_example.yml b/mobility_data/importers/data/wfs_importer_config_example.yml
index 4ab3ea159..640d717fe 100644
--- a/mobility_data/importers/data/wfs_importer_config_example.yml
+++ b/mobility_data/importers/data/wfs_importer_config_example.yml
@@ -17,7 +17,7 @@ features:
     # Optional, if set, include only if geometry is inside the boundarys of Turku, default=False
     locates_in_turku: True
     # Optional. Add the field from which the Finnish address is fetched
-    # and get the Swedish and the English translations to it.
+    # and the importer will assign and look up the Swedish and English translations for it.
     # Suitable if only a Finnish address is available in the source data.
     translate_fi_address_field: field_with_finnish_address

From 9d1ef0954e937939baa9335b84ab3becb2e2ec4b Mon Sep 17 00:00:00 2001
From: juuso-j <68938778+juuso-j@users.noreply.github.com>
Date: Wed, 22 May 2024 10:57:04 +0300
Subject: [PATCH 47/47] Fix geometry assertion

---
 mobility_data/tests/test_api.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mobility_data/tests/test_api.py b/mobility_data/tests/test_api.py
index 084c82d22..2946a09ea 100644
--- a/mobility_data/tests/test_api.py
+++ b/mobility_data/tests/test_api.py
@@ -45,9 +45,7 @@ def test_mobile_unit(api_client, mobile_units, content_types, unit):
     assert result["extra"]["test_string"] == "4242"
     assert result["extra"]["test_int"] == 4242
     assert result["extra"]["test_float"] == 42.42
-    assert (
-        result["geometry"] == "SRID=3067;POINT (235404.67061631865 6694437.919005549)"
-    )
+    assert "POINT" in result["geometry"]
     url = reverse(
         "mobility_data:mobile_units-detail",
         args=["ba6c2903-d36f-4c61-b828-19084fc7a64b"],
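A closing note on the assertion churn in the last two test patches: comparing serialized geometries as exact strings is brittle, since coordinate formatting can differ between GEOS/GDAL versions, which is presumably why the final patch settles for a substring check. If a coordinate-level check is ever wanted again, a tolerance-based comparison is one option; the helper below is a hypothetical sketch, not part of the patches above:

```
from django.contrib.gis.geos import GEOSGeometry


def assert_point_close(geometry_value, expected_x, expected_y, tolerance=1e-6):
    # Parse the serialized geometry (e.g. "SRID=3067;POINT (...)") and compare coordinates.
    geom = GEOSGeometry(str(geometry_value))
    assert geom.geom_type == "Point"
    assert abs(geom.x - expected_x) < tolerance
    assert abs(geom.y - expected_y) < tolerance


# Usage with the coordinates from the original assertion:
# assert_point_close(result["geometry"], 235404.6706163187, 6694437.919005549)
```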