diff --git a/eco_counter/management/commands/import_counter_data.py b/eco_counter/management/commands/import_counter_data.py
index b1ad9207c..7b36e6ada 100644
--- a/eco_counter/management/commands/import_counter_data.py
+++ b/eco_counter/management/commands/import_counter_data.py
@@ -387,72 +387,100 @@ def save_telraam_data(start_time):
     )


-def handle_initial_import(initial_import_counters):
-    delete_tables(csv_data_sources=initial_import_counters)
-    for counter in initial_import_counters:
-        ImportState.objects.filter(csv_data_source=counter).delete()
-        ImportState.objects.create(csv_data_source=counter)
-        logger.info(f"Retrieving stations for {counter}.")
-        # As Telraam counters are dynamic, create after CSV data is processed
-        if counter == TELRAAM_COUNTER:
-            Station.objects.filter(csv_data_source=counter).delete()
+def handle_initial_import(counter):
+    logger.info(f"Deleting tables for: {counter}")
+    delete_tables(csv_data_sources=[counter])
+    ImportState.objects.filter(csv_data_source=counter).delete()
+    import_state = ImportState.objects.create(csv_data_source=counter)
+    logger.info(f"Retrieving stations for {counter}.")
+    # As Telraam counters are dynamic, create after CSV data is processed
+    if counter == TELRAAM_COUNTER:
+        Station.objects.filter(csv_data_source=counter).delete()
+    else:
+        save_stations(counter)
+    return import_state
+
+
+def get_start_time(counter, import_state):
+    if import_state.current_year_number and import_state.current_month_number:
+        start_time = "{year}-{month}-1T00:00".format(
+            year=import_state.current_year_number,
+            month=import_state.current_month_number,
+        )
+    else:
+        start_month = (
+            TELRAAM_COUNTER_START_MONTH if counter == TELRAAM_COUNTER else "01"
+        )
+        start_time = f"{COUNTER_START_YEARS[counter]}-{start_month}-01"
+
+    start_time = dateutil.parser.parse(start_time)
+    start_time = TIMEZONE.localize(start_time)
+    # The time format of the input data is: 2020-03-01T00:00
+    # Convert the start time to the input data's time format
+    return start_time
+
+
+def get_csv_data(counter, import_state, start_time, verbose=True):
+    match counter:
+        # case COUNTERS.TELRAAM_COUNTER:
+        # Telraam counters are handled differently due to their dynamic nature
+        case COUNTERS.LAM_COUNTER:
+            csv_data = get_lam_counter_csv(start_time.date())
+        case COUNTERS.ECO_COUNTER:
+            csv_data = get_eco_counter_csv()
+        case COUNTERS.TRAFFIC_COUNTER:
+            if import_state.current_year_number:
+                start_year = import_state.current_year_number
+            else:
+                start_year = TRAFFIC_COUNTER_START_YEAR
+            csv_data = get_traffic_counter_csv(start_year=start_year)
+
+    start_time_string = start_time.strftime("%Y-%m-%dT%H:%M")
+    start_index = csv_data.index[
+        csv_data[INDEX_COLUMN_NAME] == start_time_string
+    ].values[0]
+    if verbose:
+        # As LAM data is fetched with a timespan, no index data is available, instead display start_time.
+        if counter == LAM_COUNTER:
+            logger.info(f"Starting saving observations at time:{start_time}")
         else:
-            save_stations(counter)
+            logger.info(f"Starting saving observations at index:{start_index}")
+
+    csv_data = csv_data[start_index:]
+    return csv_data


-def import_data(counters):
+def import_data(counters, initial_import=False, force=False):
     for counter in counters:
         logger.info(f"Importing/counting data for {counter}...")
         import_state = ImportState.objects.filter(csv_data_source=counter).first()
+
+        # Before deleting state and data, check that data is available.
+        if not force and import_state and initial_import:
+            start_time = get_start_time(counter, import_state)
+            csv_data = get_csv_data(counter, import_state, start_time, verbose=False)
+            if len(csv_data) == 0:
+                logger.info(
+                    "No data to retrieve, skipping initial import. Use --force to bypass this check."
+                )
+                continue
+
+        if initial_import:
+            handle_initial_import(counter)
+            import_state = ImportState.objects.filter(csv_data_source=counter).first()
+
         if not import_state:
             logger.error(
                 "ImportState instance not found, try importing with the '--init' argument."
             )
             break
-        if import_state.current_year_number and import_state.current_month_number:
-            start_time = "{year}-{month}-1T00:00".format(
-                year=import_state.current_year_number,
-                month=import_state.current_month_number,
-            )
-        else:
-            start_month = (
-                TELRAAM_COUNTER_START_MONTH if counter == TELRAAM_COUNTER else "01"
-            )
-            start_time = f"{COUNTER_START_YEARS[counter]}-{start_month}-01"
-
-        start_time = dateutil.parser.parse(start_time)
-        start_time = TIMEZONE.localize(start_time)
-        # The timeformat for the input data is : 2020-03-01T00:00
-        # Convert starting time to input datas timeformat
-        start_time_string = start_time.strftime("%Y-%m-%dT%H:%M")
-        match counter:
-            # case COUNTERS.TELRAAM_COUNTER:
-            # Telraam counters are handled differently due to their dynamic nature
-            case COUNTERS.LAM_COUNTER:
-                csv_data = get_lam_counter_csv(start_time.date())
-            case COUNTERS.ECO_COUNTER:
-                csv_data = get_eco_counter_csv()
-            case COUNTERS.TRAFFIC_COUNTER:
-                if import_state.current_year_number:
-                    start_year = import_state.current_year_number
-                else:
-                    start_year = TRAFFIC_COUNTER_START_YEAR
-                csv_data = get_traffic_counter_csv(start_year=start_year)
+
+        start_time = get_start_time(counter, import_state)

         if counter == TELRAAM_COUNTER:
             save_telraam_data(start_time)
         else:
-            start_index = csv_data.index[
-                csv_data[INDEX_COLUMN_NAME] == start_time_string
-            ].values[0]
-            # As LAM data is fetched with a timespan, no index data is available, instead
-            # show time.
-            if counter == LAM_COUNTER:
-                logger.info(f"Starting saving observations at time:{start_time}")
-            else:
-                logger.info(f"Starting saving observations at index:{start_index}")
-
-            csv_data = csv_data[start_index:]
+            csv_data = get_csv_data(counter, import_state, start_time)
             save_observations(
                 csv_data,
                 start_time,
@@ -464,7 +492,6 @@ def import_data(counters):


 def add_additional_data_to_stations(csv_data_source):
-    logger.info(f"Updating {csv_data_source} stations informations...")
     for station in Station.objects.filter(csv_data_source=csv_data_source):
         station.data_from_date = get_data_from_date(station)
@@ -500,10 +527,17 @@ def add_arguments(self, parser):
             default=False,
             help=f"Import specific counter(s) data, choices are: {COUNTER_CHOICES_STR}.",
         )
+        parser.add_argument(
+            "--force",
+            action="store_true",
+            help="Force the initial import, bypassing the data availability check",
+        )

     def handle(self, *args, **options):
         initial_import_counters = None
         start_time = None
+        initial_import = False
+        force = options.get("force", False)
         if options["initial_import"]:
             if len(options["initial_import"]) == 0:
                 raise CommandError(
@@ -511,9 +545,7 @@ def handle(self, *args, **options):
                 )
             else:
                 initial_import_counters = options["initial_import"]
-                check_counters_argument(initial_import_counters)
-                logger.info(f"Deleting tables for: {initial_import_counters}")
-                handle_initial_import(initial_import_counters)
+                initial_import = True

         if options["test_counter"]:
             logger.info("Testing eco_counter importer.")
@@ -536,7 +568,7 @@ def handle(self, *args, **options):
         if not initial_import_counters:
             # run with counters argument
             counters = options["counters"]
-            check_counters_argument(counters)
         else:
             counters = initial_import_counters
-        import_data(counters)
+        check_counters_argument(counters)
+        import_data(counters, initial_import, force)
diff --git a/eco_counter/management/commands/utils.py b/eco_counter/management/commands/utils.py
index e358db319..285c4f45e 100644
--- a/eco_counter/management/commands/utils.py
+++ b/eco_counter/management/commands/utils.py
@@ -258,6 +258,11 @@ def get_traffic_counter_csv(start_year=2015):

 def get_lam_dataframe(csv_url):
     response = requests.get(csv_url, headers=LAM_STATION_USER_HEADER)
+    assert (
+        response.status_code == 200
+    ), "Fetching LAM data from {} failed, status code {}".format(
+        csv_url, response.status_code
+    )
     string_data = response.content
     csv_data = pd.read_csv(io.StringIO(string_data.decode("utf-8")), delimiter=";")
     return csv_data
diff --git a/eco_counter/tasks.py b/eco_counter/tasks.py
index af525b84a..b15b4c82f 100644
--- a/eco_counter/tasks.py
+++ b/eco_counter/tasks.py
@@ -13,6 +13,11 @@ def initial_import_counter_data(args, name="initial_import_counter_data"):
     management.call_command("import_counter_data", "--init", args)


+@shared_task_email
+def force_initial_import_counter_data(args, name="force_initial_import_counter_data"):
+    management.call_command("import_counter_data", "--force", "--init", args)
+
+
 @shared_task_email
 def delete_counter_data(args, name="delete_counter_data"):
     management.call_command("delete_counter_data", "--counters", args)
diff --git a/eco_counter/tests/test_import_counter_data.py b/eco_counter/tests/test_import_counter_data.py
index bad84144e..3ed15b6da 100644
--- a/eco_counter/tests/test_import_counter_data.py
+++ b/eco_counter/tests/test_import_counter_data.py
@@ -6,6 +6,7 @@
 The main purpose of these tests are to verify that the importer imports and
 calculates the data correctly.
""" + import calendar from datetime import datetime, timedelta from io import StringIO diff --git a/environment_data/api/utils.py b/environment_data/api/utils.py index 4c78d2206..2b69b104b 100644 --- a/environment_data/api/utils.py +++ b/environment_data/api/utils.py @@ -1,55 +1,127 @@ -from datetime import datetime - -from rest_framework.exceptions import ParseError - -from .constants import DATA_TYPES, DATETIME_FORMATS, DAY, HOUR, MONTH, WEEK, YEAR - - -def validate_timestamp(timestamp_str, data_type): - time_format = DATETIME_FORMATS[data_type] - try: - datetime.strptime(timestamp_str, time_format) - except ValueError: - return f"{timestamp_str} invalid format date format, valid format for type {data_type} is {time_format}" - return None - - -def get_start_and_end_and_year(filters, data_type): - start = filters.get("start", None) - end = filters.get("end", None) - year = filters.get("year", None) - - if not start or not end: - raise ParseError("Supply both 'start' and 'end' parameters") - - if YEAR not in data_type and not year: - raise ParseError("Supply 'year' parameter") - - res1 = None - res2 = None - match data_type: - case DATA_TYPES.DAY: - res1 = validate_timestamp(start, DAY) - res2 = validate_timestamp(end, DAY) - case DATA_TYPES.HOUR: - res1 = validate_timestamp(start, HOUR) - res2 = validate_timestamp(end, HOUR) - case DATA_TYPES.WEEK: - res1 = validate_timestamp(start, WEEK) - res2 = validate_timestamp(end, WEEK) - case DATA_TYPES.MONTH: - res1 = validate_timestamp(start, MONTH) - res2 = validate_timestamp(end, MONTH) - case DATA_TYPES.YEAR: - res1 = validate_timestamp(start, YEAR) - res2 = validate_timestamp(end, YEAR) - - if res1: - raise ParseError(res1) - if res2: - raise ParseError(res2) - - if HOUR in data_type or DAY in data_type: - start = f"{year}-{start}" - end = f"{year}-{end}" - return start, end, year +import django_filters + +from environment_data.models import ( + DayData, + HourData, + MonthData, + Station, + WeekData, + YearData, +) + + +class StationFilterSet(django_filters.FilterSet): + geo_id = django_filters.NumberFilter(field_name="geo_id", lookup_expr="exact") + name = django_filters.CharFilter(lookup_expr="icontains") + + class Meta: + model = Station + fields = {"data_type": ["exact"]} + + +class BaseFilterSet(django_filters.FilterSet): + + station_id = django_filters.NumberFilter(field_name="station") + + class Meta: + fields = {"station": ["exact"]} + + def get_date(self, year_number, month_and_day): + return f"{year_number}-{month_and_day}" + + +class YearDataFilterSet(django_filters.FilterSet): + station_id = django_filters.NumberFilter(field_name="station") + start = django_filters.NumberFilter( + field_name="year__year_number", lookup_expr="gte" + ) + end = django_filters.NumberFilter(field_name="year__year_number", lookup_expr="lte") + + class Meta: + model = YearData + fields = {"station": ["exact"]} + + +class MonthDataFilterSet(BaseFilterSet): + def filter_year(self, queryset, field, year): + return queryset.filter(month__year__year_number=year) + + year = django_filters.NumberFilter(method="filter_year") + start = django_filters.NumberFilter( + field_name="month__month_number", lookup_expr="gte" + ) + end = django_filters.NumberFilter( + field_name="month__month_number", lookup_expr="lte" + ) + + class Meta: + model = MonthData + fields = BaseFilterSet.Meta.fields + + +class WeekDataFilterSet(BaseFilterSet): + def filter_year(self, queryset, field, year): + return queryset.filter(week__years__year_number=year) + + year = 
django_filters.NumberFilter(method="filter_year") + start = django_filters.NumberFilter( + field_name="week__week_number", lookup_expr="gte" + ) + end = django_filters.NumberFilter(field_name="week__week_number", lookup_expr="lte") + + class Meta: + model = WeekData + fields = BaseFilterSet.Meta.fields + + +class DateDataFilterSet(BaseFilterSet): + DATE_MODEL_NAME = None + YEAR_LOOKUP = None + + def filter_year(self, queryset, field, year): + return queryset.filter(**{f"{self.DATE_MODEL_NAME}__year__year_number": year}) + + def filter_start(self, queryset, field, start): + first = queryset.first() + if first: + lookup = first + if self.YEAR_LOOKUP: + lookup = getattr(first, self.YEAR_LOOKUP) + date = self.get_date(lookup.day.year.year_number, start) + return queryset.filter(**{f"{self.DATE_MODEL_NAME}__date__gte": date}) + else: + return queryset.none() + + def filter_end(self, queryset, field, end): + first = queryset.first() + if first: + lookup = first + if self.YEAR_LOOKUP: + lookup = getattr(first, self.YEAR_LOOKUP) + date = self.get_date(lookup.day.year.year_number, end) + return queryset.filter(**{f"{self.DATE_MODEL_NAME}__date__lte": date}) + else: + return queryset.none() + + year = django_filters.NumberFilter(method="filter_year") + start = django_filters.CharFilter(method="filter_start") + end = django_filters.CharFilter(method="filter_end") + + +class DayDataFilterSet(DateDataFilterSet): + + DATE_MODEL_NAME = "day" + + class Meta: + model = DayData + fields = BaseFilterSet.Meta.fields + + +class HourDataFilterSet(DateDataFilterSet): + + DATE_MODEL_NAME = "hour__day" + YEAR_LOOKUP = "hour" + + class Meta: + model = HourData + fields = BaseFilterSet.Meta.fields diff --git a/environment_data/api/views.py b/environment_data/api/views.py index c52a89ce6..da4dc8766 100644 --- a/environment_data/api/views.py +++ b/environment_data/api/views.py @@ -1,12 +1,12 @@ from django.utils.decorators import method_decorator from django.views.decorators.cache import cache_page +from django_filters.rest_framework import DjangoFilterBackend from drf_spectacular.utils import extend_schema, extend_schema_view -from rest_framework import status, viewsets -from rest_framework.response import Response +from rest_framework import viewsets +from rest_framework.exceptions import ValidationError from environment_data.api.constants import ( DATA_TYPES, - DATETIME_FORMATS, ENVIRONMENT_DATA_PARAMS, ENVIRONMENT_STATION_PARAMS, ) @@ -19,7 +19,7 @@ WeekDataSerializer, YearDataSerializer, ) -from environment_data.constants import DATA_TYPES_LIST, VALID_DATA_TYPE_CHOICES +from environment_data.constants import DATA_TYPES_LIST from environment_data.models import ( DayData, HourData, @@ -30,7 +30,14 @@ YearData, ) -from .utils import get_start_and_end_and_year +from .utils import ( + DayDataFilterSet, + HourDataFilterSet, + MonthDataFilterSet, + StationFilterSet, + WeekDataFilterSet, + YearDataFilterSet, +) @extend_schema_view( @@ -42,25 +49,12 @@ class StationViewSet(viewsets.ReadOnlyModelViewSet): queryset = Station.objects.all() serializer_class = StationSerializer + filter_backends = [DjangoFilterBackend] + filterset_class = StationFilterSet @method_decorator(cache_page(60 * 60)) def list(self, request, *args, **kwargs): - queryset = self.queryset - filters = self.request.query_params - data_type = filters.get("data_type", None) - if data_type: - data_type = str(data_type).upper() - if data_type not in DATA_TYPES_LIST: - return Response( - f"Invalid data type, valid types are: {VALID_DATA_TYPE_CHOICES}", - 
status=status.HTTP_400_BAD_REQUEST, - ) - - queryset = queryset.filter(data_type=data_type) - - page = self.paginate_queryset(queryset) - serializer = self.serializer_class(page, many=True) - return self.get_paginated_response(serializer.data) + return super().list(request, *args, **kwargs) @extend_schema_view( @@ -82,78 +76,64 @@ class ParameterViewSet(viewsets.ReadOnlyModelViewSet): ) ) class DataViewSet(viewsets.GenericViewSet): - queryset = YearData.objects.all() - def list(self, request, *args, **kwargs): - filters = self.request.query_params - station_id = filters.get("station_id", None) - if not station_id: - return Response( - "Supply 'station_id' parameter.", status=status.HTTP_400_BAD_REQUEST - ) - else: - try: - station = Station.objects.get(id=station_id) - except Station.DoesNotExist: - return Response( - f"Station with id {station_id} not found.", - status=status.HTTP_400_BAD_REQUEST, - ) + queryset = [] + serializer_class = None - data_type = filters.get("type", None) - if not data_type: - return Response( - "Supply 'type' parameter", status=status.HTTP_400_BAD_REQUEST - ) - else: - data_type = data_type.lower() + def get_serializer_class(self): + data_type = self.request.query_params.get("type", "").lower() + match data_type: + case DATA_TYPES.HOUR: + return HourDataSerializer + case DATA_TYPES.DAY: + return DayDataSerializer + case DATA_TYPES.WEEK: + return WeekDataSerializer + case DATA_TYPES.MONTH: + return MonthDataSerializer + case DATA_TYPES.YEAR: + return YearDataSerializer + case _: + raise ValidationError( + f"Provide a valid 'type' parameter. Valid types are: {', '.join([f for f in DATA_TYPES_LIST])}", + ) - start, end, year = get_start_and_end_and_year(filters, data_type) + def get_queryset(self): + params = self.request.query_params + data_type = params.get("type", "").lower() + queryset = YearData.objects.all() match data_type: case DATA_TYPES.HOUR: - queryset = HourData.objects.filter( - station=station, - hour__day__year__year_number=year, - hour__day__date__gte=start, - hour__day__date__lte=end, + filter_set = HourDataFilterSet( + data=params, queryset=HourData.objects.all() ) - serializer_class = HourDataSerializer case DATA_TYPES.DAY: - queryset = DayData.objects.filter( - station=station, - day__date__gte=start, - day__date__lte=end, - day__year__year_number=year, + filter_set = DayDataFilterSet( + data=params, queryset=DayData.objects.all() ) - serializer_class = DayDataSerializer case DATA_TYPES.WEEK: - serializer_class = WeekDataSerializer - queryset = WeekData.objects.filter( - week__years__year_number=year, - station=station, - week__week_number__gte=start, - week__week_number__lte=end, + filter_set = WeekDataFilterSet( + data=params, queryset=WeekData.objects.all() ) case DATA_TYPES.MONTH: - serializer_class = MonthDataSerializer - queryset = MonthData.objects.filter( - month__year__year_number=year, - station=station, - month__month_number__gte=start, - month__month_number__lte=end, + filter_set = MonthDataFilterSet( + data=params, queryset=MonthData.objects.all() ) case DATA_TYPES.YEAR: - serializer_class = YearDataSerializer - queryset = YearData.objects.filter( - station=station, - year__year_number__gte=start, - year__year_number__lte=end, + filter_set = YearDataFilterSet( + data=params, queryset=YearData.objects.all() ) case _: - return Response( - f"Provide a valid 'type' parameters. 
Valid types are: {', '.join([f for f in DATETIME_FORMATS])}", - status=status.HTTP_400_BAD_REQUEST, + raise ValidationError( + f"Provide a valid 'type' parameter. Valid types are: {', '.join([f for f in DATA_TYPES_LIST])}", ) + if filter_set and filter_set.is_valid(): + return filter_set.qs + else: + return queryset.none() + + def list(self, request, *args, **kwargs): + queryset = self.get_queryset() page = self.paginate_queryset(queryset) - serializer = serializer_class(page, many=True) + serializer = self.get_serializer_class()(page, many=True) return self.get_paginated_response(serializer.data) diff --git a/environment_data/management/commands/weather_observation_utils.py b/environment_data/management/commands/weather_observation_utils.py index 3bde67721..b5d510f50 100644 --- a/environment_data/management/commands/weather_observation_utils.py +++ b/environment_data/management/commands/weather_observation_utils.py @@ -45,9 +45,9 @@ def get_dataframe(stations, from_year=START_YEAR, from_month=1, initial_import=F if not initial_import and from_year == current_date_time.year: params["startTime"] = f"{from_year}-{from_month}-01T00:00Z" else: - params[ - "startTime" - ] = f"{start_date_time.year}-{start_date_time.month}-01T00:00Z" + params["startTime"] = ( + f"{start_date_time.year}-{start_date_time.month}-01T00:00Z" + ) if current_date_time - relativedelta(months=1) < start_date_time: params["endTime"] = current_date_time.strftime(TIME_FORMAT) else: @@ -56,9 +56,9 @@ def get_dataframe(stations, from_year=START_YEAR, from_month=1, initial_import=F + relativedelta(months=1) - relativedelta(hours=1) ) - params[ - "endTime" - ] = f"{tmp_time.year}-{tmp_time.month}-{tmp_time.day}T23:00Z" + params["endTime"] = ( + f"{tmp_time.year}-{tmp_time.month}-{tmp_time.day}T23:00Z" + ) response = REQUEST_SESSION.get(DATA_URL, params=params) logger.info(f"Requested data from: {response.url}") diff --git a/environment_data/tests/conftest.py b/environment_data/tests/conftest.py index 6dad1c53b..fe65f6c48 100644 --- a/environment_data/tests/conftest.py +++ b/environment_data/tests/conftest.py @@ -49,6 +49,8 @@ def stations(parameters): @pytest.fixture def measurements(parameters): Measurement.objects.create(id=1, parameter=Parameter.objects.get(id=1), value=1.5) + Measurement.objects.create(id=2, parameter=Parameter.objects.get(id=2), value=2) + return Measurement.objects.all() @@ -58,7 +60,6 @@ def parameters(): Parameter.objects.create(id=1, name="AQINDEX_PT1H_avg") Parameter.objects.create(id=2, name="NO2_PT1H_avg") Parameter.objects.create(id=3, name="WS_PT1H_avg") - return Parameter.objects.all() @@ -66,6 +67,7 @@ def parameters(): @pytest.fixture def years(): Year.objects.create(id=1, year_number=2023) + Year.objects.create(id=2, year_number=2022) return Year.objects.all() @@ -73,6 +75,7 @@ def years(): @pytest.fixture def months(years): Month.objects.create(month_number=1, year=years[0]) + Month.objects.create(month_number=1, year=years[1]) return Month.objects.all() @@ -81,6 +84,8 @@ def months(years): def weeks(years): week = Week.objects.create(week_number=1) week.years.add(years[0]) + week = Week.objects.create(week_number=1) + week.years.add(years[1]) return Week.objects.all() @@ -93,6 +98,12 @@ def days(years, months, weeks): month=months[0], week=weeks[0], ) + Day.objects.create( + date=parser.parse("2022-01-01 00:00:00"), + year=years[1], + month=months[1], + week=weeks[1], + ) return Day.objects.all() @@ -100,6 +111,7 @@ def days(years, months, weeks): @pytest.fixture def hours(days): 
Hour.objects.create(day=days[0], hour_number=0) + Hour.objects.create(day=days[1], hour_number=0) return Hour.objects.all() @@ -108,6 +120,8 @@ def hours(days): def year_datas(stations, years, measurements): year_data = YearData.objects.create(station=stations[0], year=years[0]) year_data.measurements.add(measurements[0]) + year_data = YearData.objects.create(station=stations[0], year=years[1]) + year_data.measurements.add(measurements[1]) return YearData.objects.all() @@ -116,6 +130,8 @@ def year_datas(stations, years, measurements): def month_datas(stations, months, measurements): month_data = MonthData.objects.create(station=stations[0], month=months[0]) month_data.measurements.add(measurements[0]) + month_data = MonthData.objects.create(station=stations[0], month=months[1]) + month_data.measurements.add(measurements[1]) return MonthData.objects.all() @@ -124,14 +140,17 @@ def month_datas(stations, months, measurements): def week_datas(stations, weeks, measurements): week_data = WeekData.objects.create(station=stations[0], week=weeks[0]) week_data.measurements.add(measurements[0]) + week_data = WeekData.objects.create(station=stations[0], week=weeks[1]) + week_data.measurements.add(measurements[1]) return WeekData.objects.all() -@pytest.mark.django_db @pytest.fixture def day_datas(stations, days, measurements): day_data = DayData.objects.create(station=stations[0], day=days[0]) day_data.measurements.add(measurements[0]) + day_data = DayData.objects.create(station=stations[0], day=days[1]) + day_data.measurements.add(measurements[1]) return DayData.objects.all() @@ -140,4 +159,6 @@ def day_datas(stations, days, measurements): def hour_datas(stations, hours, measurements): hour_data = HourData.objects.create(station=stations[0], hour=hours[0]) hour_data.measurements.add(measurements[0]) + hour_data = HourData.objects.create(station=stations[0], hour=hours[1]) + hour_data.measurements.add(measurements[1]) return HourData.objects.all() diff --git a/environment_data/tests/test_api.py b/environment_data/tests/test_api.py index 6635fc5c0..aecb46962 100644 --- a/environment_data/tests/test_api.py +++ b/environment_data/tests/test_api.py @@ -48,6 +48,7 @@ def test_day_data(api_client, day_datas, parameters): ) response = api_client.get(url) assert response.status_code == 200 + assert len(response.json()["results"]) == 1 json_data = response.json()["results"][0] assert len(json_data["measurements"]) == 1 assert json_data["measurements"][0]["value"] == 1.5 @@ -55,6 +56,17 @@ def test_day_data(api_client, day_datas, parameters): assert json_data["date"] == "2023-01-01" +@pytest.mark.django_db +def test_day_data_non_existing_year(api_client, day_datas, parameters): + url = ( + reverse("environment_data:data-list") + + "?year=2020&start=01-01&end=02-01&station_id=1&type=day" + ) + response = api_client.get(url) + assert response.status_code == 200 + assert len(response.json()["results"]) == 0 + + @pytest.mark.django_db def test_week_data(api_client, week_datas, parameters): url = ( @@ -63,6 +75,7 @@ def test_week_data(api_client, week_datas, parameters): ) response = api_client.get(url) assert response.status_code == 200 + assert len(response.json()["results"]) == 1 json_data = response.json()["results"][0] assert len(json_data["measurements"]) == 1 assert json_data["measurements"][0]["value"] == 1.5 @@ -78,11 +91,38 @@ def test_month_data(api_client, month_datas, parameters): ) response = api_client.get(url) assert response.status_code == 200 + assert len(response.json()["results"]) == 1 json_data = 
response.json()["results"][0] assert len(json_data["measurements"]) == 1 assert json_data["measurements"][0]["value"] == 1.5 assert json_data["measurements"][0]["parameter"] == parameters[0].name assert json_data["month_number"] == 1 + url = ( + reverse("environment_data:data-list") + + "?year=2023&start=1&end=1&station_id=411&type=month" + ) + response = api_client.get(url) + assert len(response.json()["results"]) == 0 + + +@pytest.mark.django_db +def test_month_data_non_existing_year(api_client, month_datas, parameters): + url = ( + reverse("environment_data:data-list") + + "?year=2020&start=1&end=1&station_id=411&type=month" + ) + response = api_client.get(url) + assert len(response.json()["results"]) == 0 + + +@pytest.mark.django_db +def test_month_data_chars_in_arguments(api_client, month_datas, parameters): + url = ( + reverse("environment_data:data-list") + + "?year=foo&start=abc&end=dce&station_id=foobar&type=month" + ) + response = api_client.get(url) + assert len(response.json()["results"]) == 0 @pytest.mark.django_db @@ -94,6 +134,7 @@ def test_year_data(api_client, year_datas, parameters): response = api_client.get(url) assert response.status_code == 200 json_data = response.json()["results"][0] + assert len(response.json()["results"]) == 1 assert len(json_data["measurements"]) == 1 assert json_data["measurements"][0]["value"] == 1.5 assert json_data["measurements"][0]["parameter"] == parameters[0].name diff --git a/exceptional_situations/management/commands/import_traffic_situations.py b/exceptional_situations/management/commands/import_traffic_situations.py index 80e23c37b..0ddcc7c7e 100644 --- a/exceptional_situations/management/commands/import_traffic_situations.py +++ b/exceptional_situations/management/commands/import_traffic_situations.py @@ -34,7 +34,8 @@ "?inactiveHours=0&includeAreaGeometry=true&situationType=TRAFFIC_ANNOUNCEMENT" ) URLS = [ROAD_WORK_URL, TRAFFIC_ANNOUNCEMENT_URL] -DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" +DATETIME_FORMATS = ["%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"] + SOUTHWEST_FINLAND_POLYGON = Polygon( SOUTHWEST_FINLAND_BOUNDARY, srid=SOUTHWEST_FINLAND_BOUNDARY_SRID ) @@ -44,19 +45,21 @@ class Command(BaseCommand): def get_geos_geometry(self, feature_data): return GEOSGeometry(str(feature_data["geometry"]), srid=PROJECTION_SRID) - def create_location(self, geometry, announcement_data): + def create_location(self, geometry, announcement_data, announcement): location = None details = announcement_data["locationDetails"].get("roadAddressLocation", None) - details.update(announcement_data.get("location", None)) + if details: + details.update(announcement_data.get("location", None)) filter = { "geometry": geometry, "location": location, "details": details, + "announcement": announcement, } situation_location = SituationLocation.objects.create(**filter) return situation_location - def create_announcement(self, announcement_data, situation_location): + def create_announcement(self, announcement_data): title = announcement_data.get("title", "") description = announcement_data["location"].get("description", "") additional_info = {} @@ -81,7 +84,6 @@ def create_announcement(self, announcement_data, situation_location): if end_time: end_time = parser.parse(end_time) filter = { - "location": situation_location, "title": title, "description": description, "additional_info": additional_info, @@ -110,11 +112,21 @@ def handle(self, *args, **options): if not properties: continue situation_id = properties.get("situationId", None) - release_time = 
properties.get("releaseTime", None) - release_time = datetime.strptime(release_time, DATETIME_FORMAT).replace( - microsecond=0 - ) - release_time = timezone.make_aware(release_time, timezone.utc) + release_time_str = properties.get("releaseTime", None) + if release_time_str: + for format_str in DATETIME_FORMATS: + try: + release_time = datetime.strptime( + release_time_str, format_str + ) + except ValueError: + pass + else: + break + + if release_time.microsecond != 0: + release_time.replace(microsecond=0) + release_time = timezone.make_aware(release_time, timezone.utc) type_name = properties.get("situationType", None) sub_type_name = properties.get("trafficAnnouncementType", None) @@ -127,18 +139,18 @@ def handle(self, *args, **options): "situation_id": situation_id, "situation_type": situation_type, } - situation, _ = Situation.objects.get_or_create(**filter) + situation, created = Situation.objects.get_or_create(**filter) situation.release_time = release_time situation.save() - - SituationAnnouncement.objects.filter(situation=situation).delete() - situation.announcements.clear() + if not created: + SituationAnnouncement.objects.filter(situation=situation).delete() + situation.announcements.clear() for announcement_data in properties.get("announcements", []): - situation_location = self.create_location( - geometry, announcement_data - ) situation_announcement = self.create_announcement( - deepcopy(announcement_data), situation_location + deepcopy(announcement_data) + ) + self.create_location( + geometry, announcement_data, situation_announcement ) situation.announcements.add(situation_announcement) num_imported += 1 diff --git a/mobility_data/README.md b/mobility_data/README.md index 36398ce48..0db5ac0aa 100644 --- a/mobility_data/README.md +++ b/mobility_data/README.md @@ -26,6 +26,11 @@ To import data type: ``` ./manage.py import_charging_stations ``` +### Parking garages +To import data type: +``` +./manage.py import_parking_garages +``` ### Culture Routes To import data type: ``` @@ -165,7 +170,6 @@ To import data type: ./manage.py import_wfs BarbecuePlace ``` - ### Playgrounds ``` ./manage.py import_wfs PlayGround @@ -185,7 +189,7 @@ Imports the outdoor gym devices from the services.unit model. 
i.e., sets referen ### Parking machines ``` -./manage.py import_parking_machines +./manage.py import_wfs ParkingMachine ``` ### School and kindergarten accessibility areas diff --git a/mobility_data/constants.py b/mobility_data/constants.py index 8d5a03033..3fdb2edc8 100644 --- a/mobility_data/constants.py +++ b/mobility_data/constants.py @@ -12,6 +12,7 @@ from mobility_data.importers.loading_unloading_places import ( CONTENT_TYPE_NAME as LOADING_UNLOADING_PLACE, ) +from mobility_data.importers.parking_garages import CONTENT_TYPE_NAME as PARKING_GARAGE from mobility_data.importers.parking_machines import ( CONTENT_TYPE_NAME as PARKING_MACHINE, ) @@ -55,4 +56,8 @@ "importer_name": "parking_machines", "to_services_list": False, }, + PARKING_GARAGE: { + "importer_name": "parking_garages", + "to_services_list": False, + }, } diff --git a/mobility_data/data/parkkihallit.csv b/mobility_data/data/parkkihallit.csv new file mode 100644 index 000000000..c634066f3 --- /dev/null +++ b/mobility_data/data/parkkihallit.csv @@ -0,0 +1,21 @@ +Nimi;Osoite;N;E;Pysäköintipaikat;Invapaikat;Sähkölatauspaikat;Palvelut;Palvelut (ru);Palvelut (eng);Huom;Huom (ru);Huom (eng);Linkki sivuille +Auriga;Juhana Herttuan puistokatu 21;6703305;23457449;330;2;2 x Type 2 22 kW;Apuvirta, hissi, liikkumisesteisen pysäköintipaikka, sähköauton latauspiste;Elbilsladdning, hiss, parkeringsplats för funktionshindrade, startkablar;Disabled parking, elevator, EV charging, jump leads;;;;https://www.aimopark.fi/kaupungit/turku/auriga-/ +Hansakortteli;Kristiinankatu 11;6704479;23459433;162;2;2 x Type 2 11kW, 2 x CSS 90kW;Apuvirta, hissi, kameratunnistus, kengänkiillotin, liikkumisesteisen pysäköintipaikka, ostoskärryt, rengaspumppu, sateenvarjon lainaus, sähköauton latauspiste, videovalvonta, yövartiointi;Elbilsladdning, hiss, kameraparkering, kameraövervakat, kundvagnar, nattlig övervakning, paraply, parkeringsplats för funktionshindrade, skoputs, startkablar, tryckluft;Automatic number-plate recognition (ANPR), CCTV, disabled parking, elevator, EV charging, jump leads, shoe polisher, shopping trolley, surveillance at night, tyre pump, umbrella hire;Parkkihallin omalla sivulla ja aimoparkin latausverkostokartalla eriävää tietoa latauspisteistä;Information angående elbilsladdning på parkeringshusets hemsida strider mot informationen på aimoparks laddningsnätverk karta;Information regarding EV charging on the parking garage's website conflicts with the information on aimopark's charging network map;https://www.aimopark.fi/kaupungit/turku/hansakortteli/ +Harppuunaparkki;Vallihaudankatu 1;6702966;23457475;294;2;4 x Type 2 22 kW;Sähköauton latauspiste;Elbilsladdning;EV charging;Parkkihallin omalla sivulla ja aimoparkin latausverkostokartalla eriävää tietoa latauspisteistä;Information angående elbilsladdning på parkeringshusets hemsida strider mot informationen på aimoparks laddningsnätverk karta;Information regarding EV charging on the parking garage's website conflicts with the information on aimopark's charging network map;https://www.aimopark.fi/kaupungit/turku/harppuunaparkki/ +Hesburger Kupittaa;Lemminkäisenkatu 13;6703912;23461509;48;1; ;Apuvirta, hissi, liikkumisesteisen pysäköintipaikka, sähköauton latauspiste;Elbilsladdning, hiss, parkeringsplats för funktionshindrade, startkablar;Disabled parking, elevator, EV charging, jump leads;;;;https://www.aimopark.fi/fi-fi/cities/turku/hesburger-kupittaa/ +Itäharjun kenttä;Karjakatu 37;6704308;23461903;300;;;Apuvirta;Startkablar;Jump 
leads;;;;https://www.aimopark.fi/kaupungit/turku/itaharjun-kentta/ +Julia;Brahenkatu 3;6704636;23459940;260;2;2 x Type 2 11 kW;Apuvirta, defibrillaattori, hissi, info, puhelimen kuuluvuusalue, sähköauton latauspiste, videovalvonta, yövartiointi;Elbilsladdning, hiss, hjärtstartare, information, kameraövervakat, mobiltäckning, nattlig övervakning, startkablar;Automated external defibrillator (AED), CCTV, elevator, EV charging, information, jump leads, mobile phone coverage, surveillance at night;;;;https://www.aimopark.fi/kaupungit/turku/julia/ +Kivikukkaro;Yliopistonkatu 29;6704478;23459294;216;3;2 x Type 2 22kW;Apuvirta, julkinen liikenne, korttimaksu, käteismaksu, mobiilimaksu, pyöräparkki, sähköauton latauspiste, videovalvonta, yövartiointi;Cykelparkering, elbilsladdning, kameraövervakat, kollektivtrafik, kortbetalning, mobilbetalning, myntbetalning, nattlig övervakning, startkablar;Bicycle parking, CCTV, coin payment, credit card payment, EV charging, jump leads, mobile payment, public transportation, surveillance at night;;;;https://www.aimopark.fi/kaupungit/turku/kivikukkaro/ +Kupittaanpuisto;Lenkkipolku;6704227;23461020;86;;;;;;;;;https://www.aimopark.fi/kaupungit/turku/kupittaanpuisto/ +Louhi;Läntinen Pitkäkatu 12 B;6704994;23459306;608;10;10 x Type 2 22 kW, 2 x CSS 60kW;Apuvirta, autopesula, defibrillaattori, hissi, info, kameratunnistus, kengänkiilltoin, liikkumistesteisen pysäköintipaikka, ostoskärryt, sateenvarjon lainaus, sähköauton latauspiste, videovalvonta;Biltvätt, elbilsladdning, hiss, hjärtstartare, information, kameraparkering, kameraövervakat, kundvagnar, paraply, parkeringsplats för funktionshindrade, skoputs, startkablar;Automated external defibrillator (AED), automatic number plate recognition, carwash, CCTV, disabled parking, elevator, EV charging, information, jump leads, shoe polisher, shopping trolley, umbrella hire;Parkkihallin omalla sivulla ja aimoparkin latausverkostokartalla eriävää tietoa latauspisteistä;Information angående elbilsladdning på parkeringshusets hemsida strider mot informationen på aimoparks laddningsnätverk karta;Information regarding EV charging on the parking garage's website conflicts with the information on aimopark's charging network map;https://www.aimopark.fi/kaupungit/turku/louhi/ +ParkCity;Joukahaisenkatu 8;6704372;23461311;990;;20 x Type 2 11 kW;Autovuokraamo, hissi, julkinen liikenne, kameratunnistus, pyöräparkki, sähköauton latauspiste, videovalvonta;Biluthyring, cykelparkering, elbilsladdning, hiss, kameraparkering, kameraövervakat, kollektivtrafik;Automatic number-plate recognition (ANPR), bicycle parking, car rental, CCTV, elevator, EV charging, public transportation;;;;https://www.aimopark.fi/kaupungit/turku/parkcity/ +Pharmacity;Lemminkäisenkatu 9;6704134;23461240;220;;;Apuvirta;Startkablar;Jump leads;;;;https://www.aimopark.fi/kaupungit/turku/pharmacity/ +P-Centrum;Kristiinankatu 8;6704432;23459438;88;;;;;;;;;https://www.europark.fi/pysakointi/p-centrum/ +P-Puutori;Brahenkatu 13;6704995;23459698;100;;;;;;;;;https://www.p-puutori.fi/ +Savitehtaankadun pysäköintitalo;Savitehtaankatu 7;6704694;23461306;100;;;;;;;;; +Scandic Plaza;Yliopistonkatu 29;6704504;23459333;42;;1 x Type 2 22 kW;Sähköauton latauspiste;Elbilsladdning;EV charging;;;;https://www.aimopark.fi/kaupungit/turku/scandic-plaza/ +Tahkonaukio;Lemminkäisenkatu 9 D;6704136;23461236;120;;;;;;;;;https://www.aimopark.fi/kaupungit/turku/tahkonaukio/ +Toriparkki;;6704748;23459727;620; ;20;Autopesula, sähköauton latauspiste;Biltvätt, elbilsladdning ;Carwash, EV 
charging;;;;https://www.turuntoriparkki.fi/
+Turun teknologiakiinteistöt;Tykistökatu 6;6704371;23461176;800;;;;;;;;;https://www.aimopark.fi/kaupungit/turku/teknologiakiinteistot/
+Trivium;Lemminkäisenkatu 32;6703957;23461524;461;2;4 x Type 2 22 kW;;;;;;;https://www.aimopark.fi/kaupungit/turku/trivium/
+Wiklund;Brahenkatu 8;6704702;23459855;;4;;;;;;;;https://www.europark.fi/pysakointi/p-wiklund-turku/
\ No newline at end of file
diff --git a/mobility_data/importers/bicycle_stands.py b/mobility_data/importers/bicycle_stands.py
index a1b3b4ec4..df59b8c2b 100644
--- a/mobility_data/importers/bicycle_stands.py
+++ b/mobility_data/importers/bicycle_stands.py
@@ -2,6 +2,7 @@
 Note, bicycle stands are not imorter via the wfs importer as it needs logic
 to derive if the stand is hull lockable or covered.
 """
+
 import logging
 import os
diff --git a/mobility_data/importers/bike_service_stations.py b/mobility_data/importers/bike_service_stations.py
index 3dff7e082..a50fcabf9 100644
--- a/mobility_data/importers/bike_service_stations.py
+++ b/mobility_data/importers/bike_service_stations.py
@@ -50,9 +50,9 @@ def __init__(self, feature):
             # If no swedish address, retrieve it from the database.
             if language == "sv":
                 street_name, number = addresses[0].split(" ")
-                self.address[
-                    language
-                ] = f"{get_street_name_translations(street_name, municipality)[language]} number"
+                self.address[language] = (
+                    f"{get_street_name_translations(street_name, municipality)[language]} number"
+                )
             # Source data does not contain English addresses, assign the Finnsh
             else:
                 self.address[language] = addresses[0]
diff --git a/mobility_data/importers/charging_stations.py b/mobility_data/importers/charging_stations.py
index 21f80a9ad..555fe50b9 100644
--- a/mobility_data/importers/charging_stations.py
+++ b/mobility_data/importers/charging_stations.py
@@ -6,10 +6,9 @@
 from munigeo.models import Municipality

 from .utils import (
-    get_file_name_from_data_source,
+    get_full_csv_file_name,
     get_municipality_name,
     get_postal_code,
-    get_root_dir,
     get_street_name_translations,
     LANGUAGES,
     MobileUnitDataBase,
@@ -111,16 +110,9 @@ def get_number_of_rows(file_name):
     return number_of_rows


-def get_csv_file_name():
-    file_name = get_file_name_from_data_source(CONTENT_TYPE_NAME)
-    if file_name:
-        return file_name
-    return f"{get_root_dir()}/mobility_data/data/{SOURCE_DATA_FILE_NAME}"
-
-
 def get_charging_station_objects():
     # Store the imported stations to dict, the index is the key.
-    file_name = get_csv_file_name()
+    file_name = get_full_csv_file_name(SOURCE_DATA_FILE_NAME, CONTENT_TYPE_NAME)
     charging_stations = {}
     column_mappings = {}
     number_of_rows = get_number_of_rows(file_name)
diff --git a/mobility_data/importers/data/content_types.yml b/mobility_data/importers/data/content_types.yml
index c3b76eb5f..a40a6dbe8 100644
--- a/mobility_data/importers/data/content_types.yml
+++ b/mobility_data/importers/data/content_types.yml
@@ -17,6 +17,11 @@ content_types:
       sv: Elladningsstation för bilar
       en: Car e-charging point
+  - content_type_name: ParkingGarage
+    name:
+      fi: Parkkihalli
+      sv: Parkeringsgarage
+      en: Parking garage
   - content_type_name: NoStaffParking
     name:
       fi: Yleiset pysäköintialueet
diff --git a/mobility_data/importers/data/wfs_importer_config.yml b/mobility_data/importers/data/wfs_importer_config.yml
index 9a0bf5f76..46b99e769 100644
--- a/mobility_data/importers/data/wfs_importer_config.yml
+++ b/mobility_data/importers/data/wfs_importer_config.yml
@@ -1,4 +1,26 @@
 features:
+  - content_type_name: ParkingMachine
+    wfs_layer: GIS:Pysakointiautomaatit
+    translate_fi_address_field: Osoite
+    translate_fi_address_municipality_id: turku
+    extra_fields:
+      maksutapa_fi:
+        wfs_field: Maksutapa
+      maksutapa_sv:
+        wfs_field: Maksutapa_sv
+      maksutapa_en:
+        wfs_field: Maksutapa_en
+      maksuvyohyke:
+        wfs_field: Maksuvyohyke
+      taksa:
+        wfs_field: Taksa
+      muu_tieto_fi:
+        wfs_field: Muu_tieto
+      muu_tieto_sv:
+        wfs_field: Muu_tieto_sv
+      muu_tieto_en:
+        wfs_field: Muu_tieto_en
+
   - content_type_name: StreetAreaInformation
     wfs_layer: GIS:Katualueet
     max_features: 100000
diff --git a/mobility_data/importers/data/wfs_importer_config_example.yml b/mobility_data/importers/data/wfs_importer_config_example.yml
index ea98129bf..640d717fe 100644
--- a/mobility_data/importers/data/wfs_importer_config_example.yml
+++ b/mobility_data/importers/data/wfs_importer_config_example.yml
@@ -16,6 +16,13 @@ features:
     municipality: muni_field
     # Optional, if set, include only if geometry is inside the boundarys of Turku, default=False
    locates_in_turku: True
+    # Optional. The field from which the Finnish address is fetched; the importer
+    # will look up and assign the Swedish and English translations for it.
+    # Suitable if only a Finnish address is available in the source data.
+    translate_fi_address_field: field_with_finnish_address
+    # Required if "translate_fi_address_field" is used.
+    # The municipality id of the municipality from which to look up the address translations.
+    translate_fi_address_municipality_id: turku
     # Optional, include only if 'field_name' contains the given string.
     include:
       field_name: this_must_be_in_field_name
diff --git a/mobility_data/importers/marinas.py b/mobility_data/importers/marinas.py
index a2be3283a..1ae436181 100644
--- a/mobility_data/importers/marinas.py
+++ b/mobility_data/importers/marinas.py
@@ -3,6 +3,7 @@
 Note, wfs importer is not used as the berths data is separately assigned
 to the marina mobile units.
 """
+
 import logging

 from django.conf import settings
diff --git a/mobility_data/importers/parking_garages.py b/mobility_data/importers/parking_garages.py
new file mode 100644
index 000000000..567e084d6
--- /dev/null
+++ b/mobility_data/importers/parking_garages.py
@@ -0,0 +1,91 @@
+import csv
+import logging
+
+from django.conf import settings
+from django.contrib.gis.geos import Point
+from munigeo.models import Municipality
+
+from .utils import (
+    get_full_csv_file_name,
+    get_municipality_name,
+    get_street_name_translations,
+    LANGUAGES,
+    MobileUnitDataBase,
+    split_string_at_first_digit,
+)
+
+logger = logging.getLogger("mobility_data")
+SOURCE_DATA_SRID = 3877
+
+CONTENT_TYPE_NAME = "ParkingGarage"
+SOURCE_DATA_FILE_NAME = "parkkihallit.csv"
+COLUMN_MAPPINGS = {
+    "name": 0,
+    "address": 1,
+    "N": 2,
+    "E": 3,
+    "parking_spaces": 4,
+    "disabled_spaces": 5,
+    "charging_stations": 6,
+    "services_fi": 7,
+    "services_sv": 8,
+    "services_en": 9,
+    "notes_fi": 10,
+    "notes_sv": 11,
+    "notes_en": 12,
+}
+
+
+class ParkingGarage(MobileUnitDataBase):
+
+    def __init__(self, values):
+        super().__init__()
+        x = float(values[COLUMN_MAPPINGS["E"]])
+        y = float(values[COLUMN_MAPPINGS["N"]])
+        self.geometry = Point(x, y, srid=SOURCE_DATA_SRID)
+        self.geometry.transform(settings.DEFAULT_SRID)
+        try:
+            self.municipality = Municipality.objects.get(
+                name=get_municipality_name(self.geometry)
+            )
+        except Municipality.DoesNotExist:
+            self.municipality = None
+        address = values[COLUMN_MAPPINGS["address"]]
+        street_name, street_number = split_string_at_first_digit(address)
+        # As the source data contains only Finnish street names, we need to get the translations
+        translated_street_names = get_street_name_translations(
+            street_name.strip(), self.municipality
+        )
+        self.extra["services"] = {}
+        self.extra["notes"] = {}
+        for lang in LANGUAGES:
+            self.name[lang] = values[COLUMN_MAPPINGS["name"]]
+            self.address[lang] = f"{translated_street_names[lang]} {street_number}"
+            self.extra["services"][lang] = values[COLUMN_MAPPINGS[f"services_{lang}"]]
+            self.extra["notes"][lang] = values[COLUMN_MAPPINGS[f"notes_{lang}"]]
+
+        try:
+            parking_spaces = int(values[COLUMN_MAPPINGS["parking_spaces"]])
+        except ValueError:
+            parking_spaces = None
+        self.extra["parking_spaces"] = parking_spaces
+
+        try:
+            disabled_spaces = int(values[COLUMN_MAPPINGS["disabled_spaces"]])
+        except ValueError:
+            disabled_spaces = None
+        self.extra["disabled_spaces"] = disabled_spaces
+        self.extra["charging_stations"] = values[COLUMN_MAPPINGS["charging_stations"]]
+
+
+def get_parking_garage_objects():
+    file_name = get_full_csv_file_name(SOURCE_DATA_FILE_NAME, CONTENT_TYPE_NAME)
+    parking_garages = []
+    with open(file_name, encoding="utf-8-sig") as csv_file:
+        csv_reader = csv.reader(csv_file, delimiter=";")
+        for i, row in enumerate(csv_reader):
+            # Discard header row
+            if i > 0:
+                parking_garages.append(ParkingGarage(row))
+
+    return parking_garages
diff --git a/mobility_data/importers/share_car_parking_places.py b/mobility_data/importers/share_car_parking_places.py
index bed241ed1..25f7898cc 100644
--- a/mobility_data/importers/share_car_parking_places.py
+++ b/mobility_data/importers/share_car_parking_places.py
@@ -47,9 +47,9 @@ def __init__(self, feature):
             street_name["en"] = street_name["fi"]
         self.extra[self.RESTRICTION_FIELD] = {}
         for i, language in enumerate(LANGUAGES):
-            self.name[
-                language
-            ] = f"{self.CAR_PARKING_NAME[language]}, {street_name[language]}"
+            self.name[language] = (
+                f"{self.CAR_PARKING_NAME[language]}, {street_name[language]}"
+            )
             self.address[language] = street_name[language]
             self.extra[self.RESTRICTION_FIELD][language] = restrictions[i].strip()
diff --git a/mobility_data/importers/utils.py b/mobility_data/importers/utils.py
index 2a0a88ab5..131b8a9d3 100644
--- a/mobility_data/importers/utils.py
+++ b/mobility_data/importers/utils.py
@@ -388,3 +388,22 @@ def create_mobile_units_as_unit_references(service_id, content_type):
             obj.unit_id = unit.id
             objects.append(obj)
     save_to_database(objects, content_type)
+
+
+def get_full_csv_file_name(csv_file_name, content_type_name):
+    file_name = get_file_name_from_data_source(content_type_name)
+    if file_name:
+        return file_name
+    return f"{get_root_dir()}/mobility_data/data/{csv_file_name}"
+
+
+def split_string_at_first_digit(s):
+    match = re.search(r"\d", s)
+    if match:
+        index = match.start()
+        return (
+            s[:index],
+            s[index:],
+        )
+    else:
+        return s, ""
diff --git a/mobility_data/importers/wfs.py b/mobility_data/importers/wfs.py
index 666a46f4a..506b4de66 100644
--- a/mobility_data/importers/wfs.py
+++ b/mobility_data/importers/wfs.py
@@ -11,10 +11,13 @@
 from mobility_data.importers.utils import (
     delete_mobile_units,
     get_or_create_content_type_from_config,
+    get_street_name_translations,
+    LANGUAGES,
     locates_in_turku,
     log_imported_message,
     MobileUnitDataBase,
     save_to_database,
+    split_string_at_first_digit,
 )

 DEFAULT_SOURCE_DATA_SRID = 3877
@@ -40,6 +43,7 @@ def __init__(self):
         super().__init__()

     def add_feature(self, feature, config):
+        municipality = None
         create_multipolygon = False
         if "create_multipolygon" in config:
             create_multipolygon = config["create_multipolygon"]
@@ -97,7 +101,6 @@ def add_feature(self, feature, config):
                 self.municipality = Municipality.objects.filter(
                     id=municipality_id
                 ).first()
-
         if "fields" in config:
             for attr, field in config["fields"].items():
                 for lang, field_name in field.items():
@@ -105,6 +108,25 @@ def add_feature(self, feature, config):
                     if getattr(self, attr)[lang] is None:
                         getattr(self, attr)[lang] = feature[field_name].as_string()

+        if "translate_fi_address_municipality_id" in config:
+            municipality = Municipality.objects.filter(
+                id=config["translate_fi_address_municipality_id"].lower()
+            ).first()
+
+        if "translate_fi_address_field" in config:
+            address = feature[config["translate_fi_address_field"]].as_string()
+            if not address[0].isdigit():
+                street_name, street_number = split_string_at_first_digit(address)
+            else:
+                street_name = address
+                street_number = ""
+            muni = municipality if municipality else self.municipality
+            translated_street_names = get_street_name_translations(
+                street_name.strip(), muni
+            )
+            for lang in LANGUAGES:
+                self.address[lang] = f"{translated_street_names[lang]} {street_number}"
+
         if "extra_fields" in config:
             for field, attr in config["extra_fields"].items():
                 val = None
@@ -168,9 +190,12 @@ def import_wfs_feature(config, data_file=None):
     assert len(ds) == 1
     layer = ds[0]
     for feature in layer:
-        object = MobilityData()
-        if object.add_feature(feature, config):
-            objects.append(object)
+        try:
+            object = MobilityData()
+            if object.add_feature(feature, config):
+                objects.append(object)
+        except Exception as e:
+            logger.warning(f"Discarding feature {feature}, cause: {e}")
     content_type = get_or_create_content_type_from_config(config["content_type_name"])
     num_created, num_deleted = save_to_database(objects, content_type)
     log_imported_message(logger, content_type, num_created, num_deleted)
diff --git a/mobility_data/management/commands/import_mobility_data.py b/mobility_data/management/commands/import_mobility_data.py
index bba7c77a9..018d5a7be 100644
--- a/mobility_data/management/commands/import_mobility_data.py
+++ b/mobility_data/management/commands/import_mobility_data.py
@@ -1,6 +1,7 @@
 """
 Imports all mobility data sources.
 """
+
 import logging

 from django.core import management
diff --git a/mobility_data/management/commands/import_parking_garages.py b/mobility_data/management/commands/import_parking_garages.py
new file mode 100644
index 000000000..ef2fd889f
--- /dev/null
+++ b/mobility_data/management/commands/import_parking_garages.py
@@ -0,0 +1,24 @@
+import logging
+
+from django.core.management import BaseCommand
+
+from mobility_data.importers.parking_garages import (
+    CONTENT_TYPE_NAME,
+    get_parking_garage_objects,
+)
+from mobility_data.importers.utils import (
+    get_or_create_content_type_from_config,
+    log_imported_message,
+    save_to_database,
+)
+
+logger = logging.getLogger("mobility_data")
+
+
+class Command(BaseCommand):
+    def handle(self, *args, **options):
+        logger.info("Importing parking garages...")
+        objects = get_parking_garage_objects()
+        content_type = get_or_create_content_type_from_config(CONTENT_TYPE_NAME)
+        num_created, num_deleted = save_to_database(objects, content_type)
+        log_imported_message(logger, content_type, num_created, num_deleted)
diff --git a/mobility_data/management/commands/import_parking_machines.py b/mobility_data/management/commands/import_parking_machines.py
index 97a032f87..39a764b6d 100644
--- a/mobility_data/management/commands/import_parking_machines.py
+++ b/mobility_data/management/commands/import_parking_machines.py
@@ -1,3 +1,9 @@
+"""
+Deprecated: parking machines will in the future be imported with the WFS importer.
+All code related to this importer can be removed once the
+WFS-based import is in production.
+""" + import logging from django.core.management import BaseCommand diff --git a/mobility_data/tasks.py b/mobility_data/tasks.py index 8d4fef6fa..70e5e9918 100644 --- a/mobility_data/tasks.py +++ b/mobility_data/tasks.py @@ -155,7 +155,7 @@ def import_wfs(args=None, name="import_wfs"): @shared_task_email def import_parking_machines(name="import_parking_machines"): - management.call_command("import_parking_machines") + management.call_command("import_wfs", "ParkingMachine") @shared_task_email @@ -168,6 +168,11 @@ def import_street_area_information(name="import_street_area_information"): management.call_command("import_wfs", "StreetAreaInformation") +@shared_task_email +def import_parking_garages(name="import_parking_garages"): + management.call_command("import_parking_garages") + + @shared_task_email def delete_obsolete_data(name="delete_obsolete_data"): MobileUnit.objects.filter(content_types__isnull=True).delete() diff --git a/mobility_data/tests/conftest.py b/mobility_data/tests/conftest.py index 7cbb8c523..c29e64fcc 100644 --- a/mobility_data/tests/conftest.py +++ b/mobility_data/tests/conftest.py @@ -235,6 +235,13 @@ def streets(): name_sv="Bangårdsgatan", municipality_id="turku", ) + Street.objects.create( + name="Juhana Herttuan puistokatu", + name_fi="Juhana Herttuan puistokatu", + name_sv="Hertig Johans parkgata", + name_en="Juhana Herttuan puistokatu", + municipality_id="turku", + ) return Street.objects.all() diff --git a/mobility_data/tests/data/parkkihallit_fixtures.csv b/mobility_data/tests/data/parkkihallit_fixtures.csv new file mode 100644 index 000000000..1811d4590 --- /dev/null +++ b/mobility_data/tests/data/parkkihallit_fixtures.csv @@ -0,0 +1,3 @@ +Nimi;Osoite;N;E;Pysäköintipaikat;Invapaikat;Sähkölatauspaikat;Palvelut;Palvelut (ru);Palvelut (eng);Huom;Huom (ru);Huom (eng);Linkki sivuille +Auriga;Juhana Herttuan puistokatu 21;6703305;23457449;330;2;2 x Type 2 22 kW;Apuvirta, hissi, liikkumisesteisen pysäköintipaikka, sähköauton latauspiste;Elbilsladdning, hiss, parkeringsplats för funktionshindrade, startkablar;Disabled parking, elevator, EV charging, jump leads;;;;https://www.aimopark.fi/kaupungit/turku/auriga-/ +Hansakortteli;Kristiinankatu 11;6704479;23459433;162;2;2 x Type 2 11kW, 2 x CSS 90kW;Apuvirta, hissi, kameratunnistus, kengänkiillotin, liikkumisesteisen pysäköintipaikka, ostoskärryt, rengaspumppu, sateenvarjon lainaus, sähköauton latauspiste, videovalvonta, yövartiointi;Elbilsladdning, hiss, kameraparkering, kameraövervakat, kundvagnar, nattlig övervakning, paraply, parkeringsplats för funktionshindrade, skoputs, startkablar, tryckluft;Automatic number-plate recognition (ANPR), CCTV, disabled parking, elevator, EV charging, jump leads, shoe polisher, shopping trolley, surveillance at night, tyre pump, umbrella hire;Parkkihallin omalla sivulla ja aimoparkin latausverkostokartalla eriävää tietoa latauspisteistä;Information angående elbilsladdning på parkeringshusets hemsida strider mot informationen på aimoparks laddningsnätverk karta;Information regarding EV charging on the parking garage's website conflicts with the information on aimopark's charging network map;https://www.aimopark.fi/kaupungit/turku/hansakortteli/ \ No newline at end of file diff --git a/mobility_data/tests/test_api.py b/mobility_data/tests/test_api.py index 34b95695d..2946a09ea 100644 --- a/mobility_data/tests/test_api.py +++ b/mobility_data/tests/test_api.py @@ -1,6 +1,4 @@ import pytest -from django.conf import settings -from django.contrib.gis.geos import Point from 
rest_framework.reverse import reverse @@ -47,9 +45,7 @@ def test_mobile_unit(api_client, mobile_units, content_types, unit): assert result["extra"]["test_string"] == "4242" assert result["extra"]["test_int"] == 4242 assert result["extra"]["test_float"] == 42.42 - assert result["geometry"] == Point( - 235404.6706163187, 6694437.919005549, srid=settings.DEFAULT_SRID - ) + assert "POINT" in result["geometry"] url = reverse( "mobility_data:mobile_units-detail", args=["ba6c2903-d36f-4c61-b828-19084fc7a64b"], diff --git a/mobility_data/tests/test_import_accessories.py b/mobility_data/tests/test_import_accessories.py index 3e89549cc..2cefd5de8 100644 --- a/mobility_data/tests/test_import_accessories.py +++ b/mobility_data/tests/test_import_accessories.py @@ -7,6 +7,7 @@ has been removed from the test input data, as it causes GDAL DataSource to fail when loading data. """ + from unittest.mock import patch import pytest diff --git a/mobility_data/tests/test_import_charging_stations.py b/mobility_data/tests/test_import_charging_stations.py index 8b0a99ae7..4e2cef58c 100644 --- a/mobility_data/tests/test_import_charging_stations.py +++ b/mobility_data/tests/test_import_charging_stations.py @@ -14,9 +14,9 @@ @pytest.mark.django_db -@patch("mobility_data.importers.charging_stations.get_csv_file_name") +@patch("mobility_data.importers.charging_stations.get_full_csv_file_name") def test_import_charging_stations( - get_csv_file_name_mock, + get_full_csv_file_name_mock, municipalities, administrative_division_type, administrative_division, @@ -30,7 +30,7 @@ def test_import_charging_stations( ) file_name = f"{get_root_dir()}/mobility_data/tests/data/charging_stations.csv" - get_csv_file_name_mock.return_value = file_name + get_full_csv_file_name_mock.return_value = file_name content_type = get_or_create_content_type_from_config(CONTENT_TYPE_NAME) objects = get_charging_station_objects() num_created, num_deleted = save_to_database(objects, content_type) @@ -79,7 +79,7 @@ def test_import_charging_stations( == f"{CHARGING_STATION_SERVICE_NAMES['en']}, Ratapihankatu 53" ) # Test that duplicates are not created - get_csv_file_name_mock.return_vale = file_name + get_full_csv_file_name_mock.return_value = file_name content_type = get_or_create_content_type_from_config(CONTENT_TYPE_NAME) objects = get_charging_station_objects() num_created, num_deleted = save_to_database(objects, content_type) diff --git a/mobility_data/tests/test_import_parking_garages.py b/mobility_data/tests/test_import_parking_garages.py new file mode 100644 index 000000000..5e0149c79 --- /dev/null +++ b/mobility_data/tests/test_import_parking_garages.py @@ -0,0 +1,69 @@ +from unittest.mock import patch + +import pytest + +from mobility_data.importers.utils import ( + get_content_type_config, + get_or_create_content_type_from_config, + get_root_dir, + save_to_database, +) +from mobility_data.models import ContentType, MobileUnit + + +@pytest.mark.django_db +@patch("mobility_data.importers.parking_garages.get_full_csv_file_name") +def test_import_parking_garages( + get_full_csv_file_name_mock, + municipalities, + administrative_division_type, + administrative_division, + administrative_division_geometry, + streets, + address, +): + from mobility_data.importers.parking_garages import ( + CONTENT_TYPE_NAME, + get_parking_garage_objects, + ) + + file_name = f"{get_root_dir()}/mobility_data/tests/data/parkkihallit_fixtures.csv" + get_full_csv_file_name_mock.return_value = file_name + content_type =
get_or_create_content_type_from_config(CONTENT_TYPE_NAME) + objects = get_parking_garage_objects() + num_created, num_deleted = save_to_database(objects, content_type) + assert num_created == 2 + assert num_deleted == 0 + assert ContentType.objects.filter(type_name=CONTENT_TYPE_NAME).count() == 1 + assert ( + MobileUnit.objects.filter(content_types__type_name=CONTENT_TYPE_NAME).count() + == 2 + ) + config = get_content_type_config(CONTENT_TYPE_NAME) + content_type = ContentType.objects.get(type_name=CONTENT_TYPE_NAME) + content_type.name_fi = config["name"]["fi"] + content_type.name_sv = config["name"]["sv"] + content_type.name_en = config["name"]["en"] + + auriga = MobileUnit.objects.get(name="Auriga") + assert auriga.name_sv == "Auriga" + assert auriga.name_en == "Auriga" + assert auriga.address_fi == "Juhana Herttuan puistokatu 21" + assert auriga.address_sv == "Hertig Johans parkgata 21" + assert auriga.address_en == "Juhana Herttuan puistokatu 21" + assert auriga.municipality.name == "Turku" + assert auriga.extra["parking_spaces"] == 330 + assert auriga.extra["disabled_spaces"] == 2 + assert auriga.extra["charging_stations"] == "2 x Type 2 22 kW" + assert ( + auriga.extra["services"]["fi"] + == "Apuvirta, hissi, liikkumisesteisen pysäköintipaikka, sähköauton latauspiste" + ) + assert ( + auriga.extra["services"]["sv"] + == "Elbilsladdning, hiss, parkeringsplats för funktionshindrade, startkablar" + ) + assert ( + auriga.extra["services"]["en"] + == "Disabled parking, elevator, EV charging, jump leads" + ) diff --git a/mobility_data/tests/test_import_payment_zones.py b/mobility_data/tests/test_import_payment_zones.py index 5f9e7f6fc..17150ef8a 100644 --- a/mobility_data/tests/test_import_payment_zones.py +++ b/mobility_data/tests/test_import_payment_zones.py @@ -6,6 +6,7 @@ has been removed from the test input data, as it causes GDAL DataSource to fail when loading data. """ + from unittest.mock import patch import pytest diff --git a/mobility_data/tests/test_import_speed_limits.py b/mobility_data/tests/test_import_speed_limits.py index 28f3d8771..43d924ace 100644 --- a/mobility_data/tests/test_import_speed_limits.py +++ b/mobility_data/tests/test_import_speed_limits.py @@ -8,6 +8,7 @@ has been removed from the test input data, as it causes GDAL DataSource to fail when loading data. 
""" + import pytest from django.conf import settings diff --git a/requirements-dev.txt b/requirements-dev.txt index e823f0278..bbe45dff4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,7 +8,7 @@ asttokens==2.0.5 # via stack-data backcall==0.2.0 # via ipython -black==22.6.0 +black==24.3.0 # via # -c requirements.txt # ipython @@ -32,6 +32,10 @@ mypy-extensions==0.4.3 # via # -c requirements.txt # black +packaging==24.0 + # via + # -c requirements.txt + # black parso==0.8.2 # via # -c requirements.txt @@ -64,10 +68,18 @@ six==1.16.0 # asttokens stack-data==0.2.0 # via ipython +tomli==1.2.1 + # via + # -c requirements.txt + # black traitlets==5.1.0 # via # ipython # matplotlib-inline +typing-extensions==4.11.0 + # via + # -c requirements.txt + # black wcwidth==0.2.5 # via # -c requirements.txt diff --git a/requirements.txt b/requirements.txt index b3cf0e537..3cb5a9011 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,7 +20,7 @@ attrs==21.2.0 # requests-cache billiard==3.6.4.0 # via celery -black==22.6.0 +black==24.3.0 # via -r requirements.in cattrs==1.8.0 # via requests-cache @@ -108,7 +108,7 @@ flake8==3.9.2 # pep8-naming flake8-polyfill==1.0.2 # via pep8-naming -idna==3.2 +idna==3.7 # via requests inflection==0.5.1 # via drf-spectacular @@ -138,8 +138,10 @@ numpy==1.23.0 # via # -r requirements.in # pandas -packaging==21.0 - # via pytest +packaging==24.0 + # via + # black + # pytest pandas==2.0.1 # via -r requirements.in parso==0.8.2 @@ -172,8 +174,6 @@ pyflakes==2.3.1 # via flake8 pykml==0.2.0 # via -r requirements.in -pyparsing==2.4.7 - # via packaging pyrsistent==0.19.3 # via jsonschema pyshp==2.3.1 @@ -228,7 +228,7 @@ six==1.16.0 # python-dateutil # requests-mock # url-normalize -sqlparse==0.4.4 +sqlparse==0.5.0 # via django toml==0.10.2 # via @@ -240,6 +240,8 @@ tomli==1.2.1 # pep517 tqdm==4.62.3 # via -r requirements.in +typing-extensions==4.11.0 + # via black tzdata==2022.1 # via # django-celery-beat diff --git a/services/api.py b/services/api.py index 5fd368141..9f57361ee 100644 --- a/services/api.py +++ b/services/api.py @@ -134,9 +134,9 @@ def to_internal_value(self, data): value = obj[language] # "musiikkiklubit" if language == settings.LANGUAGES[0][0]: # default language extra_fields[field_name] = value # { "name": "musiikkiklubit" } - extra_fields[ - "{}_{}".format(field_name, language) - ] = value # { "name_fi": "musiikkiklubit" } + extra_fields["{}_{}".format(field_name, language)] = ( + value # { "name_fi": "musiikkiklubit" } + ) del data[field_name] # delete original translated fields # handle other than translated fields @@ -733,9 +733,9 @@ def to_representation(self, obj): if "accessibility_shortcoming_count" in getattr( self, "keep_fields", ["accessibility_shortcoming_count"] ): - ret[ - "accessibility_shortcoming_count" - ] = shortcomings.accessibility_shortcoming_count + ret["accessibility_shortcoming_count"] = ( + shortcomings.accessibility_shortcoming_count + ) if "request" not in self.context: return ret diff --git a/services/content_metrics.py b/services/content_metrics.py index af43aba7a..e7607cc26 100644 --- a/services/content_metrics.py +++ b/services/content_metrics.py @@ -5,6 +5,7 @@ with either long field contents or a large amount of related objects. 
""" + from django.db.models import Case, Count, IntegerField, Sum, When from django.db.models.functions import Length diff --git a/services/search/api.py b/services/search/api.py index 10408ef68..39046fe67 100644 --- a/services/search/api.py +++ b/services/search/api.py @@ -17,12 +17,14 @@ - The search_columns can be manually updated with the index_search_columns and emptied with the empty_search_columns management script. """ + import logging import re from itertools import chain from django.db import connection, reset_queries from django.db.models import Count +from drf_spectacular.utils import extend_schema, OpenApiParameter from munigeo import api as munigeo_api from munigeo.models import Address, AdministrativeDivision from rest_framework import serializers @@ -45,6 +47,7 @@ from .constants import ( DEFAULT_MODEL_LIMIT_VALUE, + DEFAULT_RANK_THRESHOLD, DEFAULT_SEARCH_SQL_LIMIT_VALUE, DEFAULT_SRS, DEFAULT_TRIGRAM_THRESHOLD, @@ -127,9 +130,9 @@ def to_representation(self, obj): shortcomings = obj.accessibility_shortcomings except UnitAccessibilityShortcomings.DoesNotExist: shortcomings = UnitAccessibilityShortcomings() - representation[ - "accessibility_shortcoming_count" - ] = shortcomings.accessibility_shortcoming_count + representation["accessibility_shortcoming_count"] = ( + shortcomings.accessibility_shortcoming_count + ) representation["contract_type"] = UnitSerializer.get_contract_type( self, obj ) @@ -181,13 +184,144 @@ def to_representation(self, obj): return representation +@extend_schema( + parameters=[ + OpenApiParameter( + name="q", + location=OpenApiParameter.QUERY, + description="The query string used for searching. Searches the search_columns for the given models. Commas " + "between words are interpreted as 'and' operator. Words ending with the '|' sign are interpreted as 'or' " + "operator.", + required=False, + type=str, + ), + OpenApiParameter( + name="type", + location=OpenApiParameter.QUERY, + description="Comma separated list of types to search for. Valid values are: unit, service, servicenode, " + "address, administrativedivision. If not given defaults to all.", + required=False, + type=str, + ), + OpenApiParameter( + name="use_trigram", + location=OpenApiParameter.QUERY, + description="Comma separated list of types that will include trigram results in search if no results are " + "found. Valid values are: unit, service, servicenode, address, administrativedivision. If not given " + "trigram will not be used.", + required=False, + type=str, + ), + OpenApiParameter( + name="trigram_threshold", + location=OpenApiParameter.QUERY, + description="Threshold value for trigram search. If not given defaults to 0.15.", + required=False, + type=float, + ), + OpenApiParameter( + name="rank_threshold", + location=OpenApiParameter.QUERY, + description="Include results with search rank greater than or equal to the value. If not given defaults to " + "0.", + required=False, + type=float, + ), + OpenApiParameter( + name="use_websearch", + location=OpenApiParameter.QUERY, + description="Use websearch_to_tsquery instead of to_tsquery if exlusion rules are defined for the search.", + required=False, + type=bool, + ), + OpenApiParameter( + name="geometry", + location=OpenApiParameter.QUERY, + description="Display geometry of the search result. If not given defaults to false.", + required=False, + type=bool, + ), + OpenApiParameter( + name="order_units_by_num_services", + location=OpenApiParameter.QUERY, + description="Order units by number of services. 
If not given defaults to true.", + required=False, + type=bool, + ), + OpenApiParameter( + name="order_units_by_provider_type", + location=OpenApiParameter.QUERY, + description="Order units by provider type. If not given defaults to true.", + required=False, + type=bool, + ), + OpenApiParameter( + name="include", + location=OpenApiParameter.QUERY, + description="Comma separated list of fields to include in the response. Format: entity.field, e.g., " + "unit.connections.", + required=False, + type=str, + ), + OpenApiParameter( + name="sql_query_limit", + location=OpenApiParameter.QUERY, + description="Limit the number of results in the search query.", + required=False, + type=int, + ), + OpenApiParameter( + name="unit_limit", + location=OpenApiParameter.QUERY, + description="Limit the number of units in the search results.", + required=False, + type=int, + ), + OpenApiParameter( + name="service_limit", + location=OpenApiParameter.QUERY, + description="Limit the number of services in the search results.", + required=False, + type=int, + ), + OpenApiParameter( + name="servicenode_limit", + location=OpenApiParameter.QUERY, + description="Limit the number of service nodes in the search results.", + required=False, + type=int, + ), + OpenApiParameter( + name="administrativedivision_limit", + location=OpenApiParameter.QUERY, + description="Limit the number of administrative divisions in the search results.", + required=False, + type=int, + ), + OpenApiParameter( + name="address_limit", + location=OpenApiParameter.QUERY, + description="Limit the number of addresses in the search results.", + required=False, + type=int, + ), + OpenApiParameter( + name="language", + location=OpenApiParameter.QUERY, + description="The language to be used in the search. If not given defaults to Finnish. Format: fi, sv, en.", + required=False, + type=str, + ), + ], + description="Search for units, services, service nodes, addresses and administrative divisions.", +) class SearchViewSet(GenericAPIView): queryset = Unit.objects.all() def get(self, request): model_limits = {} show_only_address = False - units_order_list = ["provider_type"] + units_order_list = [] for model in list(QUERY_PARAM_TYPE_NAMES): model_limits[model] = DEFAULT_MODEL_LIMIT_VALUE @@ -196,6 +330,12 @@ def get(self, request): if not q_val: raise ParseError("Supply search terms with 'q=' ' or input=' '") + if not re.match(r"^[\w\såäö.'+&|-]+$", q_val): + + raise ParseError( + "Invalid search terms, only letters, numbers, spaces and .'+-&| allowed." 
+ ) + types_str = ",".join([elem for elem in QUERY_PARAM_TYPE_NAMES]) types = params.get("type", types_str).split(",") if "use_trigram" in self.request.query_params: @@ -209,10 +349,18 @@ def get(self, request): try: trigram_threshold = float(params.get("trigram_threshold")) except ValueError: - raise ParseError("'trigram_threshold' need to be of type float.") + raise ParseError("'trigram_threshold' needs to be of type float.") else: trigram_threshold = DEFAULT_TRIGRAM_THRESHOLD + if "rank_threshold" in params: + try: + rank_threshold = float(params.get("rank_threshold")) + except ValueError: + raise ParseError("'rank_threshold' needs to be of type float.") + else: + rank_threshold = DEFAULT_RANK_THRESHOLD + if "use_websearch" in params: try: use_websearch = strtobool(params["use_websearch"]) @@ -242,6 +390,18 @@ def get(self, request): if order_units_by_num_services: units_order_list.append("-num_services") + if "order_units_by_provider_type" in params: + try: + order_units_by_provider_type = strtobool( + params["order_units_by_provider_type"] + ) + except ValueError: + raise ParseError("'order_units_by_provider_type' needs to be a boolean") + else: + order_units_by_provider_type = True + + if order_units_by_provider_type: + units_order_list.append("provider_type") if "include" in params: include_fields = params["include"].split(",") else: @@ -277,10 +437,9 @@ def get(self, request): # Build conditional query string that is used in the SQL query. # split by "," or whitespace q_vals = re.split(r",\s+|\s+", q_val) - q_vals = [s.strip().replace("'", "") for s in q_vals] for q in q_vals: if search_query_str: - # if ends with "|"" make it a or + # if ends with "|" make it an 'or' if q[-1] == "|": search_query_str += f"| {q[:-1]}:*" # else make it an and. @@ -297,14 +456,19 @@ def get(self, request): # This is ~100 times faster than using Django's SearchRank and allows searching using wildcard "|*" # and by ranking gives better results, e.g. extra fields' weights are counted. sql = f""" - SELECT id, type_name, name_{language_short}, ts_rank_cd(search_column_{language_short}, search_query) - AS rank FROM search_view, {search_fn}('{config_language}','{search_query_str}') search_query - WHERE search_query @@ search_column_{language_short} - ORDER BY rank DESC LIMIT {sql_query_limit}; + SELECT * from ( + SELECT id, type_name, name_{language_short}, ts_rank_cd(search_column_{language_short}, search_query) + AS rank FROM search_view, {search_fn}('{config_language}', %s) search_query + WHERE search_query @@ search_column_{language_short} + ORDER BY rank DESC LIMIT {sql_query_limit} + ) AS sub_query where sub_query.rank >= {rank_threshold}; """ - cursor = connection.cursor() - cursor.execute(sql) + cursor = connection.cursor() + try: + cursor.execute(sql, [search_query_str]) + except Exception as e: + logger.error(f"Error in search query: {e}") + raise ParseError("Search query failed.") # Note, fetchall() consumes the results and once called returns None. all_results = cursor.fetchall() all_ids = get_all_ids_from_sql_results(all_results) @@ -328,8 +492,9 @@ def get(self, request): ) services_qs = services_qs.annotate(num_units=Count("units")).order_by( - "-units__count" + "-num_units" ) + # order_by() call makes duplicate rows appear distinct.
This is solved by # fetching the ids and filtering a new queryset using them ids = list(services_qs.values_list("id", flat=True)) @@ -369,9 +534,12 @@ def get(self, request): services = self.request.query_params["service"].strip().split(",") if services[0]: units_qs = units_qs.filter(services__in=services) - units_qs = units_qs.annotate(num_services=Count("services")).order_by( - *units_order_list - ) + + if units_order_list: + units_qs = units_qs.annotate(num_services=Count("services")).order_by( + *units_order_list + ) + units_qs = units_qs[: model_limits["unit"]] else: units_qs = Unit.objects.none() diff --git a/services/search/constants.py b/services/search/constants.py index 5bb0e4e87..967e37a78 100644 --- a/services/search/constants.py +++ b/services/search/constants.py @@ -16,5 +16,4 @@ # The limit value for the search query that search the search_view. "NULL" = no limit DEFAULT_SEARCH_SQL_LIMIT_VALUE = "NULL" DEFAULT_TRIGRAM_THRESHOLD = 0.15 -# If word length is greater or equal then hyphenate word. -LENGTH_OF_HYPHENATED_WORDS = 8 +DEFAULT_RANK_THRESHOLD = 1 diff --git a/services/search/tests/conftest.py b/services/search/tests/conftest.py index 3d7bd4cde..020e1a5d8 100644 --- a/services/search/tests/conftest.py +++ b/services/search/tests/conftest.py @@ -36,7 +36,6 @@ def api_client(): return APIClient() -@pytest.mark.django_db @pytest.fixture def units( services, @@ -106,15 +105,35 @@ def units( ) unit.services.add(5) unit.save() + unit = Unit.objects.create( + id=6, + name="Jäähalli", + last_modified_time=now(), + municipality=municipality, + department=department, + ) + # Add service Halli + unit.services.add(6) + unit.save() + + unit = Unit.objects.create( + id=7, + name="Palloiluhalli", + last_modified_time=now(), + municipality=municipality, + department=department, + ) + # Add service Halli + unit.services.add(6) + unit.save() update_service_root_service_nodes() update_service_counts() update_service_node_counts() generate_syllables(Unit) Unit.objects.update(search_column_fi=get_search_column(Unit, "fi")) - return Unit.objects.all() + return Unit.objects.all().order_by("id") -@pytest.mark.django_db @pytest.fixture def department(municipality): return Department.objects.create( @@ -125,7 +144,6 @@ def department(municipality): ) -@pytest.mark.django_db @pytest.fixture def accessibility_shortcoming(units): unit = Unit.objects.get(name="Biologinen museo") @@ -134,7 +152,6 @@ def accessibility_shortcoming(units): ) -@pytest.mark.django_db @pytest.fixture def services(): Service.objects.create( @@ -167,12 +184,21 @@ def services(): name_sv="konstisbanor", last_modified_time=now(), ) + Service.objects.create( + id=6, + name="Halli", + last_modified_time=now(), + ) + Service.objects.create( + id=7, + name="Hallinto", + last_modified_time=now(), + ) generate_syllables(Service) Service.objects.update(search_column_fi=get_search_column(Service, "fi")) return Service.objects.all() -@pytest.mark.django_db @pytest.fixture def service_nodes(services): leisure = ServiceNode.objects.create( @@ -196,7 +222,6 @@ def service_nodes(services): return ServiceNode.objects.all() -@pytest.mark.django_db @pytest.fixture def addresses(streets, municipality): Address.objects.create( @@ -244,11 +269,18 @@ def addresses(streets, municipality): number=33, full_name="Yliopistonkatu 33", ) + Address.objects.create( + municipality_id=municipality.id, + location=Point(60.1612283, 24.9478104), + id=6, + street_id=45, + number=1, + full_name="Tarkk'ampujankatu 1", + ) 
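+ # Note: "Tarkk'ampujankatu 1" above intentionally contains an apostrophe; the address search test in services/search/tests/test_api.py relies on it to exercise the parameterized SQL queries introduced in this diff.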
Address.objects.update(search_column_fi=get_search_column(Address, "fi")) return Address.objects.all() -@pytest.mark.django_db @pytest.fixture def municipality(): return Municipality.objects.create( @@ -256,7 +288,6 @@ def municipality(): ) -@pytest.mark.django_db @pytest.fixture def administrative_division_type(): return AdministrativeDivisionType.objects.get_or_create( @@ -264,7 +295,6 @@ def administrative_division_type(): ) -@pytest.mark.django_db @pytest.fixture def administrative_division(administrative_division_type): adm_div = AdministrativeDivision.objects.get_or_create( @@ -276,7 +306,6 @@ def administrative_division(administrative_division_type): return adm_div -@pytest.mark.django_db @pytest.fixture def streets(): Street.objects.create( @@ -284,10 +313,10 @@ def streets(): ) Street.objects.create(id=43, name="Markulantie", municipality_id="turku") Street.objects.create(id=44, name="Yliopistonkatu", municipality_id="turku") + Street.objects.create(id=45, name="Tarkk'ampujankatu", municipality_id="turku") return Street.objects.all() -@pytest.mark.django_db @pytest.fixture def exclusion_rules(): ExclusionRule.objects.create(id=1, word="tekojää", exclusion="-nurmi") diff --git a/services/search/tests/test_api.py b/services/search/tests/test_api.py index d79c0cbb2..99d8b2c3d 100644 --- a/services/search/tests/test_api.py +++ b/services/search/tests/test_api.py @@ -79,7 +79,8 @@ def test_search( assert museum_service_node["unit_count"]["municipality"]["turku"] == 1 assert museum_service_node["unit_count"]["total"] == 1 # Test that unit "Impivara" is retrieved from service Uimahalli - url = reverse("search") + "?q=uimahalli&type=unit" + url = reverse("search") + "?q=uimahalli&type=unit&rank_threshold=0" + response = api_client.get(url) results = response.json()["results"] assert results[0]["name"]["fi"] == "Impivaara" @@ -120,6 +121,12 @@ def test_search( assert kurrapolku["location"]["type"] == "Point" assert kurrapolku["location"]["coordinates"][0] == 60.479032 assert kurrapolku["location"]["coordinates"][1] == 22.25417 + # Test address search with apostrophe in query + url = reverse("search") + "?q=tarkk'ampujankatu&type=address" + response = api_client.get(url) + results = response.json()["results"] + assert len(results) == 1 + assert results[0]["name"]["fi"] == "Tarkk'ampujankatu 1" # Test that addresses are sorted by naturalsort. 
url = reverse("search") + "?q=yliopistonkatu&type=address" response = api_client.get(url) @@ -153,3 +160,85 @@ def test_search( ) response = api_client.get(url) assert len(response.json()["results"]) == 4 + + +@pytest.mark.django_db +def test_search_input_query_validation(api_client): + # Test that | is allowed in query + url = reverse("search") + "?q=halli|museo" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that & is allowed in query + url = reverse("search") + "?q=halli&museo" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that - is allowed in query + url = reverse("search") + "?q=linja-auto" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that " " is allowed in query + url = reverse("search") + "?q=Keskustakirjasto Oodi" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that + is allowed in query + url = reverse("search") + "?q=Keskustakirjasto+Oodi" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that "ääkköset" are allowed in query + url = reverse("search") + "?q=lääkäri" + response = api_client.get(url) + assert response.status_code == 200 + url = reverse("search") + "?q=röntgen" + response = api_client.get(url) + assert response.status_code == 200 + url = reverse("search") + "?q=åbo" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that numbers are allowed in query + url = reverse("search") + "?q=123" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that . is allowed in query + url = reverse("search") + "?q=halli.museo" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that ' is allowed in query + url = reverse("search") + "?q=halli's" + response = api_client.get(url) + assert response.status_code == 200 + + # Test that special characters are not allowed in query + url = reverse("search") + "?q=halli(" + response = api_client.get(url) + assert response.status_code == 400 + assert ( + response.json()["detail"] + == "Invalid search terms, only letters, numbers, spaces and .'+-&| allowed." + ) + + +@pytest.mark.django_db +def test_search_service_order(api_client, units, services): + """ + Test that services are ordered descending by unit count. 
+ """ + url = reverse("search") + "?q=halli&type=service" + response = api_client.get(url) + results = response.json()["results"] + assert len(results) == 3 + assert results[0]["name"]["fi"] == "Halli" + assert results[0]["unit_count"]["total"] == 2 + + assert results[1]["name"]["fi"] == "Uimahalli" + assert results[1]["unit_count"]["total"] == 1 + + assert results[2]["name"]["fi"] == "Hallinto" + assert results[2]["unit_count"]["total"] == 0 diff --git a/services/search/utils.py b/services/search/utils.py index e8611b990..42def8bf3 100644 --- a/services/search/utils.py +++ b/services/search/utils.py @@ -1,29 +1,37 @@ +import logging + import libvoikko from django.db import connection from django.db.models import Case, When +from rest_framework.exceptions import ParseError from services.models import ExclusionRule, ServiceNode, ServiceNodeUnitCount, Unit from services.search.constants import ( DEFAULT_TRIGRAM_THRESHOLD, - LENGTH_OF_HYPHENATED_WORDS, SEARCHABLE_MODEL_TYPE_NAMES, ) +logger = logging.getLogger("search") voikko = libvoikko.Voikko("fi") voikko.setNoUglyHyphenation(True) +def is_compound_word(word): + result = voikko.analyze(word) + if len(result) == 0: + return False + return True if result[0]["WORDBASES"].count("+") > 1 else False + + def hyphenate(word): """ - Returns a list of syllables of the word if word length - is >= LENGTH_OF_HYPHENATE_WORDS + Returns a list of syllables of the word if it is a compound word. """ - word_length = len(word) - if word_length >= LENGTH_OF_HYPHENATED_WORDS: - # By Setting the value to word_length, voikko returns - # the words that are in the compound word, if the word is - # not a compound word it returns the syllables as normal. - voikko.setMinHyphenatedWordLength(word_length) + word = word.strip() + if is_compound_word(word): + # By Setting the setMinHyphenatedWordLength to word_length, + # voikko returns the words that are in the compound word + voikko.setMinHyphenatedWordLength(len(word)) syllables = voikko.hyphenate(word) return syllables.split("-") else: @@ -182,15 +190,18 @@ def get_preserved_order(ids): def get_trigram_results( model, model_name, field, q_val, threshold=DEFAULT_TRIGRAM_THRESHOLD ): - sql = f"""SELECT id, similarity({field}, '{q_val}') AS sml + sql = f"""SELECT id, similarity({field}, %s) AS sml FROM {model_name} - WHERE similarity({field}, '{q_val}') >= {threshold} + WHERE similarity({field}, %s) >= {threshold} ORDER BY sml DESC; """ cursor = connection.cursor() - cursor.execute(sql) + try: + cursor.execute(sql, [q_val, q_val]) + except Exception as e: + logger.error(f"Error in similarity query: {e}") + raise ParseError("Similariy query failed.") all_results = cursor.fetchall() - ids = [row[0] for row in all_results] objs = model.objects.filter(id__in=ids) return objs diff --git a/services/utils/accessibility_shortcoming_calculator.py b/services/utils/accessibility_shortcoming_calculator.py index 202d20b6c..1c587c639 100644 --- a/services/utils/accessibility_shortcoming_calculator.py +++ b/services/utils/accessibility_shortcoming_calculator.py @@ -131,9 +131,11 @@ def _calculate_shortcomings(self, rule, properties, messages, profile_id): "{}: {} {}".format( rule["id"], rule["operator"], - "{}recorded".format("" if message_recorded else "not ") - if not is_ok - else "passed", + ( + "{}recorded".format("" if message_recorded else "not ") + if not is_ok + else "passed" + ), ) ) return is_ok, message_recorded diff --git a/smbackend/settings.py b/smbackend/settings.py index 59bfb9b55..14ada4f3d 100644 --- 
a/smbackend/settings.py +++ b/smbackend/settings.py @@ -223,7 +223,7 @@ def gettext(s): 991, # health stations 1097, # basic education 2125, # pre school education - 869 # municipal day care + 869, # municipal day care # 25344, # recycling # 25480, # public libraries ], @@ -357,6 +357,7 @@ def gettext(s): "/environment_data/api/v1/data/", "/exceptional_situations/api/v1/situation/", "/exceptional_situations/api/v1/situation_type/", + "/api/v2/search", ] diff --git a/smbackend_turku/importers/divisions.py b/smbackend_turku/importers/divisions.py index 88ed163b4..f53b0d685 100644 --- a/smbackend_turku/importers/divisions.py +++ b/smbackend_turku/importers/divisions.py @@ -4,6 +4,7 @@ and modified to fit the WFS server of Turku. """ + import os import re from datetime import datetime diff --git a/smbackend_turku/importers/geo_search.py b/smbackend_turku/importers/geo_search.py index cef01c379..d22f74e91 100644 --- a/smbackend_turku/importers/geo_search.py +++ b/smbackend_turku/importers/geo_search.py @@ -205,9 +205,9 @@ def save_page(self, results, municipality): for result in results: postal_code = result["postal_code_area"]["postal_code"] if postal_code not in self.postal_code_areas_cache: - self.postal_code_areas_cache[ - postal_code - ] = self.get_or_create_postal_code_area(postal_code, result) + self.postal_code_areas_cache[postal_code] = ( + self.get_or_create_postal_code_area(postal_code, result) + ) ( street_name_fi, @@ -353,9 +353,9 @@ def enrich_page(self, results, municipality): postal_code = result["postal_code_area"]["postal_code"] if postal_code not in self.postal_code_areas_cache: - self.postal_code_areas_cache[ - postal_code - ] = self.get_or_create_postal_code_area(postal_code, result) + self.postal_code_areas_cache[postal_code] = ( + self.get_or_create_postal_code_area(postal_code, result) + ) # name_sv is not added as there might be a swedish translation street_entry = { "name": street_name_fi, diff --git a/smbackend_turku/tests/test_charging_stations.py b/smbackend_turku/tests/test_charging_stations.py index 618e83c17..04859eae7 100644 --- a/smbackend_turku/tests/test_charging_stations.py +++ b/smbackend_turku/tests/test_charging_stations.py @@ -12,9 +12,9 @@ @pytest.mark.django_db -@patch("mobility_data.importers.charging_stations.get_csv_file_name") +@patch("mobility_data.importers.charging_stations.get_full_csv_file_name") def test_charging_stations_import( - get_csv_file_name_mock, + get_full_csv_file_name_mock, municipality, administrative_division, administrative_division_type, @@ -32,7 +32,7 @@ def test_charging_stations_import( id=42, name="Vapaa-aika", last_modified_time=datetime.now(utc_timezone) ) file_name = f"{settings.BASE_DIR}/mobility_data/tests/data/charging_stations.csv" - get_csv_file_name_mock.return_value = file_name + get_full_csv_file_name_mock.return_value = file_name import_charging_stations( logger=logger, config=config,
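A recurring change in this diff is the switch from string-interpolated SQL to parameterized queries (the search SQL in services/search/api.py and get_trigram_results in services/search/utils.py), which is what lets values like "Tarkk'ampujankatu" pass through safely. A minimal sketch of the pattern, assuming a hypothetical helper and table name (similar_names and services_unit are illustrative, not from the codebase; requires the pg_trgm extension the project's queries already rely on):

    from django.db import connection

    def similar_names(q_val, threshold=0.15):
        # Identifiers (table and column names) cannot be bound as parameters,
        # so they stay in the SQL string; the user-supplied value and the
        # threshold are passed via %s placeholders, letting the driver escape
        # them so apostrophes neither break nor inject into the statement.
        sql = """SELECT id, similarity(name, %s) AS sml
                 FROM services_unit
                 WHERE similarity(name, %s) >= %s
                 ORDER BY sml DESC;"""
        with connection.cursor() as cursor:
            cursor.execute(sql, [q_val, q_val, threshold])
            return cursor.fetchall()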