diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index feeb729fd..9518dd399 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -17,6 +17,9 @@ jobs: ADDITIONAL_INSTALLED_APPS: smbackend_turku,ptv PTV_ID_OFFSET: 10000000 LAM_COUNTER_API_BASE_URL: https://tie.digitraffic.fi/api/tms/v1/history + ECO_COUNTER_STATIONS_URL: https://dev.turku.fi/datasets/ecocounter/liikennelaskimet.geojson + ECO_COUNTER_OBSERVATIONS_URL: https://data.turku.fi/cjtv3brqr7gectdv7rfttc/counters-15min.csv + TRAFFIC_COUNTER_OBSERVATIONS_BASE_URL: https://data.turku.fi/2yxpk2imqi2mzxpa6e6knq/ steps: - uses: actions/checkout@v3 diff --git a/eco_counter/README.md b/eco_counter/README.md index 32bdcad68..c87f88554 100644 --- a/eco_counter/README.md +++ b/eco_counter/README.md @@ -34,6 +34,13 @@ e.g. ./manage.py import_counter_data --counters EC TC Counter names are: EC (Eco Counter), TC (Traffic Counter), LC (Lam Counter) and TR (Telraam Counter). Note, Traffic Counter data is updated once a week and Lam Counter data once a day. +## Deleting data +To delete data use the delete_counter_data management command. +e.g. to delete all Lam Counter data type: +``` +./manage.py delete_counter_data --counters LC +``` + ### Importing Telraam raw data In order to import Telraam data into the database the raw data has to be imported. The raw data is imported with the _import_telraam_to_csv_ management command. The imported should be set to be run once a hour (see: https://github.com/City-of-Turku/smbackend/wiki/Celery-Tasks#telraam-to-csv-eco_countertasksimport_telraam_to_csv ) diff --git a/eco_counter/api/serializers.py b/eco_counter/api/serializers.py index ea095df18..685c594dd 100644 --- a/eco_counter/api/serializers.py +++ b/eco_counter/api/serializers.py @@ -1,3 +1,5 @@ +from datetime import date, timedelta + from django.db.models import Q from rest_framework import serializers @@ -28,6 +30,7 @@ "value_bp", "value_bt", ] +Q_EXP = Q(value_at__gt=0) | Q(value_pt__gt=0) | Q(value_jt__gt=0) | Q(value_bt__gt=0) class StationSerializer(serializers.ModelSerializer): @@ -36,7 +39,9 @@ class StationSerializer(serializers.ModelSerializer): lon = serializers.SerializerMethodField() lat = serializers.SerializerMethodField() sensor_types = serializers.SerializerMethodField() - data_from_year = serializers.SerializerMethodField() + data_until_date = serializers.SerializerMethodField() + data_from_date = serializers.SerializerMethodField() + is_active = serializers.SerializerMethodField() class Meta: model = Station @@ -54,7 +59,9 @@ class Meta: "lon", "lat", "sensor_types", - "data_from_year", + "data_until_date", + "data_from_date", + "is_active", ] def get_y(self, obj): @@ -82,17 +89,35 @@ def get_sensor_types(self, obj): result.append(type) return result - def get_data_from_year(self, obj): - q_exp = ( - Q(value_at__gt=0) - | Q(value_pt__gt=0) - | Q(value_jt__gt=0) - | Q(value_bt__gt=0) - ) - qs = YearData.objects.filter(q_exp, station=obj).order_by("year__year_number") - if qs.count() > 0: - return qs[0].year.year_number - else: + def get_is_active(self, obj): + num_days = [1, 7, 30, 365] + res = {} + for days in num_days: + from_date = date.today() - timedelta(days=days - 1) + day_qs = Day.objects.filter(station=obj, date__gte=from_date) + day_data_qs = DayData.objects.filter(day__in=day_qs) + if day_data_qs.filter(Q_EXP).count() > 0: + res[days] = True + else: + res[days] = False + return res + + def get_data_until_date(self, obj): + try: + return ( + 
DayData.objects.filter(Q_EXP, station=obj).latest("day__date").day.date + ) + except DayData.DoesNotExist: + return None + + def get_data_from_date(self, obj): + try: + return ( + DayData.objects.filter(Q_EXP, station=obj) + .earliest("day__date") + .day.date + ) + except DayData.DoesNotExist: return None diff --git a/eco_counter/constants.py b/eco_counter/constants.py index 94d4ab1a8..c41829e77 100644 --- a/eco_counter/constants.py +++ b/eco_counter/constants.py @@ -30,6 +30,10 @@ COUNTERS.LAM_COUNTER = LAM_COUNTER COUNTERS.TELRAAM_COUNTER = TELRAAM_COUNTER +COUNTERS_LIST = [ECO_COUNTER, TRAFFIC_COUNTER, LAM_COUNTER, TELRAAM_COUNTER] +COUNTER_CHOICES_STR = ( + f"{ECO_COUNTER}, {TRAFFIC_COUNTER}, {TELRAAM_COUNTER} and {LAM_COUNTER}" +) CSV_DATA_SOURCES = ( (TRAFFIC_COUNTER, "TrafficCounter"), (ECO_COUNTER, "EcoCounter"), @@ -118,12 +122,14 @@ TELRAAM_COUNTER_CSV_FILE_PATH = f"{settings.MEDIA_ROOT}/telraam_data/" TELRAAM_COUNTER_CSV_FILE = ( - TELRAAM_COUNTER_CSV_FILE_PATH + "telraam_data_{id}_{day}_{month}_{year}.csv" + TELRAAM_COUNTER_CSV_FILE_PATH + "telraam_data_{mac}_{day}_{month}_{year}.csv" ) +TELRAAM_STATION_350457790598039 = 350457790598039 +TELRAAM_STATION_350457790600975 = 350457790600975 TELRAAM_COUNTER_CAMERAS = { # Mac id: Direction flag (True=rgt prefix will be keskustaan päin) - 350457790598039: False, # Kristiinanankatu, Joelle katsottaessa vasemmalle - 350457790600975: True, # Kristiinanankatu, Joelle katsottaessa oikealle + TELRAAM_STATION_350457790598039: False, # Kristiinanankatu, Joelle katsottaessa vasemmalle + TELRAAM_STATION_350457790600975: True, # Kristiinanankatu, Joelle katsottaessa oikealle } # For 429 (too many request) TELRAAM need a retry strategy retry_strategy = Retry( @@ -136,3 +142,23 @@ TELRAAM_HTTP = requests.Session() TELRAAM_HTTP.mount("https://", adapter) TELRAAM_HTTP.mount("http://", adapter) + + +# Telraam stations initial geometries in WKT format +# These coordinates are used if CSV files do not include any geometries. 
+TELRAAM_STATIONS_INITIAL_WKT_GEOMETRIES = { + TELRAAM_STATION_350457790598039: { + "location": "POINT (239628.47846388057 6710757.471557152)", + "geometry": "MULTILINESTRING ((239565.80107971327 6710861.8209667895, 239572.58901459936 6710850.524818219," + " 239574.73294378238 6710846.950531884, 239628.47846388057 6710757.471557152," + " 239630.0339247121 6710754.923177836, 239635.52748551324 6710745.732077925))", + }, + TELRAAM_STATION_350457790600975: { + "location": "POINT (239523.2288977413 6710932.715108742)", + "geometry": "MULTILINESTRING ((239490.42663459244 6710989.092283992, 239493.45037993207 6710983.7110835295," + " 239495.88642941663 6710979.3668986475, 239517.9904128411 6710941.530425406," + " 239520.0691194288 6710937.971973339, 239523.2288977413 6710932.715108742," + " 239529.37000273907 6710922.482472116, 239558.08254550528 6710874.681753734," + " 239559.97438753376 6710871.516775628, 239565.80107971327 6710861.8209667895))", + }, +} diff --git a/eco_counter/management/commands/delete_all_counter_data.py b/eco_counter/management/commands/delete_all_counter_data.py deleted file mode 100644 index 9633df83c..000000000 --- a/eco_counter/management/commands/delete_all_counter_data.py +++ /dev/null @@ -1,17 +0,0 @@ -import logging - -from django import db -from django.core.management.base import BaseCommand - -from eco_counter.models import ImportState, Station - -logger = logging.getLogger("eco_counter") - - -class Command(BaseCommand): - @db.transaction.atomic - def handle(self, *args, **options): - logger.info("Deleting all counter data...") - logger.info(f"{Station.objects.all().delete()}") - logger.info(f"{ImportState.objects.all().delete()}") - logger.info("Deleted all counter data.") diff --git a/eco_counter/management/commands/delete_counter_data.py b/eco_counter/management/commands/delete_counter_data.py new file mode 100644 index 000000000..e145e3de7 --- /dev/null +++ b/eco_counter/management/commands/delete_counter_data.py @@ -0,0 +1,36 @@ +import logging + +from django import db +from django.core.management.base import BaseCommand + +from eco_counter.constants import COUNTER_CHOICES_STR +from eco_counter.management.commands.utils import check_counters_argument +from eco_counter.models import ImportState, Station + +logger = logging.getLogger("eco_counter") + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument( + "--counters", + type=str, + nargs="+", + default=False, + help=f"Delete given counter data, choices are: {COUNTER_CHOICES_STR}.", + ) + + @db.transaction.atomic + def handle(self, *args, **options): + counters = options.get("counters", None) + check_counters_argument(counters) + if counters: + for counter in counters: + logger.info(f"Deleting counter data for {counter}") + logger.info( + f"{Station.objects.filter(csv_data_source=counter).delete()}" + ) + logger.info( + f"{ImportState.objects.filter(csv_data_source=counter).delete()}" + ) + logger.info("Deleted counter data.") diff --git a/eco_counter/management/commands/import_counter_data.py b/eco_counter/management/commands/import_counter_data.py index 0a607e682..01fbcc203 100644 --- a/eco_counter/management/commands/import_counter_data.py +++ b/eco_counter/management/commands/import_counter_data.py @@ -16,6 +16,7 @@ from django.core.management.base import BaseCommand, CommandError from eco_counter.constants import ( + COUNTER_CHOICES_STR, COUNTER_START_YEARS, COUNTERS, ECO_COUNTER, @@ -41,10 +42,12 @@ ) from .utils import ( + check_counters_argument, 
     gen_eco_counter_test_csv,
     get_eco_counter_csv,
     get_lam_counter_csv,
-    get_telraam_counter_csv,
+    get_or_create_telraam_station,
+    get_telraam_data_frames,
     get_test_dataframe,
     get_traffic_counter_csv,
     save_stations,
@@ -63,295 +66,400 @@
 # on a six lane road during the sample time (15min), 15min*60s*6lanes.
 # If the value is greater than the threshold value the value is set to 0.
 ERRORNEOUS_VALUE_THRESHOLD = 5400
+TIMEZONE = pytz.timezone("Europe/Helsinki")
+"""
+Movement types:
+(A)uto, car
+(P)yörä, bicycle
+(J)alankulkija, pedestrian
+(B)ussi, bus
+Direction types:
+(K)eskustaan päin, towards the center
+(P)oispäin keskustasta, away from the center
+So, for example, the column with prefix "ap" contains data for cars moving away from the center.
+The naming convention is derived from the eco-counter source data that was the
+original data source.
+"""
+STATION_TYPES = [
+    ("ak", "ap", "at"),
+    ("pk", "pp", "pt"),
+    ("jk", "jp", "jt"),
+    ("bk", "bp", "bt"),
+]
+
+TYPE_DIRS = ["AK", "AP", "JK", "JP", "BK", "BP", "PK", "PP"]
+ALL_TYPE_DIRS = TYPE_DIRS + ["AT", "JT", "BT", "PT"]
+
+
+def delete_tables(
+    csv_data_sources=[ECO_COUNTER, TRAFFIC_COUNTER, LAM_COUNTER, TELRAAM_COUNTER],
+):
+    for csv_data_source in csv_data_sources:
+        for station in Station.objects.filter(csv_data_source=csv_data_source):
+            Year.objects.filter(station=station).delete()
+        ImportState.objects.filter(csv_data_source=csv_data_source).delete()
+
+
+def save_hour_data_values(hour_data, values):
+    for td in ALL_TYPE_DIRS:
+        setattr(hour_data, f"values_{td.lower()}", values[td])
+    hour_data.save()
+
+
+def save_values(values, dst_obj):
+    for station_types in STATION_TYPES:
+        setattr(dst_obj, f"value_{station_types[0]}", values[station_types[0]])
+        setattr(dst_obj, f"value_{station_types[1]}", values[station_types[1]])
+        setattr(
+            dst_obj,
+            f"value_{station_types[2]}",
+            values[station_types[0]] + values[station_types[1]],
+        )
+        dst_obj.save()
 
 
-class Command(BaseCommand):
-    help = "Imports traffic counter data in the Turku region."
-    COUNTERS = [ECO_COUNTER, TRAFFIC_COUNTER, LAM_COUNTER, TELRAAM_COUNTER]
-    COUNTER_CHOICES_STR = (
-        f"{ECO_COUNTER}, {TRAFFIC_COUNTER}, {TELRAAM_COUNTER} and {LAM_COUNTER}"
-    )
-    TIMEZONE = pytz.timezone("Europe/Helsinki")
-    """
-    Movement types:
-    (A)uto, car
-    (P)yörä, bicycle
-    (J)alankulkija, pedestrian
-    (B)ussi, bus
-    Direction types:
-    (K)eskustaan päin, towards the center
-    (P)poispäin keskustasta, away from the center
-    So for the example column with prefix "ap" contains data for cars moving away from the center.
-    The naming convention is derived from the eco-counter source data that was the
-    original data source.
+def add_values(values, dst_obj): """ - STATION_TYPES = [ - ("ak", "ap", "at"), - ("pk", "pp", "pt"), - ("jk", "jp", "jt"), - ("bk", "bp", "bt"), - ] - - TYPE_DIRS = ["AK", "AP", "JK", "JP", "BK", "BP", "PK", "PP"] - ALL_TYPE_DIRS = TYPE_DIRS + ["AT", "JT", "BT", "PT"] - type_dirs_lower = [TD.lower() for TD in TYPE_DIRS] - - def delete_tables( - self, - csv_data_sources=[ECO_COUNTER, TRAFFIC_COUNTER, LAM_COUNTER, TELRAAM_COUNTER], - ): - for csv_data_source in csv_data_sources: - for station in Station.objects.filter(csv_data_source=csv_data_source): - Year.objects.filter(station=station).delete() - ImportState.objects.filter(csv_data_source=csv_data_source).delete() - - def save_values(self, values, dst_obj): - for station_types in self.STATION_TYPES: - setattr(dst_obj, f"value_{station_types[0]}", values[station_types[0]]) - setattr(dst_obj, f"value_{station_types[1]}", values[station_types[1]]) - setattr( - dst_obj, - f"value_{station_types[2]}", - values[station_types[0]] + values[station_types[1]], - ) - dst_obj.save() - - def add_values(self, values, dst_obj): - """ - Populate values for all movement types and directions for a station. - """ - for station_types in self.STATION_TYPES: - key = f"value_{station_types[0]}" - k_val = getattr(dst_obj, key, 0) + values[station_types[0]] - setattr(dst_obj, key, k_val) - key = f"value_{station_types[1]}" - p_val = getattr(dst_obj, key, 0) + values[station_types[1]] - setattr(dst_obj, key, p_val) - key = f"value_{station_types[2]}" - t_val = ( - getattr(dst_obj, key, 0) - + values[station_types[0]] - + values[station_types[1]] - ) - setattr(dst_obj, key, t_val) - dst_obj.save() - - def get_values(self, sum_series, station_name): - """ - Returns a dict containing the aggregated sum value for every movement type and direction. - """ - values = {} - for type_dir in self.TYPE_DIRS: - key = f"{station_name} {type_dir}" - values[type_dir.lower()] = sum_series.get(key, 0) - return values - - def save_years(self, df, stations): - logger.info("Saving years...") - years = df.groupby(df.index.year) - for index, row in years: - logger.info(f"Saving year {index}") - sum_series = row.sum() - for station in stations: - year, _ = Year.objects.get_or_create(station=station, year_number=index) - values = self.get_values(sum_series, station.name) - year_data, _ = YearData.objects.get_or_create( - year=year, station=station - ) - self.save_values(values, year_data) - - def save_months(self, df, stations): - logger.info("Saving months...") - months = df.groupby([df.index.year, df.index.month]) - for index, row in months: - year_number, month_number = index - logger.info(f"Saving month {month_number} of year {year_number}") - sum_series = row.sum() - for station in stations: - year, _ = Year.objects.get_or_create( - station=station, year_number=year_number - ) - month, _ = Month.objects.get_or_create( - station=station, year=year, month_number=month_number - ) - values = self.get_values(sum_series, station.name) - month_data, _ = MonthData.objects.get_or_create( - year=year, month=month, station=station - ) - self.save_values(values, month_data) + Populate values for all movement types and directions for a station. 
+ """ + for station_types in STATION_TYPES: + key = f"value_{station_types[0]}" + k_val = getattr(dst_obj, key, 0) + values[station_types[0]] + setattr(dst_obj, key, k_val) + key = f"value_{station_types[1]}" + p_val = getattr(dst_obj, key, 0) + values[station_types[1]] + setattr(dst_obj, key, p_val) + key = f"value_{station_types[2]}" + t_val = ( + getattr(dst_obj, key, 0) + + values[station_types[0]] + + values[station_types[1]] + ) + setattr(dst_obj, key, t_val) + dst_obj.save() + - def save_current_year(self, stations, year_number, end_month_number): - logger.info(f"Saving current year {year_number}") +def get_values(sum_series, station_name): + """ + Returns a dict containing the aggregated sum value for every movement type and direction. + """ + values = {} + for type_dir in TYPE_DIRS: + key = f"{station_name} {type_dir}" + values[type_dir.lower()] = sum_series.get(key, 0) + return values + + +def save_years(df, stations): + logger.info("Saving years...") + years = df.groupby(df.index.year) + for index, row in years: + logger.info(f"Saving year {index}") + sum_series = row.sum() + for station in stations: + year, _ = Year.objects.get_or_create(station=station, year_number=index) + values = get_values(sum_series, station.name) + year_data, _ = YearData.objects.get_or_create(year=year, station=station) + save_values(values, year_data) + + +def save_months(df, stations): + logger.info("Saving months...") + months = df.groupby([df.index.year, df.index.month]) + for index, row in months: + year_number, month_number = index + logger.info(f"Saving month {month_number} of year {year_number}") + sum_series = row.sum() for station in stations: year, _ = Year.objects.get_or_create( station=station, year_number=year_number ) - year_data, _ = YearData.objects.get_or_create(station=station, year=year) - for station_types in self.STATION_TYPES: - setattr(year_data, f"value_{station_types[0]}", 0) - setattr(year_data, f"value_{station_types[1]}", 0) - setattr(year_data, f"value_{station_types[2]}", 0) - for month_number in range(1, end_month_number + 1): - month, _ = Month.objects.get_or_create( - station=station, year=year, month_number=month_number - ) - month_data, _ = MonthData.objects.get_or_create( - station=station, month=month, year=year - ) - for station_types in self.STATION_TYPES: - for i in range(3): - key = f"value_{station_types[i]}" - m_val = getattr(month_data, key, 0) - y_val = getattr(year_data, key, 0) - setattr(year_data, key, m_val + y_val) - year_data.save() - - def save_weeks(self, df, stations): - logger.info("Saving weeks...") - weeks = df.groupby([df.index.year, df.index.isocalendar().week]) - for index, row in weeks: - year_number, week_number = index - logger.info(f"Saving week number {week_number} of year {year_number}") - sum_series = row.sum() - for station in stations: - year = Year.objects.get(station=station, year_number=year_number) - week, _ = Week.objects.get_or_create( - station=station, - week_number=week_number, - years__year_number=year_number, - ) - if week.years.count() == 0: - week.years.add(year) + month, _ = Month.objects.get_or_create( + station=station, year=year, month_number=month_number + ) + values = get_values(sum_series, station.name) + month_data, _ = MonthData.objects.get_or_create( + year=year, month=month, station=station + ) + save_values(values, month_data) + + +def save_current_year(stations, year_number, end_month_number): + logger.info(f"Saving current year {year_number}") + for station in stations: + year, _ = 
Year.objects.get_or_create(station=station, year_number=year_number) + year_data, _ = YearData.objects.get_or_create(station=station, year=year) + for station_types in STATION_TYPES: + setattr(year_data, f"value_{station_types[0]}", 0) + setattr(year_data, f"value_{station_types[1]}", 0) + setattr(year_data, f"value_{station_types[2]}", 0) + for month_number in range(1, end_month_number + 1): + month, _ = Month.objects.get_or_create( + station=station, year=year, month_number=month_number + ) + month_data, _ = MonthData.objects.get_or_create( + station=station, month=month, year=year + ) + for station_types in STATION_TYPES: + for i in range(3): + key = f"value_{station_types[i]}" + m_val = getattr(month_data, key, 0) + y_val = getattr(year_data, key, 0) + setattr(year_data, key, m_val + y_val) + year_data.save() + + +def save_weeks(df, stations): + logger.info("Saving weeks...") + weeks = df.groupby([df.index.year, df.index.isocalendar().week]) + for index, row in weeks: + year_number, week_number = index + logger.info(f"Saving week number {week_number} of year {year_number}") + sum_series = row.sum() + for station in stations: + year = Year.objects.get(station=station, year_number=year_number) + week, _ = Week.objects.get_or_create( + station=station, + week_number=week_number, + years__year_number=year_number, + ) + if week.years.count() == 0: + week.years.add(year) - values = self.get_values(sum_series, station.name) - week_data, _ = WeekData.objects.get_or_create( - station=station, week=week - ) - self.save_values(values, week_data) + values = get_values(sum_series, station.name) + week_data, _ = WeekData.objects.get_or_create(station=station, week=week) + save_values(values, week_data) - def save_days(self, df, stations): - logger.info("Saving days...") - days = df.groupby( - [df.index.year, df.index.month, df.index.isocalendar().week, df.index.day] - ) - prev_week_number = None - for index, row in days: - year_number, month_number, week_number, day_number = index - date = datetime(year_number, month_number, day_number) +def save_days(df, stations): + logger.info("Saving days...") + days = df.groupby( + [df.index.year, df.index.month, df.index.isocalendar().week, df.index.day] + ) + prev_week_number = None + for index, row in days: + year_number, month_number, week_number, day_number = index + + date = datetime(year_number, month_number, day_number) + sum_series = row.sum() + for station in stations: + year = Year.objects.get(station=station, year_number=year_number) + month = Month.objects.get( + station=station, year=year, month_number=month_number + ) + week = Week.objects.get( + station=station, years=year, week_number=week_number + ) + day, _ = Day.objects.get_or_create( + station=station, + date=date, + weekday_number=date.weekday(), + year=year, + month=month, + week=week, + ) + values = get_values(sum_series, station.name) + day_data, _ = DayData.objects.get_or_create(station=station, day=day) + save_values(values, day_data) + if not prev_week_number or prev_week_number != week_number: + prev_week_number = week_number + logger.info(f"Saved days for week {week_number} of year {year_number}") + + +def save_hours(df, stations): + logger.info("Saving hours...") + hours = df.groupby([df.index.year, df.index.month, df.index.day, df.index.hour]) + for i_station, station in enumerate(stations): + prev_day_number = None + prev_month_number = None + values = {k: [] for k in ALL_TYPE_DIRS} + for index, row in hours: sum_series = row.sum() - for station in stations: - year = 
Year.objects.get(station=station, year_number=year_number) - month = Month.objects.get( - station=station, year=year, month_number=month_number - ) - week = Week.objects.get( - station=station, years=year, week_number=week_number - ) - day, _ = Day.objects.get_or_create( + year_number, month_number, day_number, _ = index + if not prev_day_number: + prev_day_number = day_number + if not prev_month_number: + prev_month_number = month_number + + if day_number != prev_day_number or month_number != prev_month_number: + """ + If day or month changed. Save the hours for the day and clear the values dict. + """ + if month_number != prev_month_number: + prev_day_number = day_number + day = Day.objects.get( + date=datetime(year_number, month_number, prev_day_number), station=station, - date=date, - weekday_number=date.weekday(), - year=year, - month=month, - week=week, ) - values = self.get_values(sum_series, station.name) - day_data, _ = DayData.objects.get_or_create(station=station, day=day) - self.save_values(values, day_data) - if not prev_week_number or prev_week_number != week_number: - prev_week_number = week_number - logger.info(f"Saved days for week {week_number} of year {year_number}") - - def save_hours(self, df, stations): - logger.info("Saving hours...") - hours = df.groupby([df.index.year, df.index.month, df.index.day, df.index.hour]) - for i_station, station in enumerate(stations): - prev_day_number = None - prev_month_number = None - values = {k: [] for k in self.ALL_TYPE_DIRS} - for index, row in hours: - sum_series = row.sum() - year_number, month_number, day_number, _ = index - if not prev_day_number: - prev_day_number = day_number - if not prev_month_number: - prev_month_number = month_number - - if day_number != prev_day_number or month_number != prev_month_number: - """ - If day or month changed. Save the hours for the day and clear the values dict. 
- """ - if month_number != prev_month_number: - prev_day_number = day_number - day = Day.objects.get( - date=datetime(year_number, month_number, prev_day_number), - station=station, + hour_data, _ = HourData.objects.get_or_create(station=station, day=day) + save_hour_data_values(hour_data, values) + values = {k: [] for k in ALL_TYPE_DIRS} + # output logger only when last station is saved + if i_station == len(stations) - 1: + logger.info( + f"Saved hour data for day {prev_day_number}, month {prev_month_number} year {year_number}" ) - hour_data, _ = HourData.objects.get_or_create( - station=station, day=day - ) - for td in self.ALL_TYPE_DIRS: - setattr(hour_data, f"values_{td.lower()}", values[td]) - hour_data.save() - values = {k: [] for k in self.ALL_TYPE_DIRS} - # output logger only when last station is saved - if i_station == len(stations) - 1: - logger.info( - f"Saved hour data for day {prev_day_number}, month {prev_month_number} year {year_number}" - ) - prev_day_number = day_number - prev_month_number = month_number - else: - # Add data to values dict for an hour - for station_types in self.STATION_TYPES: - for i in range(3): - if i < 2: - dir_key = f"{station.name} {station_types[i].upper()}" - val = sum_series.get(dir_key, 0) - else: - k_key = f"{station.name} {station_types[0].upper()}" - p_key = f"{station.name} {station_types[1].upper()}" - val = sum_series.get(p_key, 0) + sum_series.get( - k_key, 0 - ) - values_key = station_types[i].upper() - values[values_key].append(val) - - def save_observations(self, csv_data, start_time, csv_data_source=ECO_COUNTER): - import_state = ImportState.objects.get(csv_data_source=csv_data_source) - # Populate stations list, this is used to set/lookup station relations. + prev_day_number = day_number + prev_month_number = month_number + # Add data to values dict for an hour + for station_types in STATION_TYPES: + for i in range(len(station_types)): + if i < 2: + dir_key = f"{station.name} {station_types[i].upper()}" + val = sum_series.get(dir_key, 0) + else: + k_key = f"{station.name} {station_types[0].upper()}" + p_key = f"{station.name} {station_types[1].upper()}" + val = sum_series.get(p_key, 0) + sum_series.get(k_key, 0) + values_key = station_types[i].upper() + values[values_key].append(val) + + # Save hour datas for the last day in data frame + day, _ = Day.objects.get_or_create( + date=datetime(year_number, month_number, day_number), + station=station, + ) + hour_data, _ = HourData.objects.get_or_create(station=station, day=day) + save_hour_data_values(hour_data, values) + + +def save_observations(csv_data, start_time, csv_data_source=ECO_COUNTER, station=None): + import_state = ImportState.objects.get(csv_data_source=csv_data_source) + # Populate stations list, this is used to set/lookup station relations. + if not station: stations = [ station for station in Station.objects.filter(csv_data_source=csv_data_source) ] - df = csv_data - df["Date"] = pd.to_datetime(df["startTime"], format="%Y-%m-%dT%H:%M") - df = df.drop("startTime", axis=1) - df = df.set_index("Date") - # Fill missing cells with the value 0 - df = df.fillna(0) - # Set negative numbers to 0 - df = df.clip(lower=0) - # Set values higher than ERRORNEOUS_VALUES_THRESHOLD to 0 - df[df > ERRORNEOUS_VALUE_THRESHOLD] = 0 - if not import_state.current_year_number: - # In initial import populate all years. 
- self.save_years(df, stations) - self.save_months(df, stations) - if import_state.current_year_number: - end_month_number = df.index[-1].month - self.save_current_year(stations, start_time.year, end_month_number) - - self.save_weeks(df, stations) - self.save_days(df, stations) - self.save_hours(df, stations) - end_date = df.index[-1] - import_state.current_year_number = end_date.year - import_state.current_month_number = end_date.month - import_state.save() - logger.info(f"Imported observations until:{str(end_date)}") + else: + stations = [station] + df = csv_data + df["Date"] = pd.to_datetime(df["startTime"], format="%Y-%m-%dT%H:%M") + df = df.drop("startTime", axis=1) + df = df.set_index("Date") + # Fill missing cells with the value 0 + df = df.fillna(0) + # Set negative numbers to 0 + df = df.clip(lower=0) + # Set values higher than ERRORNEOUS_VALUES_THRESHOLD to 0 + df[df > ERRORNEOUS_VALUE_THRESHOLD] = 0 + if not import_state.current_year_number: + # In initial import populate all years. + save_years(df, stations) + save_months(df, stations) + if import_state.current_year_number: + end_month_number = df.index[-1].month + save_current_year(stations, start_time.year, end_month_number) + + save_weeks(df, stations) + save_days(df, stations) + save_hours(df, stations) + end_date = df.index[-1] + import_state.current_year_number = end_date.year + import_state.current_month_number = end_date.month + import_state.current_day_number = end_date.day + import_state.save() + logger.info(f"Imported observations until:{str(end_date)}") + + +def save_telraam_data(start_time): + data_frames = get_telraam_data_frames(start_time.date()) + for item in data_frames.items(): + if len(item) == 0: + logger.error("Found Telraam dataframe without data") + break + station = get_or_create_telraam_station(item[0]) + logger.info(f"Saving Telraam station {station.name}") + # Save dataframes for the camera(station) + for csv_data in item[1]: + start_time = csv_data.iloc[0][0].to_pydatetime() + save_observations( + csv_data, + start_time, + csv_data_source=TELRAAM_COUNTER, + station=station, + ) + + +def handle_initial_import(initial_import_counters): + delete_tables(csv_data_sources=initial_import_counters) + for counter in initial_import_counters: + ImportState.objects.filter(csv_data_source=counter).delete() + ImportState.objects.create(csv_data_source=counter) + logger.info(f"Retrieving stations for {counter}.") + # As Telraam counters are dynamic, create after CSV data is processed + if counter == TELRAAM_COUNTER: + Station.objects.filter(csv_data_source=counter).delete() + else: + save_stations(counter) + + +def import_data(counters): + for counter in counters: + logger.info(f"Importing/counting data for {counter}...") + import_state = ImportState.objects.filter(csv_data_source=counter).first() + if not import_state: + logger.error( + "ImportState instance not found, try importing with the '--init' argument." 
+ ) + break + if import_state.current_year_number and import_state.current_month_number: + start_time = "{year}-{month}-1T00:00".format( + year=import_state.current_year_number, + month=import_state.current_month_number, + ) + else: + start_month = ( + TELRAAM_COUNTER_START_MONTH if counter == TELRAAM_COUNTER else "01" + ) + start_time = f"{COUNTER_START_YEARS[counter]}-{start_month}-01" + + start_time = dateutil.parser.parse(start_time) + start_time = TIMEZONE.localize(start_time) + # The timeformat for the input data is : 2020-03-01T00:00 + # Convert starting time to input datas timeformat + start_time_string = start_time.strftime("%Y-%m-%dT%H:%M") + match counter: + # case COUNTERS.TELRAAM_COUNTER: + # Telraam counters are handled differently due to their dynamic nature + case COUNTERS.LAM_COUNTER: + csv_data = get_lam_counter_csv(start_time.date()) + case COUNTERS.ECO_COUNTER: + csv_data = get_eco_counter_csv() + case COUNTERS.TRAFFIC_COUNTER: + if import_state.current_year_number: + start_year = import_state.current_year_number + else: + start_year = TRAFFIC_COUNTER_START_YEAR + csv_data = get_traffic_counter_csv(start_year=start_year) + + if counter == TELRAAM_COUNTER: + save_telraam_data(start_time) + else: + start_index = csv_data.index[ + csv_data[INDEX_COLUMN_NAME] == start_time_string + ].values[0] + # As LAM data is fetched with a timespan, no index data is available, instead + # show time. + if counter == LAM_COUNTER: + logger.info(f"Starting saving observations at time:{start_time}") + else: + logger.info(f"Starting saving observations at index:{start_index}") + + csv_data = csv_data[start_index:] + save_observations( + csv_data, + start_time, + csv_data_source=counter, + ) + # Try to free some memory + del csv_data + gc.collect() + + +class Command(BaseCommand): + help = "Imports traffic counter data in the Turku region." def add_arguments(self, parser): parser.add_argument( @@ -360,7 +468,7 @@ def add_arguments(self, parser): nargs="+", default=False, help=f"For given counters in arguments deletes all tables before importing, imports stations and\ - starts importing from row 0. The counter arguments are: {self.COUNTER_CHOICES_STR}", + starts importing from row 0. The counter arguments are: {COUNTER_CHOICES_STR}", ) parser.add_argument( "--test-counter", @@ -374,118 +482,45 @@ def add_arguments(self, parser): type=str, nargs="+", default=False, - help=f"Import specific counter(s) data, choices are: {self.COUNTER_CHOICES_STR}.", + help=f"Import specific counter(s) data, choices are: {COUNTER_CHOICES_STR}.", ) - def check_counters_argument(self, counters): - for counter in counters: - if counter not in self.COUNTERS: - raise CommandError( - f"Invalid counter type, valid types are: {self.COUNTER_CHOICES_STR}." - ) - def handle(self, *args, **options): initial_import_counters = None start_time = None if options["initial_import"]: if len(options["initial_import"]) == 0: raise CommandError( - f"Specify the counter(s), choices are: {self.COUNTER_CHOICES_STR}." + f"Specify the counter(s), choices are: {COUNTER_CHOICES_STR}." 
) else: initial_import_counters = options["initial_import"] - self.check_counters_argument(initial_import_counters) + check_counters_argument(initial_import_counters) logger.info(f"Deleting tables for: {initial_import_counters}") - self.delete_tables(csv_data_sources=initial_import_counters) - for counter in initial_import_counters: - ImportState.objects.filter(csv_data_source=counter).delete() - import_state = ImportState.objects.create( - csv_data_source=counter, - ) - logger.info(f"Retrieving stations for {counter}.") - save_stations(counter) + handle_initial_import(initial_import_counters) if options["test_counter"]: logger.info("Testing eco_counter importer.") counter = options["test_counter"][0] start_time = options["test_counter"][1] end_time = options["test_counter"][2] - import_state, _ = ImportState.objects.get_or_create(csv_data_source=counter) + ImportState.objects.get_or_create(csv_data_source=counter) test_dataframe = get_test_dataframe(counter) csv_data = gen_eco_counter_test_csv( test_dataframe.keys(), start_time, end_time ) - self.save_observations( + save_observations( csv_data, start_time, csv_data_source=counter, ) + # Import if counters arg or initial import. if options["counters"] or initial_import_counters: if not initial_import_counters: # run with counters argument counters = options["counters"] - self.check_counters_argument(counters) + check_counters_argument(counters) else: counters = initial_import_counters - - for counter in counters: - logger.info(f"Importing/counting data for {counter}...") - import_state = ImportState.objects.filter( - csv_data_source=counter - ).first() - - if ( - import_state.current_year_number - and import_state.current_month_number - ): - start_time = "{year}-{month}-1T00:00".format( - year=import_state.current_year_number, - month=import_state.current_month_number, - ) - else: - start_month = ( - TELRAAM_COUNTER_START_MONTH - if counter == TELRAAM_COUNTER - else "01" - ) - start_time = f"{COUNTER_START_YEARS[counter]}-{start_month}-01" - - start_time = dateutil.parser.parse(start_time) - start_time = self.TIMEZONE.localize(start_time) - # The timeformat for the input data is : 2020-03-01T00:00 - # Convert starting time to input datas timeformat - start_time_string = start_time.strftime("%Y-%m-%dT%H:%M") - # start_index = None - match counter: - case COUNTERS.TELRAAM_COUNTER: - csv_data = get_telraam_counter_csv(start_time.date()) - case COUNTERS.LAM_COUNTER: - csv_data = get_lam_counter_csv(start_time.date()) - case COUNTERS.ECO_COUNTER: - csv_data = get_eco_counter_csv() - case COUNTERS.TRAFFIC_COUNTER: - if import_state.current_year_number: - start_year = import_state.current_year_number - else: - start_year = TRAFFIC_COUNTER_START_YEAR - csv_data = get_traffic_counter_csv(start_year=start_year) - start_index = csv_data.index[ - csv_data[INDEX_COLUMN_NAME] == start_time_string - ].values[0] - # As LAM data is fetched with a timespan, no index data is available, instead - # show time. 
-                if counter == LAM_COUNTER:
-                    logger.info(f"Starting saving observations at time:{start_time}")
-                else:
-                    logger.info(f"Starting saving observations at index:{start_index}")
-
-                csv_data = csv_data[start_index:]
-                self.save_observations(
-                    csv_data,
-                    start_time,
-                    csv_data_source=counter,
-                )
-                # Try to Free memory
-                del csv_data
-                gc.collect()
+            import_data(counters)
diff --git a/eco_counter/management/commands/import_telraam_to_csv.py b/eco_counter/management/commands/import_telraam_to_csv.py
index a6143a79a..729e04c41 100644
--- a/eco_counter/management/commands/import_telraam_to_csv.py
+++ b/eco_counter/management/commands/import_telraam_to_csv.py
@@ -28,7 +28,10 @@
     TELRAAM_CSV,
     TELRAAM_HTTP,
 )
-from eco_counter.management.commands.utils import get_telraam_cameras
+from eco_counter.management.commands.utils import (
+    get_telraam_camera_location_and_geometry,
+    get_telraam_cameras,
+)
 from eco_counter.models import ImportState
 
 TOKEN = settings.TELRAAM_TOKEN
@@ -257,7 +260,7 @@ def save_dataframe(from_date: date = True) -> datetime:
             df = df.astype(int)
 
             csv_file = TELRAAM_COUNTER_CSV_FILE.format(
-                id=camera["mac"],
+                mac=camera["mac"],
                 day=start_date.day,
                 month=start_date.month,
                 year=start_date.year,
@@ -267,7 +270,15 @@ def save_dataframe(from_date: date = True) -> datetime:
             if os.path.exists(csv_file):
                 os.remove(csv_file)
             if not os.path.exists(csv_file) or can_overwrite_csv_file:
-                df.to_csv(csv_file)
+                location, geometry = get_telraam_camera_location_and_geometry(
+                    camera["segment_id"]
+                )
+                # Write the WKT of the location, as the camera's position can change
+                with open(csv_file, "w") as file:
+                    file.write(f"# {location.wkt} \n")
+                    file.write(f"# {geometry.wkt} \n")
+
+                df.to_csv(csv_file, mode="a")
             start_date += timedelta(days=1)
         start_date -= timedelta(days=1)
 
diff --git a/eco_counter/management/commands/utils.py b/eco_counter/management/commands/utils.py
index 695eefb81..90363b66b 100644
--- a/eco_counter/management/commands/utils.py
+++ b/eco_counter/management/commands/utils.py
@@ -8,9 +8,12 @@
 from django.conf import settings
 from django.contrib.gis.gdal import DataSource
 from django.contrib.gis.geos import GEOSGeometry, LineString, MultiLineString, Point
+from django.core.management.base import CommandError
 
 from eco_counter.constants import (
+    COUNTER_CHOICES_STR,
     COUNTERS,
+    COUNTERS_LIST,
     ECO_COUNTER,
     INDEX_COLUMN_NAME,
     LAM_COUNTER,
@@ -24,14 +27,14 @@
     TELRAAM_COUNTER_CAMERAS,
     TELRAAM_COUNTER_CAMERAS_URL,
     TELRAAM_COUNTER_CSV_FILE,
-    TELRAAM_CSV,
     TELRAAM_HTTP,
+    TELRAAM_STATIONS_INITIAL_WKT_GEOMETRIES,
     TRAFFIC_COUNTER,
     TRAFFIC_COUNTER_CSV_URLS,
     TRAFFIC_COUNTER_METADATA_GEOJSON,
 )
-from eco_counter.models import ImportState, Station
-from eco_counter.tests.test_import_counter_data import TEST_COLUMN_NAMES
+from eco_counter.models import Station
+from eco_counter.tests.constants import TEST_COLUMN_NAMES
 from mobility_data.importers.utils import get_root_dir
 
 logger = logging.getLogger("eco_counter")
@@ -67,47 +70,22 @@ def __init__(self, feature):
         self.location = geom
 
 
-class TelraamCounterStation:
-    # The Telraam API return the coordinates in EPSGS 31370
-    SOURCE_SRID = 4326
-    TARGET_SRID = settings.DEFAULT_SRID
-
-    def get_location_and_geometry(self, id):
-        url = TELRAAM_COUNTER_CAMERA_SEGMENTS_URL.format(id=id)
-        headers = {
-            "X-Api-Key": settings.TELRAAM_TOKEN,
-        }
-        response = TELRAAM_HTTP.get(url, headers=headers)
-        assert (
-            response.status_code == 200
-        ), "Could not fetch segment for camera {id}".format(id=id)
-        json_data = response.json()
-        coords = 
json_data["features"][0]["geometry"]["coordinates"] - lss = [] - for coord in coords: - ls = LineString(coord, srid=self.SOURCE_SRID) - lss.append(ls) - geometry = MultiLineString(lss, srid=self.SOURCE_SRID) - geometry.transform(self.TARGET_SRID) - mid_line = round(len(coords) / 2) - mid_point = round(len(coords[mid_line]) / 2) - location = Point(coords[mid_line][mid_point], srid=self.SOURCE_SRID) - location.transform(self.TARGET_SRID) - return location, geometry +# class TelraamCounterStation: +# # The Telraam API return the coordinates in EPSGS 31370 +# SOURCE_SRID = 4326 +# TARGET_SRID = settings.DEFAULT_SRID - def __init__(self, feature): - self.name = feature["mac"] - self.name_sv = feature["mac"] - self.name_en = feature["mac"] - self.location, self.geometry = self.get_location_and_geometry( - feature["segment_id"] - ) - self.station_id = feature["mac"] +# def __init__(self, feature): +# self.name = feature["mac"] +# self.name_sv = feature["mac"] +# self.name_en = feature["mac"] +# self.location, self.geometry = get_telraam_camera_location_and_geometry( +# feature["segment_id"], self.SOURCE_SRID, self.TARGET_SRID +# ) +# self.station_id = feature["mac"] -class ObservationStation( - LAMStation, EcoCounterStation, TrafficCounterStation, TelraamCounterStation -): +class ObservationStation(LAMStation, EcoCounterStation, TrafficCounterStation): def __init__(self, csv_data_source, feature): self.csv_data_source = csv_data_source self.name = None @@ -117,8 +95,8 @@ def __init__(self, csv_data_source, feature): self.geometry = None self.station_id = None match csv_data_source: - case COUNTERS.TELRAAM_COUNTER: - TelraamCounterStation.__init__(self, feature) + # case COUNTERS.TELRAAM_COUNTER: + # TelraamCounterStation.__init__(self, feature) case COUNTERS.LAM_COUNTER: LAMStation.__init__(self, feature) case COUNTERS.ECO_COUNTER: @@ -127,6 +105,21 @@ def __init__(self, csv_data_source, feature): TrafficCounterStation.__init__(self, feature) +class TelraamStation: + def __init__(self, mac, location, geometry): + self.mac = mac + self.location = location + self.geometry = geometry + + +def check_counters_argument(counters): + for counter in counters: + if counter not in COUNTERS_LIST: + raise CommandError( + f"Invalid counter type, valid types are: {COUNTER_CHOICES_STR}." 
+ ) + + def get_traffic_counter_metadata_data_layer(): meta_file = f"{get_root_dir()}/eco_counter/data/{TRAFFIC_COUNTER_METADATA_GEOJSON}" return DataSource(meta_file)[0] @@ -407,56 +400,169 @@ def get_telraam_counter_stations(): return stations -def get_telraam_counter_csv(from_date): - df = pd.DataFrame() - try: - import_state = ImportState.objects.get(csv_data_source=TELRAAM_CSV) - except ImportState.DoesNotExist: - return None - end_date = date( - import_state.current_year_number, - import_state.current_month_number, - import_state.current_day_number, +def get_telraam_camera_location_and_geometry(id, source_srid=4326, target_srid=3067): + url = TELRAAM_COUNTER_CAMERA_SEGMENTS_URL.format(id=id) + headers = { + "X-Api-Key": settings.TELRAAM_TOKEN, + } + response = TELRAAM_HTTP.get(url, headers=headers) + assert ( + response.status_code == 200 + ), "Could not fetch segment for camera {id}".format(id=id) + json_data = response.json() + if len(json_data["features"]) == 0: + logger.error(f"No data for Telraam camera with segment_id: {id}") + return None, None + + coords = json_data["features"][0]["geometry"]["coordinates"] + lss = [] + for coord in coords: + ls = LineString(coord, srid=source_srid) + lss.append(ls) + geometry = MultiLineString(lss, srid=source_srid) + geometry.transform(target_srid) + mid_line = round(len(coords) / 2) + mid_point = round(len(coords[mid_line]) / 2) + location = Point(coords[mid_line][mid_point], srid=source_srid) + location.transform(target_srid) + return location, geometry + + +def get_telraam_dataframe(mac, day, month, year): + csv_file = TELRAAM_COUNTER_CSV_FILE.format( + mac=mac, + day=day, + month=month, + year=year, + ) + comment_lines = [] + skiprows = 0 + # The location and geometry is stored as comments to the csv file + with open(csv_file, "r") as file: + for line in file: + if line.startswith("#"): + comment_lines.append(line) + skiprows += 1 + else: + break + return ( + pd.read_csv(csv_file, index_col=False, skiprows=skiprows), + csv_file, + comment_lines, ) + + +def parse_telraam_comment_lines(comment_lines): + location = None + geometry = None + comment_lines = [c.replace("# ", "") for c in comment_lines] + if len(comment_lines) > 0: + location = GEOSGeometry(comment_lines[0]) + if len(comment_lines) > 1: + geometry = GEOSGeometry(comment_lines[1]) + return location, geometry + + +def get_telraam_data_frames(from_date): + """ + For every camera create a dataframe for each location the camera has been placed. 
+    """
+    end_date = date.today()
+    data_frames = {}
     for camera in get_telraam_cameras():
         df_cam = pd.DataFrame()
         start_date = from_date
-
+        current_station = None
+        prev_comment_lines = []
         while start_date <= end_date:
-            csv_file = TELRAAM_COUNTER_CSV_FILE.format(
-                id=camera["mac"],
-                day=start_date.day,
-                month=start_date.month,
-                year=start_date.year,
-            )
             try:
-                df_tmp = pd.read_csv(csv_file, index_col=False)
+                df_tmp, csv_file, comment_lines = get_telraam_dataframe(
+                    camera["mac"], start_date.day, start_date.month, start_date.year
+                )
             except FileNotFoundError:
                 logger.warning(
                     f"File {csv_file} not found, skipping day{str(start_date)} for camera {camera}"
                 )
             else:
+                if not comment_lines and not current_station:
+                    # Set the initial station, i.e., no coordinates defined in the CSV source data
+                    current_station = TelraamStation(
+                        mac=camera["mac"],
+                        location=GEOSGeometry(
+                            TELRAAM_STATIONS_INITIAL_WKT_GEOMETRIES[camera["mac"]][
+                                "location"
+                            ]
+                        ),
+                        geometry=GEOSGeometry(
+                            TELRAAM_STATIONS_INITIAL_WKT_GEOMETRIES[camera["mac"]][
+                                "geometry"
+                            ]
+                        ),
+                    )
+                    data_frames[current_station] = []
+                elif comment_lines and not current_station:
+                    location, geometry = parse_telraam_comment_lines(comment_lines)
+                    current_station = TelraamStation(
+                        mac=camera["mac"], location=location, geometry=geometry
+                    )
+                    data_frames[current_station] = []
+
+                if prev_comment_lines != comment_lines:
+                    location, geometry = parse_telraam_comment_lines(comment_lines)
+                    # CSV files might contain the initial coordinates; to avoid creating duplicates, check the coordinates
+                    if (
+                        location.wkt != current_station.location.wkt
+                        and geometry.wkt != current_station.geometry.wkt
+                    ):
+                        df_cam[INDEX_COLUMN_NAME] = pd.to_datetime(
+                            df_cam[INDEX_COLUMN_NAME],
+                            format=TELRAAM_COUNTER_API_TIME_FORMAT,
+                        )
+                        data_frames[current_station].append(df_cam)
+                        current_station = TelraamStation(
+                            mac=camera["mac"], location=location, geometry=geometry
+                        )
+                        df_cam = pd.DataFrame()
+                        data_frames[current_station] = []
                 df_cam = pd.concat([df_cam, df_tmp])
+            finally:
+                prev_comment_lines = comment_lines
             start_date += timedelta(days=1)
+        if not df_cam.empty:
+            df_cam[INDEX_COLUMN_NAME] = pd.to_datetime(
+                df_cam[INDEX_COLUMN_NAME], format=TELRAAM_COUNTER_API_TIME_FORMAT
+            )
+            data_frames[current_station].append(df_cam)
 
-        if df.empty:
-            df = df_cam
-        else:
-            df = pd.merge(df, df_cam, on=INDEX_COLUMN_NAME)
+    return data_frames
 
-    df[INDEX_COLUMN_NAME] = pd.to_datetime(
-        df[INDEX_COLUMN_NAME], format=TELRAAM_COUNTER_API_TIME_FORMAT
-    )
-    return df
+
+def get_or_create_telraam_station(station):
+    name = str(station.mac)
+    filter = {
+        "csv_data_source": TELRAAM_COUNTER,
+        "name": name,
+        "name_sv": name,
+        "name_en": name,
+        "location": station.location,
+        "geometry": station.geometry,
+        "station_id": station.mac,
+    }
+    station_qs = Station.objects.filter(**filter)
+    if not station_qs.exists():
+        obj = Station.objects.create(**filter)
+    else:
+        obj = station_qs.first()
+    return obj
 
 
 def save_stations(csv_data_source):
     stations = []
     num_created = 0
     match csv_data_source:
-        case COUNTERS.TELRAAM_COUNTER:
-            stations = get_telraam_counter_stations()
+        # case COUNTERS.TELRAAM_COUNTER:
+        # Telraam stations are handled differently as they are dynamic
         case COUNTERS.LAM_COUNTER:
             stations = get_lam_counter_stations()
         case COUNTERS.ECO_COUNTER:
@@ -503,14 +609,14 @@ def get_test_dataframe(counter):
 
 
 def gen_eco_counter_test_csv(
-    columns, start_time, end_time, time_stamp_column="startTime"
+    columns, start_time, end_time, time_stamp_column="startTime", 
freq="15min" ): """ Generates test data for a given timespan, - for every row (15min) the value 1 is set. + for every row ('freq') the value 1 is set. """ df = pd.DataFrame() - timestamps = pd.date_range(start=start_time, end=end_time, freq="15min") + timestamps = pd.date_range(start=start_time, end=end_time, freq=freq) for col in columns: vals = [1 for i in range(len(timestamps))] df.insert(0, col, vals) diff --git a/eco_counter/tasks.py b/eco_counter/tasks.py index 9ddefd6f4..af525b84a 100644 --- a/eco_counter/tasks.py +++ b/eco_counter/tasks.py @@ -14,8 +14,8 @@ def initial_import_counter_data(args, name="initial_import_counter_data"): @shared_task_email -def delete_all_counter_data(name="delete_all_counter_data"): - management.call_command("delete_all_counter_data") +def delete_counter_data(args, name="delete_counter_data"): + management.call_command("delete_counter_data", "--counters", args) @shared_task_email diff --git a/eco_counter/tests/conftest.py b/eco_counter/tests/conftest.py index e5af61b79..ed3fb16ef 100644 --- a/eco_counter/tests/conftest.py +++ b/eco_counter/tests/conftest.py @@ -1,4 +1,4 @@ -from datetime import timedelta +from datetime import date, timedelta import dateutil.parser import pytest @@ -133,58 +133,8 @@ def hour_data(stations, days): station=stations[0], day=days[0], ) - hour_data.values_ak = [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - ] - hour_data.values_ap = [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - ] + hour_data.values_ak = [v for v in range(1, 25)] + hour_data.values_ap = [v for v in range(1, 25)] hour_data.save() return hour_data @@ -197,6 +147,7 @@ def day_datas(stations, days): day_data = DayData.objects.create(station=stations[0], day=days[i]) day_data.value_ak = 5 + i day_data.value_ap = 6 + i + day_data.value_at = day_data.value_ak + day_data.value_ap day_data.save() day_datas.append(day_data) return day_datas @@ -240,3 +191,63 @@ def year_datas(stations, years): year_data.save() year_datas.append(year_data) return year_datas + + +@pytest.mark.django_db +@pytest.fixture +def is_active_fixtures(): + station0 = Station.objects.create( + id=0, + name="Station with 0 day of data", + location="POINT(0 0)", + csv_data_source=LAM_COUNTER, + ) + station1 = Station.objects.create( + id=1, + name="Station with 1 day of data", + location="POINT(0 0)", + csv_data_source=LAM_COUNTER, + ) + station7 = Station.objects.create( + id=7, + name="Station with 7 days of data", + location="POINT(0 0)", + csv_data_source=LAM_COUNTER, + ) + station30 = Station.objects.create( + id=30, + name="Station with 30 days of data", + location="POINT(0 0)", + csv_data_source=LAM_COUNTER, + ) + start_date = date.today() + current_date = start_date + days_counter = 0 + day_counts = [0, 1, 7, 30] + stations = [station0, station1, station7, station30] + while current_date >= start_date - timedelta(days=32): + for i, station in enumerate(stations): + days = day_counts[i] + day = Day.objects.create(station=station, date=current_date) + day_data = DayData.objects.create(station=station, day=day) + if i > 0: + start_day = day_counts[i - 1] + else: + start_day = 10000 + + if days > days_counter & days_counter >= start_day: + day_data.value_at = 1 + day_data.value_pt = 1 + day_data.value_jt = 1 + day_data.value_bt = 1 + day_data.save() + else: + day_data.value_at = 0 + day_data.value_pt = 0 + 
day_data.value_jt = 0 + day_data.value_bt = 0 + day_data.save() + + current_date -= timedelta(days=1) + days_counter += 1 + return Station.objects.all(), Day.objects.all(), DayData.objects.all() diff --git a/eco_counter/tests/constants.py b/eco_counter/tests/constants.py index 2935a3cc3..e3b9b3b14 100644 --- a/eco_counter/tests/constants.py +++ b/eco_counter/tests/constants.py @@ -1,3 +1,5 @@ +from eco_counter.constants import ECO_COUNTER, LAM_COUNTER, TRAFFIC_COUNTER + TEST_EC_STATION_NAME = "Auransilta" TEST_TC_STATION_NAME = "Myllysilta" TEST_LC_STATION_NAME = "Tie 8 Raisio" @@ -37,3 +39,9 @@ "Tie 8 Raisio BP", "Tie 8 Raisio BK", ] + +TEST_COLUMN_NAMES = { + ECO_COUNTER: ECO_COUNTER_TEST_COLUMN_NAMES, + TRAFFIC_COUNTER: TRAFFIC_COUNTER_TEST_COLUMN_NAMES, + LAM_COUNTER: LAM_COUNTER_TEST_COLUMN_NAMES, +} diff --git a/eco_counter/tests/test_api.py b/eco_counter/tests/test_api.py index 9e7fc2ce7..237d21886 100644 --- a/eco_counter/tests/test_api.py +++ b/eco_counter/tests/test_api.py @@ -3,12 +3,47 @@ import pytest from rest_framework.reverse import reverse -from .conftest import TEST_TIMESTAMP from .constants import TEST_EC_STATION_NAME @pytest.mark.django_db -def test__hour_data(api_client, hour_data): +def test_is_active(api_client, is_active_fixtures): + url = reverse("eco_counter:stations-detail", args=[0]) + response = api_client.get(url) + assert response.status_code == 200 + is_active = response.json()["is_active"] + assert is_active["1"] is False + assert is_active["7"] is False + assert is_active["30"] is False + assert is_active["365"] is False + url = reverse("eco_counter:stations-detail", args=[1]) + response = api_client.get(url) + assert response.status_code == 200 + is_active = response.json()["is_active"] + assert is_active["1"] is True + assert is_active["7"] is True + assert is_active["30"] is True + assert is_active["365"] is True + url = reverse("eco_counter:stations-detail", args=[7]) + response = api_client.get(url) + assert response.status_code == 200 + is_active = response.json()["is_active"] + assert is_active["1"] is False + assert is_active["7"] is True + assert is_active["30"] is True + assert is_active["365"] is True + url = reverse("eco_counter:stations-detail", args=[30]) + response = api_client.get(url) + assert response.status_code == 200 + is_active = response.json()["is_active"] + assert is_active["1"] is False + assert is_active["7"] is False + assert is_active["30"] is True + assert is_active["365"] is True + + +@pytest.mark.django_db +def test_hour_data(api_client, hour_data): url = reverse("eco_counter:hour_data-list") response = api_client.get(url) assert response.status_code == 200 @@ -18,7 +53,7 @@ def test__hour_data(api_client, hour_data): @pytest.mark.django_db -def test__day_data( +def test_day_data( api_client, day_datas, ): @@ -38,7 +73,7 @@ def test__day_data( @pytest.mark.django_db -def test__get_day_data(api_client, day_datas, station_id, test_timestamp): +def test_get_day_data(api_client, day_datas, station_id, test_timestamp): url = reverse( "eco_counter:day_data-get-day-data" ) + "?station_id={}&date={}".format(station_id, test_timestamp + timedelta(days=3)) @@ -50,7 +85,7 @@ def test__get_day_data(api_client, day_datas, station_id, test_timestamp): @pytest.mark.django_db -def test__get_day_datas(api_client, day_datas, station_id, test_timestamp): +def test_get_day_datas(api_client, day_datas, station_id, test_timestamp): url = reverse( "eco_counter:day_data-get-day-datas" ) + "?station_id={}&start_date={}&end_date={}".format( @@ -59,7 
+94,6 @@ def test__get_day_datas(api_client, day_datas, station_id, test_timestamp): response = api_client.get(url) assert response.status_code == 200 res_json = response.json() - for i in range(4): assert res_json[i]["value_ak"] == day_datas[i].value_ak assert res_json[i]["value_ap"] == day_datas[i].value_ap @@ -67,7 +101,7 @@ def test__get_day_datas(api_client, day_datas, station_id, test_timestamp): @pytest.mark.django_db -def test__week_data(api_client, week_datas): +def test_week_data(api_client, week_datas): url = reverse("eco_counter:week_data-list") response = api_client.get(url) assert response.status_code == 200 @@ -84,7 +118,7 @@ def test__week_data(api_client, week_datas): @pytest.mark.django_db -def test__get_week_data(api_client, week_datas, station_id, test_timestamp): +def test_get_week_data(api_client, week_datas, station_id, test_timestamp): url = reverse( "eco_counter:week_data-get-week-data" ) + "?station_id={}&week_number={}&year_number={}".format( @@ -97,7 +131,7 @@ def test__get_week_data(api_client, week_datas, station_id, test_timestamp): @pytest.mark.django_db -def test__get_week_datas(api_client, week_datas, station_id, test_timestamp): +def test_get_week_datas(api_client, week_datas, station_id, test_timestamp): end_week_number = test_timestamp + timedelta(weeks=4) url = reverse( "eco_counter:week_data-get-week-datas" @@ -117,7 +151,7 @@ def test__get_week_datas(api_client, week_datas, station_id, test_timestamp): @pytest.mark.django_db -def test__month_data(api_client, month_datas): +def test_month_data(api_client, month_datas): url = reverse("eco_counter:month_data-list") response = api_client.get(url) assert response.status_code == 200 @@ -135,7 +169,7 @@ def test__month_data(api_client, month_datas): @pytest.mark.django_db -def test__get_month_data(api_client, month_datas, station_id, test_timestamp): +def test_get_month_data(api_client, month_datas, station_id, test_timestamp): url = reverse( "eco_counter:month_data-get-month-data" ) + "?station_id={}&month_number={}&year_number={}".format( @@ -149,7 +183,7 @@ def test__get_month_data(api_client, month_datas, station_id, test_timestamp): @pytest.mark.django_db -def test__get_year_datas(api_client, year_datas, station_id, test_timestamp): +def test_get_year_datas(api_client, year_datas, station_id, test_timestamp): end_year_number = test_timestamp.replace(year=test_timestamp.year + 1).year url = reverse( "eco_counter:year_data-get-year-datas" @@ -178,7 +212,7 @@ def test__get_year_datas(api_client, year_datas, station_id, test_timestamp): @pytest.mark.django_db -def test__get_month_datas(api_client, month_datas, station_id, test_timestamp): +def test_get_month_datas(api_client, month_datas, station_id, test_timestamp): url = reverse( "eco_counter:month_data-get-month-datas" ) + "?station_id={}&start_month_number={}&end_month_number={}&year_number={}".format( @@ -194,7 +228,7 @@ def test__get_month_datas(api_client, month_datas, station_id, test_timestamp): @pytest.mark.django_db -def test__year_data(api_client, year_datas): +def test_year_data(api_client, year_datas): url = reverse("eco_counter:year_data-list") response = api_client.get(url) assert response.status_code == 200 @@ -207,7 +241,7 @@ def test__year_data(api_client, year_datas): @pytest.mark.django_db -def test__days(api_client, days, test_timestamp): +def test_days(api_client, days, test_timestamp): url = reverse("eco_counter:days-list") response = api_client.get(url) assert response.status_code == 200 @@ -222,7 +256,7 @@ def 
test__days(api_client, days, test_timestamp): @pytest.mark.django_db -def test__weeks(api_client, weeks, test_timestamp): +def test_weeks(api_client, weeks, test_timestamp): url = reverse("eco_counter:weeks-list") response = api_client.get(url) assert response.status_code == 200 @@ -236,7 +270,7 @@ def test__weeks(api_client, weeks, test_timestamp): @pytest.mark.django_db -def test__months(api_client, months, test_timestamp): +def test_months(api_client, months, test_timestamp): url = reverse("eco_counter:months-list") response = api_client.get(url) assert response.status_code == 200 @@ -252,7 +286,7 @@ def test__months(api_client, months, test_timestamp): @pytest.mark.django_db -def test__months_multiple_years(api_client, years, test_timestamp): +def test_months_multiple_years(api_client, years, test_timestamp): url = reverse("eco_counter:years-list") response = api_client.get(url) assert response.status_code == 200 @@ -266,13 +300,14 @@ def test__months_multiple_years(api_client, years, test_timestamp): @pytest.mark.django_db -def test__station(api_client, stations, year_datas): +def test_station(api_client, stations, year_datas, day_datas): url = reverse("eco_counter:stations-list") response = api_client.get(url) assert response.status_code == 200 assert response.json()["results"][0]["name"] == TEST_EC_STATION_NAME assert response.json()["results"][0]["sensor_types"] == ["at"] - assert response.json()["results"][0]["data_from_year"] == TEST_TIMESTAMP.year + assert response.json()["results"][0]["data_until_date"] == "2020-01-07" + assert response.json()["results"][0]["data_from_date"] == "2020-01-01" # Test retrieving station by data type url = reverse("eco_counter:stations-list") + "?data_type=a" response = api_client.get(url) diff --git a/eco_counter/tests/test_import_counter_data.py b/eco_counter/tests/test_import_counter_data.py index 0a1856e80..b3c69d02f 100644 --- a/eco_counter/tests/test_import_counter_data.py +++ b/eco_counter/tests/test_import_counter_data.py @@ -7,13 +7,20 @@ imports and calculates the data correctly. 
""" import calendar +from datetime import timedelta from io import StringIO +from unittest.mock import patch import dateutil.parser import pytest from django.core.management import call_command -from eco_counter.constants import ECO_COUNTER, LAM_COUNTER, TRAFFIC_COUNTER +from eco_counter.constants import ( + ECO_COUNTER, + LAM_COUNTER, + TELRAAM_COUNTER, + TRAFFIC_COUNTER, +) from eco_counter.models import ( Day, DayData, @@ -27,21 +34,9 @@ Year, YearData, ) +from eco_counter.tests.utils import get_telraam_data_frames_test_fixture -from .constants import ( - ECO_COUNTER_TEST_COLUMN_NAMES, - LAM_COUNTER_TEST_COLUMN_NAMES, - TEST_EC_STATION_NAME, - TEST_LC_STATION_NAME, - TEST_TC_STATION_NAME, - TRAFFIC_COUNTER_TEST_COLUMN_NAMES, -) - -TEST_COLUMN_NAMES = { - ECO_COUNTER: ECO_COUNTER_TEST_COLUMN_NAMES, - TRAFFIC_COUNTER: TRAFFIC_COUNTER_TEST_COLUMN_NAMES, - LAM_COUNTER: LAM_COUNTER_TEST_COLUMN_NAMES, -} +from .constants import TEST_EC_STATION_NAME, TEST_LC_STATION_NAME, TEST_TC_STATION_NAME def import_command(*args, **kwargs): @@ -56,6 +51,114 @@ def import_command(*args, **kwargs): return out.getvalue() +@pytest.mark.django_db +@patch("eco_counter.management.commands.utils.get_telraam_data_frames") +def test_import_telraam(get_telraam_data_frames_mock): + from eco_counter.management.commands.import_counter_data import import_data + + start_time = dateutil.parser.parse("2023-09-01T00:00") + ImportState.objects.create( + current_year_number=start_time.year, + current_month_number=start_time.month, + current_day_number=start_time.day, + csv_data_source=TELRAAM_COUNTER, + ) + num_days_per_location = 2 + get_telraam_data_frames_mock.return_value = get_telraam_data_frames_test_fixture( + start_time, + num_cameras=2, + num_locations=3, + num_days_per_location=num_days_per_location, + ) + import_data([TELRAAM_COUNTER]) + stations_qs = Station.objects.all() + # num_stations * nul_locations, as for every location a station is created + assert stations_qs.count() == 6 + assert DayData.objects.count() == 12 + assert HourData.objects.count() == 12 + assert stations_qs.first().location.wkt == "POINT (2032 2032)" + assert Year.objects.count() == stations_qs.count() + import_state_qs = ImportState.objects.filter(csv_data_source=TELRAAM_COUNTER) + assert import_state_qs.count() == 1 + import_state = import_state_qs.first() + assert import_state.current_year_number == 2023 + assert import_state.current_month_number == 9 + assert import_state.current_day_number == 2 + # 12 + assert Day.objects.count() == stations_qs.count() * num_days_per_location + # Test that duplicates are not created + get_telraam_data_frames_mock.return_value = get_telraam_data_frames_test_fixture( + start_time, + num_cameras=2, + num_locations=3, + num_days_per_location=num_days_per_location, + ) + import_data([TELRAAM_COUNTER]) + assert stations_qs.count() == 6 + assert Year.objects.count() == stations_qs.count() + assert Day.objects.count() == stations_qs.count() * num_days_per_location + assert DayData.objects.count() == 12 + assert HourData.objects.count() == 12 + # Test new locations, adds 2 stations + new_start_time = start_time + timedelta(days=2) + get_telraam_data_frames_mock.return_value = get_telraam_data_frames_test_fixture( + new_start_time, + num_cameras=2, + num_locations=1, + num_days_per_location=num_days_per_location, + ) + import_data([TELRAAM_COUNTER]) + stations_qs = Station.objects.all() + assert stations_qs.count() == 8 + assert Year.objects.count() == stations_qs.count() + assert Day.objects.count() == 16 + # 
Test adding camera + get_telraam_data_frames_mock.return_value = get_telraam_data_frames_test_fixture( + new_start_time, + num_cameras=3, + num_locations=1, + num_days_per_location=num_days_per_location, + ) + import_data([TELRAAM_COUNTER]) + stations_qs = Station.objects.all() + assert stations_qs.count() == 9 + assert Year.objects.count() == stations_qs.count() + # Test data related to first station + station = Station.objects.filter(station_id="0").first() + year_data = YearData.objects.get(station=station) + assert year_data.value_ak == 24 * num_days_per_location + assert year_data.value_ap == 24 * num_days_per_location + assert year_data.value_at == 24 * num_days_per_location * 2 + assert year_data.value_pk == 24 * num_days_per_location + assert year_data.value_pp == 24 * num_days_per_location + assert year_data.value_pt == 24 * num_days_per_location * 2 + assert MonthData.objects.count() == stations_qs.count() * Year.objects.count() + assert Month.objects.count() == stations_qs.count() * Year.objects.count() + assert ( + MonthData.objects.get(station=station, month=Month.objects.first()).value_at + == 24 * num_days_per_location * 2 + ) + # 1.9.2023 is a friday, 9 stations has data for 1-3.9(week 34) and 3 stations has data for + # 4.5 (week 36) + assert WeekData.objects.count() == 12 + assert Week.objects.count() == 12 + # location*camera = 6 * num_days_per_location + cameras * num_days_per_location + DayData.objects.count() == 6 * 2 + 3 * 2 + Day.objects.count() == 6 * 2 + 3 * 2 + + # Three locations for two cameras + assert Day.objects.filter(date__day=1).count() == 6 + # One location for Three cameras + assert Day.objects.filter(date__day=4).count() == 3 + assert DayData.objects.first().value_at == 48 + assert DayData.objects.first().value_ap == 24 + assert DayData.objects.first().value_ak == 24 + HourData.objects.count() == 18 + for hour_data in HourData.objects.all(): + hour_data.values_ak == [1 for x in range(24)] + hour_data.values_at == [2 for x in range(24)] + + @pytest.mark.test_import_counter_data @pytest.mark.django_db def test_import_eco_counter_data(stations): diff --git a/eco_counter/tests/utils.py b/eco_counter/tests/utils.py new file mode 100644 index 000000000..55946dd38 --- /dev/null +++ b/eco_counter/tests/utils.py @@ -0,0 +1,49 @@ +from datetime import timedelta + +import pandas as pd +from django.contrib.gis.geos import GEOSGeometry + +from eco_counter.management.commands.utils import ( + gen_eco_counter_test_csv, + TelraamStation, +) + + +def get_telraam_data_frames_test_fixture( + from_date, + num_cameras=1, + num_locations=2, + num_days_per_location=2, +): + def get_location_and_geometry(i): + location = GEOSGeometry(f"POINT({i} {i})") + geometry = GEOSGeometry( + f"MULTILINESTRING (({i} {i}, 1 1), (1 1, 2 2), (2 2, 3 3))" + ) + return location, geometry + + if num_locations <= 0 or num_cameras <= 0 or num_days_per_location <= 0: + raise ValueError( + "'num_locations', 'num_cameras' and 'num_days_per_location' must be greated than 0." 
+        )
+
+    column_types = ["AK", "AP", "PK", "PP"]
+    data_frames = {}
+    for c_c in range(num_cameras):
+        for l_c in range(num_locations):
+            index = c_c + l_c + from_date.year + from_date.month * from_date.day
+            location, geometry = get_location_and_geometry(index)
+            station = TelraamStation(c_c, location, geometry)
+            data_frames[station] = []
+            columns = [f"{c_c} {t}" for t in column_types]
+            df = pd.DataFrame()
+            # Generate 'num_days_per_location' days of data for every location
+            start_date = from_date
+            for day in range(num_days_per_location):
+                csv_data = gen_eco_counter_test_csv(
+                    columns, start_date, start_date + timedelta(hours=23), freq="1h"
+                )
+                start_date += timedelta(days=1)
+                df = pd.concat([df, csv_data])
+            data_frames[station].append(df)
+    return data_frames
diff --git a/environment_data/README.md b/environment_data/README.md
new file mode 100644
index 000000000..402627363
--- /dev/null
+++ b/environment_data/README.md
@@ -0,0 +1,46 @@
+# Environment data app
+The app imports, processes and serves historical environment data.
+Hourly data is stored as it is in the source data. Day, week, month and year
+data are stored as means, except for the precipitation amount parameter, for which the
+cumulative value is calculated.
+
+## AQ (Air Quality)
+The imported parameters are:
+* AQINDEX_PT1H_avg "Air quality index" (Ilmanlaatuindeksi)
+* SO2_PT1H_avg "Sulphur dioxide - ug/m3" (Rikkidioksiidi)
+* O3_PT1H_avg "Ozone - ug/m3" (Otsooni)
+* NO2_PT1H_avg "Nitrogen dioxide - ug/m3" (Typpidioksiidi)
+* PM10_PT1H_avg "Particulate matter < 10 µm - ug/m3" (Hengitettävät hiukkaset)
+* PM25_PT1H_avg "Particulate matter < 2.5 µm - ug/m3" (Pienhiukkaset)
+
+## WO (Weather Observation)
+The imported parameters are:
+* TA_PT1H_AVG "Air temperature - degC"
+* RH_PT1H_AVG "Relative humidity - %"
+* WS_PT1H_AVG "Wind speed - m/s"
+* WD_PT1H_AVG "Wind direction - deg"
+* PRA_PT1H_ACC "Precipitation amount - mm", Note: cumulative value
+* PA_PT1H_AVG "Air pressure - hPA"
+
+# Importing
+## Initial import
+Note, the initial import deletes all previously imported data for the given data type.
+E.g., to run an initial import of data and stations for weather observations:
+```
+./manage.py import_environment_data --initial-import-with-stations WO
+```
+E.g., to run an initial import of air quality data without deleting stations:
+```
+./manage.py import_environment_data --initial-import AQ
+```
+
+## Incremental import
+E.g., to incrementally import the air quality data type:
+```
+./manage.py import_environment_data --data-types AQ
+```
+
+## To delete all data
+```
+./manage.py delete_all_environment_data
+```
\ No newline at end of file
diff --git a/environment_data/__init__.py b/environment_data/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/environment_data/admin.py b/environment_data/admin.py
new file mode 100644
index 000000000..5d1d0751d
--- /dev/null
+++ b/environment_data/admin.py
@@ -0,0 +1,136 @@
+from django.contrib import admin
+
+from environment_data.models import (
+    Day,
+    DayData,
+    Hour,
+    HourData,
+    ImportState,
+    Measurement,
+    Month,
+    MonthData,
+    Parameter,
+    Station,
+    Week,
+    WeekData,
+    Year,
+    YearData,
+)
+
+
+class BaseAdmin(admin.ModelAdmin):
+    def get_readonly_fields(self, request, obj=None):
+        return [f.name for f in self.model._meta.fields]
+
+
+class DataAdmin(BaseAdmin):
+    def get_name(self, obj):
+        return obj.station.name
+
+    # For performance; without raw_id_fields the admin times out when loading the row
+    raw_id_fields = ["measurements"]
+
+
+class YearDataAdmin(DataAdmin):
+    list_display = ("get_name", "year")
+
+
+class MonthDataAdmin(DataAdmin):
+    list_display = ("get_name", "month", "year")
+
+
+class WeekDataAdmin(DataAdmin):
+    list_display = ("get_name", "week", "get_years")
+
+    def get_years(self, obj):
+        return [y for y in obj.week.years.all()]
+
+
+class DayDataAdmin(DataAdmin):
+    list_display = (
+        "get_name",
+        "get_date",
+    )
+
+    def get_date(self, obj):
+        return obj.day.date
+
+
+class HourDataAdmin(DataAdmin):
+    list_display = (
+        "get_name",
+        "get_date",
+    )
+
+    def get_date(self, obj):
+        return obj.hour.day.date
+
+
+class WeekAdmin(BaseAdmin):
+    list_display = (
+        "week_number",
+        "get_year",
+    )
+
+    def get_year(self, obj):
+        return f"{', '.join([str(y.year_number) for y in obj.years.all()])} {obj.week_number}"
+
+
+class YearAdmin(BaseAdmin):
+    list_display = ("get_year",)
+
+    def get_year(self, obj):
+        return obj.year_number
+
+
+class MonthAdmin(BaseAdmin):
+    list_display = (
+        "month_number",
+        "get_year",
+    )
+
+    def get_year(self, obj):
+        return obj.year.year_number
+
+
+class DayAdmin(BaseAdmin):
+    list_display = ("get_date",)
+
+    def get_date(self, obj):
+        return obj.date
+
+
+class HourAdmin(BaseAdmin):
+    list_display = (
+        "hour_number",
+        "get_date",
+    )
+
+    def get_date(self, obj):
+        return obj.day.date
+
+
+class ImportStateAdmin(admin.ModelAdmin):
+    def get_readonly_fields(self, request, obj=None):
+        return [f.name for f in self.model._meta.fields]
+
+
+class StationAdmin(admin.ModelAdmin):
+    def get_readonly_fields(self, request, obj=None):
+        return [f.name for f in self.model._meta.fields]
+
+
+admin.site.register(Station, StationAdmin)
+admin.site.register(ImportState, ImportStateAdmin)
+admin.site.register(YearData, YearDataAdmin)
+admin.site.register(MonthData, MonthDataAdmin)
+admin.site.register(WeekData, WeekDataAdmin)
+admin.site.register(HourData, HourDataAdmin)
+admin.site.register(DayData, DayDataAdmin)
+admin.site.register(Year, YearAdmin)
+admin.site.register(Month, MonthAdmin)
+admin.site.register(Week, WeekAdmin)
+admin.site.register(Day, DayAdmin)
+admin.site.register(Hour, HourAdmin)
+admin.site.register(Measurement) +admin.site.register(Parameter) diff --git a/environment_data/api/constants.py b/environment_data/api/constants.py new file mode 100644 index 000000000..158a236c9 --- /dev/null +++ b/environment_data/api/constants.py @@ -0,0 +1,88 @@ +import types + +from drf_spectacular.utils import OpenApiParameter + +from environment_data.constants import VALID_DATA_TYPE_CHOICES + +DATA_TYPES = types.SimpleNamespace() +HOUR = "hour" +DAY = "day" +WEEK = "week" +MONTH = "month" +YEAR = "year" +DATA_TYPES.HOUR = HOUR +DATA_TYPES.DAY = DAY +DATA_TYPES.WEEK = WEEK +DATA_TYPES.MONTH = MONTH +DATA_TYPES.YEAR = YEAR +DATETIME_FORMATS = { + HOUR: "%m-%d", + DAY: "%m-%d", + WEEK: "%W", + MONTH: "%m", + YEAR: "%Y", +} + +YEAR_PARAM = OpenApiParameter( + name="year", + location=OpenApiParameter.QUERY, + description=("Year of the data, not required when retrieving year data."), + required=False, + type=int, +) +TYPE_PARAM = OpenApiParameter( + name="type", + location=OpenApiParameter.QUERY, + description=( + f"Type of the data to be returned, types are: {', '.join([f for f in DATETIME_FORMATS])}" + ), + required=True, + type=str, +) +TIME_FORMATS_STR = ", ".join([f"{f[0]}: {f[1]}" for f in DATETIME_FORMATS.items()]) +START_PARAM = OpenApiParameter( + name="start", + location=OpenApiParameter.QUERY, + description=( + f"Start of the interval. Formats for different types are: {TIME_FORMATS_STR}" + ), + required=True, + type=str, +) +END_PARAM = OpenApiParameter( + name="end", + location=OpenApiParameter.QUERY, + description=( + f"End of the interval. Formats for different types are: {TIME_FORMATS_STR}" + ), + required=True, + type=str, +) +STATION_PARAM = OpenApiParameter( + name="station_id", + location=OpenApiParameter.QUERY, + description=("Id of the environemnt data station"), + required=True, + type=str, +) +DATA_TYPE_PARAM = OpenApiParameter( + name="data_type", + location=OpenApiParameter.QUERY, + description=( + f"'data_type' of the station, valid types are: {VALID_DATA_TYPE_CHOICES}" + ), + required=False, + type=str, +) + +ENVIRONMENT_DATA_PARAMS = [ + TYPE_PARAM, + YEAR_PARAM, + START_PARAM, + END_PARAM, + STATION_PARAM, +] + +ENVIRONMENT_STATION_PARAMS = [ + DATA_TYPE_PARAM, +] diff --git a/environment_data/api/serializers.py b/environment_data/api/serializers.py new file mode 100644 index 000000000..6efde594c --- /dev/null +++ b/environment_data/api/serializers.py @@ -0,0 +1,141 @@ +from rest_framework import serializers + +from environment_data.constants import DATA_TYPES_FULL_NAME +from environment_data.models import ( + Day, + DayData, + HourData, + Measurement, + MonthData, + Parameter, + Station, + WeekData, + YearData, +) + + +class StationSerializer(serializers.ModelSerializer): + parameters_in_use = serializers.SerializerMethodField() + data_type_verbose = serializers.SerializerMethodField() + + class Meta: + model = Station + fields = [ + "id", + "data_type", + "data_type_verbose", + "name", + "name_sv", + "name_en", + "location", + "geo_id", + "parameters_in_use", + ] + + def get_parameters_in_use(self, obj): + res = {} + for param in obj.parameters.all(): + qs = YearData.objects.filter( + station=obj, measurements__parameter=param, measurements__value__gte=0 + ) + if qs.count(): + res[param.name] = True + else: + res[param.name] = False + return res + + def get_data_type_verbose(self, obj): + return DATA_TYPES_FULL_NAME[obj.data_type] + + +class ParameterSerializer(serializers.ModelSerializer): + data_type_verbose = serializers.SerializerMethodField() + + class 
Meta: + model = Parameter + fields = ["id", "data_type", "data_type_verbose", "name", "description"] + + def get_data_type_verbose(self, obj): + return DATA_TYPES_FULL_NAME[obj.data_type] + + +class MeasurementSerializer(serializers.ModelSerializer): + parameter = serializers.PrimaryKeyRelatedField( + many=False, source="parameter.name", read_only=True + ) + + class Meta: + model = Measurement + fields = ["id", "value", "parameter"] + + +class DaySerializer(serializers.ModelSerializer): + class Meta: + model = Day + fields = "__all__" + + +class YearDataSerializer(serializers.ModelSerializer): + measurements = MeasurementSerializer(many=True) + year_number = serializers.PrimaryKeyRelatedField( + many=False, source="year.year_number", read_only=True + ) + + class Meta: + model = YearData + fields = ["id", "measurements", "year_number"] + + +class MonthDataSerializer(serializers.ModelSerializer): + measurements = MeasurementSerializer(many=True) + month_number = serializers.PrimaryKeyRelatedField( + many=False, source="month.month_number", read_only=True + ) + year_number = serializers.PrimaryKeyRelatedField( + many=False, source="month.year.year_number", read_only=True + ) + + class Meta: + model = MonthData + fields = ["id", "measurements", "month_number", "year_number"] + + +class WeekDataSerializer(serializers.ModelSerializer): + measurements = MeasurementSerializer(many=True) + week_number = serializers.PrimaryKeyRelatedField( + many=False, source="week.week_number", read_only=True + ) + + class Meta: + model = WeekData + fields = ["id", "measurements", "week_number"] + + +class DayDataSerializer(serializers.ModelSerializer): + measurements = MeasurementSerializer(many=True) + date = serializers.PrimaryKeyRelatedField( + many=False, source="day.date", read_only=True + ) + + class Meta: + model = DayData + fields = ["id", "measurements", "date"] + + +class HourDataSerializer(serializers.ModelSerializer): + measurements = MeasurementSerializer(many=True) + hour_number = serializers.PrimaryKeyRelatedField( + many=False, source="hour.hour_number", read_only=True + ) + date = serializers.PrimaryKeyRelatedField( + many=False, source="hour.day.date", read_only=True + ) + + class Meta: + model = HourData + fields = [ + "id", + "measurements", + "hour_number", + "date", + ] diff --git a/environment_data/api/urls.py b/environment_data/api/urls.py new file mode 100644 index 000000000..3e21a1c45 --- /dev/null +++ b/environment_data/api/urls.py @@ -0,0 +1,17 @@ +from django.urls import include, path +from rest_framework import routers + +from . 
import views + +app_name = "environment_data" + + +router = routers.DefaultRouter() + +router.register("data", views.DataViewSet, basename="data") +router.register("stations", views.StationViewSet, basename="stations") +router.register("parameters", views.ParameterViewSet, basename="parameters") + +urlpatterns = [ + path("api/v1/", include(router.urls), name="environment_data"), +] diff --git a/environment_data/api/utils.py b/environment_data/api/utils.py new file mode 100644 index 000000000..4c78d2206 --- /dev/null +++ b/environment_data/api/utils.py @@ -0,0 +1,55 @@ +from datetime import datetime + +from rest_framework.exceptions import ParseError + +from .constants import DATA_TYPES, DATETIME_FORMATS, DAY, HOUR, MONTH, WEEK, YEAR + + +def validate_timestamp(timestamp_str, data_type): + time_format = DATETIME_FORMATS[data_type] + try: + datetime.strptime(timestamp_str, time_format) + except ValueError: + return f"{timestamp_str} invalid format date format, valid format for type {data_type} is {time_format}" + return None + + +def get_start_and_end_and_year(filters, data_type): + start = filters.get("start", None) + end = filters.get("end", None) + year = filters.get("year", None) + + if not start or not end: + raise ParseError("Supply both 'start' and 'end' parameters") + + if YEAR not in data_type and not year: + raise ParseError("Supply 'year' parameter") + + res1 = None + res2 = None + match data_type: + case DATA_TYPES.DAY: + res1 = validate_timestamp(start, DAY) + res2 = validate_timestamp(end, DAY) + case DATA_TYPES.HOUR: + res1 = validate_timestamp(start, HOUR) + res2 = validate_timestamp(end, HOUR) + case DATA_TYPES.WEEK: + res1 = validate_timestamp(start, WEEK) + res2 = validate_timestamp(end, WEEK) + case DATA_TYPES.MONTH: + res1 = validate_timestamp(start, MONTH) + res2 = validate_timestamp(end, MONTH) + case DATA_TYPES.YEAR: + res1 = validate_timestamp(start, YEAR) + res2 = validate_timestamp(end, YEAR) + + if res1: + raise ParseError(res1) + if res2: + raise ParseError(res2) + + if HOUR in data_type or DAY in data_type: + start = f"{year}-{start}" + end = f"{year}-{end}" + return start, end, year diff --git a/environment_data/api/views.py b/environment_data/api/views.py new file mode 100644 index 000000000..c567bfb73 --- /dev/null +++ b/environment_data/api/views.py @@ -0,0 +1,156 @@ +from drf_spectacular.utils import extend_schema, extend_schema_view +from rest_framework import status, viewsets +from rest_framework.response import Response + +from environment_data.api.constants import ( + DATA_TYPES, + DATETIME_FORMATS, + ENVIRONMENT_DATA_PARAMS, + ENVIRONMENT_STATION_PARAMS, +) +from environment_data.api.serializers import ( + DayDataSerializer, + HourDataSerializer, + MonthDataSerializer, + ParameterSerializer, + StationSerializer, + WeekDataSerializer, + YearDataSerializer, +) +from environment_data.constants import DATA_TYPES_LIST, VALID_DATA_TYPE_CHOICES +from environment_data.models import ( + DayData, + HourData, + MonthData, + Parameter, + Station, + WeekData, + YearData, +) + +from .utils import get_start_and_end_and_year + + +@extend_schema_view( + list=extend_schema( + description="Environment data stations", + parameters=ENVIRONMENT_STATION_PARAMS, + ) +) +class StationViewSet(viewsets.ReadOnlyModelViewSet): + queryset = Station.objects.all() + serializer_class = StationSerializer + + def list(self, request, *args, **kwargs): + queryset = self.queryset + filters = self.request.query_params + data_type = filters.get("data_type", None) + if data_type: + data_type 
= str(data_type).upper() + if data_type not in DATA_TYPES_LIST: + return Response( + f"Invalid data type, valid types are: {VALID_DATA_TYPE_CHOICES}", + status=status.HTTP_400_BAD_REQUEST, + ) + + queryset = queryset.filter(data_type=data_type) + + page = self.paginate_queryset(queryset) + serializer = self.serializer_class(page, many=True) + return self.get_paginated_response(serializer.data) + + +@extend_schema_view( + list=extend_schema( + description="Environment data parameters", + ) +) +class ParameterViewSet(viewsets.ReadOnlyModelViewSet): + queryset = Parameter.objects.all() + serializer_class = ParameterSerializer + + +@extend_schema_view( + list=extend_schema( + parameters=ENVIRONMENT_DATA_PARAMS, + description="Returns yearly, monthly, weekly or daily means of measured parameters." + " Returns also the hourly measured parameters from which the means are calculated." + " Provide the 'type' parameter to choose what type of data to return.", + ) +) +class DataViewSet(viewsets.GenericViewSet): + queryset = YearData.objects.all() + + def list(self, request, *args, **kwargs): + filters = self.request.query_params + station_id = filters.get("station_id", None) + if not station_id: + return Response( + "Supply 'station_id' parameter.", status=status.HTTP_400_BAD_REQUEST + ) + else: + try: + station = Station.objects.get(id=station_id) + except Station.DoesNotExist: + return Response( + f"Station with id {station_id} not found.", + status=status.HTTP_400_BAD_REQUEST, + ) + + data_type = filters.get("type", None) + if not data_type: + return Response( + "Supply 'type' parameter", status=status.HTTP_400_BAD_REQUEST + ) + else: + data_type = data_type.lower() + + start, end, year = get_start_and_end_and_year(filters, data_type) + match data_type: + case DATA_TYPES.HOUR: + queryset = HourData.objects.filter( + station=station, + hour__day__year__year_number=year, + hour__day__date__gte=start, + hour__day__date__lte=end, + ) + serializer_class = HourDataSerializer + case DATA_TYPES.DAY: + queryset = DayData.objects.filter( + station=station, + day__date__gte=start, + day__date__lte=end, + day__year__year_number=year, + ) + serializer_class = DayDataSerializer + case DATA_TYPES.WEEK: + serializer_class = WeekDataSerializer + queryset = WeekData.objects.filter( + week__years__year_number=year, + station=station, + week__week_number__gte=start, + week__week_number__lte=end, + ) + case DATA_TYPES.MONTH: + serializer_class = MonthDataSerializer + queryset = MonthData.objects.filter( + month__year__year_number=year, + station=station, + month__month_number__gte=start, + month__month_number__lte=end, + ) + case DATA_TYPES.YEAR: + serializer_class = YearDataSerializer + queryset = YearData.objects.filter( + station=station, + year__year_number__gte=start, + year__year_number__lte=end, + ) + case _: + return Response( + f"Provide a valid 'type' parameters. 
Valid types are: {', '.join([f for f in DATETIME_FORMATS])}", + status=status.HTTP_400_BAD_REQUEST, + ) + page = self.paginate_queryset(queryset) + serializer = serializer_class(page, many=True) + return self.get_paginated_response(serializer.data) diff --git a/environment_data/apps.py b/environment_data/apps.py new file mode 100644 index 000000000..7feb4a4c3 --- /dev/null +++ b/environment_data/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class EnvironmentDataConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "environment_data" diff --git a/environment_data/constants.py b/environment_data/constants.py new file mode 100644 index 000000000..0deebeb43 --- /dev/null +++ b/environment_data/constants.py @@ -0,0 +1,36 @@ +import types + +import requests +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry + +AIR_QUALITY = "AQ" +WEATHER_OBSERVATION = "WO" +DATA_TYPES_FULL_NAME = { + AIR_QUALITY: "Air Quality", + WEATHER_OBSERVATION: "Weather Observation", +} +DATA_TYPE_CHOICES = ( + (AIR_QUALITY, DATA_TYPES_FULL_NAME[AIR_QUALITY]), + (WEATHER_OBSERVATION, DATA_TYPES_FULL_NAME[WEATHER_OBSERVATION]), +) + +DATA_TYPES = types.SimpleNamespace() +DATA_TYPES.AIR_QUALITY = AIR_QUALITY +DATA_TYPES.WEATHER_OBSERVATION = WEATHER_OBSERVATION + +VALID_DATA_TYPE_CHOICES = ", ".join( + [item[0] + f" ({item[1]})" for item in DATA_TYPES_FULL_NAME.items()] +) +DATA_TYPES_LIST = [AIR_QUALITY, WEATHER_OBSERVATION] + +retry_strategy = Retry( + total=12, + status_forcelist=[429], + allowed_methods=["GET", "POST"], + backoff_factor=60, # 60, 120 , 240, ..seconds +) +adapter = HTTPAdapter(max_retries=retry_strategy) +REQUEST_SESSION = requests.Session() +REQUEST_SESSION.mount("https://", adapter) +REQUEST_SESSION.mount("http://", adapter) diff --git a/environment_data/management/commands/air_quality_constants.py b/environment_data/management/commands/air_quality_constants.py new file mode 100644 index 000000000..78143ec68 --- /dev/null +++ b/environment_data/management/commands/air_quality_constants.py @@ -0,0 +1,37 @@ +START_YEAR = 2010 +# NOTE, No more than 10000 hours is allowed per request. 
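+# A full calendar year is at most 366 * 24 = 8784 hours, so air_quality_utils.get_dataframe
+# requests one calendar year at a time to stay under this limit.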
+AIR_QUALITY_INDEX = "AQINDEX_PT1H_avg" # Ilmanlaatuindeksi +PARTICULATE_MATTER_10 = "PM10_PT1H_avg" # Hengitettävät hiukkaset +SULPHUR_DIOXIDE = "SO2_PT1H_avg" # rikkidioksiidi +OZONE = "O3_PT1H_avg" # otsooni +PARTICULATE_MATTER_25 = "PM25_PT1H_avg" # pienhiukkaset +NITROGEN_DIOXIDE = "NO2_PT1H_avg" # typpidioksiidi + +OBSERVABLE_PARAMETERS = [ + AIR_QUALITY_INDEX, + PARTICULATE_MATTER_10, + SULPHUR_DIOXIDE, + OZONE, + PARTICULATE_MATTER_25, + NITROGEN_DIOXIDE, +] +PARAMETER_DESCRIPTIONS = { + AIR_QUALITY_INDEX: "Air quality index", + SULPHUR_DIOXIDE: "Sulphur dioxide - ug/m3", + NITROGEN_DIOXIDE: "Nitrogen dioxide - ug/m3", + OZONE: "Ozone - ug/m3", + PARTICULATE_MATTER_10: "Particulate matter < 10 µm - ug/m3", + PARTICULATE_MATTER_25: "Particulate matter < 2.5 µm - ug/m3", +} + + +REQUEST_PARAMS = { + "request": "getFeature", + "storedquery_id": "urban::observations::airquality::hourly::timevaluepair", + "geoId": None, + "parameters": None, + "who": "fmi", + "startTime": None, + "endTime": None, +} +STATION_MATCH_STRINGS = ["Kolmannen osapuolen ilmanlaadun havaintoasema"] diff --git a/environment_data/management/commands/air_quality_utils.py b/environment_data/management/commands/air_quality_utils.py new file mode 100644 index 000000000..b7ea3fc96 --- /dev/null +++ b/environment_data/management/commands/air_quality_utils.py @@ -0,0 +1,82 @@ +import logging +import xml.etree.ElementTree as Et +from datetime import datetime, timedelta + +import pandas as pd +from dateutil.relativedelta import relativedelta + +from environment_data.constants import REQUEST_SESSION + +from .air_quality_constants import OBSERVABLE_PARAMETERS, REQUEST_PARAMS, START_YEAR +from .constants import DATA_URL, NAMESPACES, TIME_FORMAT + +logger = logging.getLogger(__name__) + + +def get_dataframe(stations, from_year=START_YEAR, from_month=1, initial_import=False): + current_date_time = datetime.now() + if from_year and from_month: + from_date_time = datetime.strptime(f"{from_year}-01-01T00:00:00Z", TIME_FORMAT) + column_data = {} + for station in stations: + logger.info(f"Fetching data for station {station['name']}") + for parameter in OBSERVABLE_PARAMETERS: + data = {} + start_date_time = from_date_time + while start_date_time.year <= current_date_time.year: + params = REQUEST_PARAMS + params["geoId"] = f"-{station['geoId']}" + params["parameters"] = parameter + + if not initial_import and from_year == current_date_time.year: + params["startTime"] = f"{from_year}-{from_month}-01T00:00Z" + else: + params["startTime"] = f"{start_date_time.year}-01-01T00:00Z" + if start_date_time.year == current_date_time.year: + + params["endTime"] = current_date_time.strftime(TIME_FORMAT) + else: + params["endTime"] = f"{start_date_time.year}-12-31T23:59Z" + + response = REQUEST_SESSION.get(DATA_URL, params=params) + logger.info(f"Requested data from: {response.url}") + if response.status_code == 200: + root = Et.fromstring(response.content) + observation_series = root.findall( + ".//omso:PointTimeSeriesObservation", + NAMESPACES, + ) + if len(observation_series) != 1: + logger.error( + f"Observation series length not 1, it is {len(observation_series)} " + ) + if start_date_time.year < current_date_time.year: + timestamp = start_date_time + end_timestamp = start_date_time + relativedelta(years=1) + while timestamp <= end_timestamp: + datetime_str = datetime.strftime(timestamp, TIME_FORMAT) + data[datetime_str] = float("nan") + timestamp += timedelta(hours=1) + start_date_time += relativedelta(years=1) + continue + + measurements = 
root.findall(".//wml2:MeasurementTVP", NAMESPACES) + logger.info(f"Fetched {len(measurements)} measurements.") + for measurement in measurements: + time = measurement.find("wml2:time", NAMESPACES).text + value = float(measurement.find("wml2:value", NAMESPACES).text) + data[time] = value + else: + logger.error( + f"Could not fetch data from {response.url}, {response.status_code} {response.content}" + ) + + start_date_time += relativedelta(years=1) + column_name = f"{station['name']} {params['parameters']}" + column_data[column_name] = data + + df = pd.DataFrame.from_dict(column_data) + df["Date"] = pd.to_datetime(df.index, format=TIME_FORMAT) + df = df.set_index("Date") + # df.to_csv("fmi.csv") + return df diff --git a/environment_data/management/commands/constants.py b/environment_data/management/commands/constants.py new file mode 100644 index 000000000..819cd02e2 --- /dev/null +++ b/environment_data/management/commands/constants.py @@ -0,0 +1,23 @@ +from .weather_observation_constants import PRECIPITATION_AMOUNT + +# If param is defined as cumulative, sum() function is used for DataFrame insted for mean() +CUMULATIVE_PARAMETERS = [PRECIPITATION_AMOUNT] +TIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" +SOURCE_DATA_SRID = 4326 + +NAMESPACES = { + "wfs": "http://www.opengis.net/wfs/2.0", + "om": "http://www.opengis.net/om/2.0", + "omso": "http://inspire.ec.europa.eu/schemas/omso/3.0", + "sams": "http://www.opengis.net/samplingSpatial/2.0", + "wml2": "http://www.opengis.net/waterml/2.0", + "ef": "http://inspire.ec.europa.eu/schemas/ef/4.0", + "xlink": "http://www.w3.org/1999/xlink", + "gml": "http://www.opengis.net/gml/3.2", +} + +STATION_URL = ( + "https://opendata.fmi.fi/wfs/fin?service=WFS&version=2.0.0&request=GetFeature&storedquery_id=fmi::ef::stations" + "&startTime=2023-10-9T00:00:00Z&endTime=2023-10-10T23:00:00Z" +) +DATA_URL = "https://data.fmi.fi/fmi-apikey/0fe6aa7c-de21-4f68-81d0-ed49c0409295/wfs" diff --git a/environment_data/management/commands/delete_all_environment_data.py b/environment_data/management/commands/delete_all_environment_data.py new file mode 100644 index 000000000..c5bab2d24 --- /dev/null +++ b/environment_data/management/commands/delete_all_environment_data.py @@ -0,0 +1,32 @@ +import logging + +from django import db +from django.core.management.base import BaseCommand + +from environment_data.models import ( + Day, + Hour, + ImportState, + Month, + Parameter, + Station, + Week, + Year, +) + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + @db.transaction.atomic + def handle(self, *args, **options): + logger.info("Deleting all environment data...") + logger.info(f"{Station.objects.all().delete()}") + logger.info(f"{Parameter.objects.all().delete()}") + logger.info(f"{Year.objects.all().delete()}") + logger.info(f"{Month.objects.all().delete()}") + logger.info(f"{Week.objects.all().delete()}") + logger.info(f"{Day.objects.all().delete()}") + logger.info(f"{Hour.objects.all().delete()}") + logger.info(f"{ImportState.objects.all().delete()}") + logger.info("Deleted all environment data.") diff --git a/environment_data/management/commands/import_environment_data.py b/environment_data/management/commands/import_environment_data.py new file mode 100644 index 000000000..aa35b4f19 --- /dev/null +++ b/environment_data/management/commands/import_environment_data.py @@ -0,0 +1,537 @@ +import logging +from datetime import datetime +from functools import lru_cache + +from django.core.management import BaseCommand, CommandError +from django.db import connection, 
reset_queries + +import environment_data.management.commands.air_quality_constants as aq_constants +import environment_data.management.commands.air_quality_utils as am_utils +import environment_data.management.commands.weather_observation_constants as wo_constants +import environment_data.management.commands.weather_observation_utils as wo_utils +from environment_data.constants import ( + DATA_TYPE_CHOICES, + DATA_TYPES, + DATA_TYPES_FULL_NAME, + DATA_TYPES_LIST, + VALID_DATA_TYPE_CHOICES, +) +from environment_data.management.commands.constants import CUMULATIVE_PARAMETERS +from environment_data.models import ( + Day, + DayData, + Hour, + HourData, + ImportState, + Measurement, + Month, + MonthData, + Parameter, + Station, + Week, + WeekData, + Year, + YearData, +) + +from .utils import ( + create_row, + get_day_cached, + get_month_cached, + get_month_data_cached, + get_or_create_day_row_cached, + get_or_create_hour_row_cached, + get_or_create_row, + get_or_create_row_cached, + get_row_cached, + get_stations, + get_week_cached, + get_year_cached, + get_year_data_cached, +) + +logger = logging.getLogger(__name__) +OBSERVABLE_PARAMETERS = ( + aq_constants.OBSERVABLE_PARAMETERS + wo_constants.OBSERVABLE_PARAMETERS +) + + +def get_measurements(df, station_name): + mean_series = df.mean().dropna() + # For cumulative values, remove negative values. E.g., negative value is marked + # if no rain has been observed. If less than 0,1mm rain has been observed + # then 0 is assigned. + df[df < 0] = 0 + sum_series = df.sum().dropna() + values = {} + for parameter in OBSERVABLE_PARAMETERS: + key = f"{station_name} {parameter}" + value = None + if parameter in CUMULATIVE_PARAMETERS: + value = sum_series.get(key, None) + else: + value = mean_series.get(key, None) + if value is not None: + values[parameter] = value + return values + + +@lru_cache(maxsize=16) +def get_parameter(name): + try: + return Parameter.objects.get(name=name) + except Parameter.DoesNotExist: + return None + + +def get_measurement_objects(measurements): + measurement_rows = [] + for item in measurements.items(): + parameter = get_parameter(item[0]) + measurement = Measurement(value=item[1], parameter=parameter) + measurement_rows.append(measurement) + return measurement_rows + + +def bulk_create_rows(data_model, model_objs, measurements, datas): + logger.info(f"Bulk creating {len(model_objs)} {data_model.__name__} rows") + data_model.objects.bulk_create(model_objs) + logger.info(f"Bulk creating {len(measurements)} Measurement rows") + Measurement.objects.bulk_create(measurements) + for key in datas: + data = datas[key] + [data["data"].measurements.add(m) for m in data["measurements"]] + + +def save_years(df, stations): + logger.info("Saving years...") + years = df.groupby(df.index.year) + for station in stations: + measurements = [] + year_datas = {} + year_data_objs = [] + for index, data_frame in years: + year, _ = get_or_create_row_cached(Year, (("year_number", index),)) + values = get_measurements(data_frame, station.name) + year_data = YearData(station=station, year=year) + year_data_objs.append(year_data) + ret_mes = get_measurement_objects(values) + measurements += ret_mes + year_datas[index] = {"data": year_data, "measurements": ret_mes} + bulk_create_rows(YearData, year_data_objs, measurements, year_datas) + + +def save_months(df, stations): + logger.info("Saving months...") + months = df.groupby([df.index.year, df.index.month]) + for station in stations: + measurements = [] + month_datas = {} + month_data_objs = [] + for 
index, data_frame in months: + year_number, month_number = index + year = get_year_cached(year_number) + month, _ = get_or_create_row_cached( + Month, + (("year", year), ("month_number", month_number)), + ) + values = get_measurements(data_frame, station.name) + month_data = MonthData(station=station, year=year, month=month) + month_data_objs.append(month_data) + ret_mes = get_measurement_objects(values) + measurements += ret_mes + month_datas[index] = {"data": month_data, "measurements": ret_mes} + bulk_create_rows(MonthData, month_data_objs, measurements, month_datas) + + +def save_weeks(df, stations): + """ + Note, weeks are stored in a different way, as a week can not be assigned + distinctly to a year or month. So when importing incrementally the week + or weeks in the dataframe can not be deleted before populating, thus the + use of get_or_create. + """ + logger.info("Saving weeks...") + weeks = df.groupby([df.index.year, df.index.isocalendar().week]) + for i, station in enumerate(stations): + for index, data_frame in weeks: + year_number, week_number = index + if i == 0: + logger.info( + f"Processing week number {week_number} of year {year_number}" + ) + year = get_year_cached(year_number) + week, _ = Week.objects.get_or_create( + week_number=week_number, + years__year_number=year_number, + ) + if week.years.count() == 0: + week.years.add(year) + values = get_measurements(data_frame, station.name) + week_data, _ = WeekData.objects.get_or_create(station=station, week=week) + for item in values.items(): + parameter = get_parameter(item[0]) + if not week_data.measurements.filter( + value=item[1], parameter=parameter + ): + measurement = Measurement.objects.create( + value=item[1], parameter=parameter + ) + week_data.measurements.add(measurement) + + +def save_days(df, stations): + logger.info("Processing days...") + days = df.groupby( + [df.index.year, df.index.month, df.index.isocalendar().week, df.index.day] + ) + for station in stations: + measurements = [] + day_datas = {} + day_data_objs = [] + for index, data_frame in days: + year_number, month_number, week_number, day_number = index + date = datetime(year_number, month_number, day_number) + year = get_year_cached(year_number) + month = get_month_cached(year, month_number) + week = get_week_cached(year, week_number) + day, _ = get_or_create_day_row_cached(date, year, month, week) + values = get_measurements(data_frame, station.name) + day_data = DayData(station=station, day=day) + day_data_objs.append(day_data) + ret_mes = get_measurement_objects(values) + measurements += ret_mes + day_datas[index] = {"data": day_data, "measurements": ret_mes} + bulk_create_rows(DayData, day_data_objs, measurements, day_datas) + + +def save_hours(df, stations): + logger.info("Processing hours... 
") + hours = df.groupby([df.index.year, df.index.month, df.index.day, df.index.hour]) + for station in stations: + measurements = [] + hour_datas = {} + hour_data_objs = [] + for index, data_frame in hours: + year_number, month_number, day_number, hour_number = index + date = datetime(year_number, month_number, day_number) + day = get_day_cached(date) + hour, _ = get_or_create_hour_row_cached(day, hour_number) + values = get_measurements(data_frame, station.name) + hour_data = HourData(station=station, hour=hour) + hour_data_objs.append(hour_data) + ret_mes = get_measurement_objects(values) + measurements += ret_mes + hour_datas[index] = {"data": hour_data, "measurements": ret_mes} + bulk_create_rows(HourData, hour_data_objs, measurements, hour_datas) + + +def save_current_year(stations, year_number, end_month_number): + logger.info(f"Saving current year {year_number}") + year = get_year_cached(year_number) + for station in stations: + measurements = {} + num_months = 0 + for month_number in range(1, end_month_number + 1): + month = get_month_cached(year, month_number) + month_data = get_month_data_cached(station, month) + if not month_data: + logger.debug(f"Month number {month_number} not found") + continue + else: + num_months += 1 + for measurement in month_data.measurements.all(): + key = measurement.parameter + measurements[key] = measurements.get(key, 0) + measurement.value + # get_or_create, if year changed the year needs to be created + year_data, _ = get_or_create_row_cached( + YearData, + ( + ("station", station), + ("year", year), + ), + ) + year_data.measurements.all().delete() + for parameter in station.parameters.all(): + try: + value = round(measurements[parameter] / num_months, 2) + except KeyError: + continue + measurement = Measurement.objects.create(value=value, parameter=parameter) + year_data.measurements.add(measurement) + + +def clear_cache(): + get_or_create_row_cached.cache_clear() + get_or_create_hour_row_cached.cache_clear() + get_or_create_day_row_cached.cache_clear() + get_row_cached.cache_clear() + get_year_cached.cache_clear() + get_year_data_cached.cache_clear() + get_month_cached.cache_clear() + get_month_data_cached.cache_clear() + get_week_cached.cache_clear() + get_day_cached.cache_clear() + get_parameter.cache_clear() + + +def delete_months(months_qs): + month_datas_qs = MonthData.objects.filter(month__in=months_qs) + [m.measurements.all().delete() for m in month_datas_qs] + days_qs = Day.objects.filter(month__in=months_qs) + day_datas_qs = DayData.objects.filter(day__in=days_qs) + [m.measurements.all().delete() for m in day_datas_qs] + hours_qs = Hour.objects.filter(day__in=days_qs) + hour_datas_qs = HourData.objects.filter(hour__in=hours_qs) + [m.measurements.all().delete() for m in hour_datas_qs] + months_qs.delete() + days_qs.delete() + hours_qs.delete() + + +def save_measurements(df, data_type, initial_import=False): + def delete_if_no_relations(items): + # If model does not have related rows, delete it. + # Cleans useless Year, Month, Week, Day rows. 
+ for item in items: + model = item[0] + related_name = item[1] + for row in model.objects.all(): + if not getattr(row, related_name).exists(): + row.delete() + + stations = [station for station in Station.objects.filter(data_type=data_type)] + end_date = df.index[-1] + start_date = df.index[0] + if initial_import: + items = [ + (Year, "year_datas"), + (Month, "month_datas"), + (Week, "week_datas"), + (Day, "day_datas"), + (Hour, "hour_datas"), + ] + models = [YearData, MonthData, WeekData, DayData, HourData] + for station in stations: + for model in models: + logger.info( + f"Deleting {model.__name__} for {station.name}. {model.objects.filter(station=station).delete()}" + ) + delete_if_no_relations(items) + save_years(df, stations) + save_months(df, stations) + else: + create_row(Year, {"year_number": start_date.year}) + year = get_year_cached(year_number=start_date.year) + # Handle possible year change in dataframe + if df.index[-1].year > df.index[0].year: + months_qs = Month.objects.filter( + year=year, month_number__gte=start_date.month, month_number__lte=12 + ) + delete_months(months_qs) + create_row(Year, {"year_number": end_date.year}) + year = get_year_cached(year_number=end_date.year) + Month.objects.filter( + year=year, month_number__gte=1, month_number__lte=end_date.month + ) + save_months(df, stations) + save_current_year(stations, start_date.year, 12) + save_current_year(stations, end_date.year, end_date.month) + else: + months_qs = Month.objects.filter( + year=year, + month_number__gte=start_date.month, + month_number__lte=end_date.month, + ) + delete_months(months_qs) + save_months(df, stations) + save_current_year(stations, start_date.year, end_date.month) + + save_weeks(df, stations) + save_days(df, stations) + save_hours(df, stations) + import_state = ImportState.objects.get(data_type=data_type) + import_state.year_number = end_date.year + import_state.month_number = end_date.month + import_state.save() + if logger.level <= logging.DEBUG: + queries_time = sum([float(s["time"]) for s in connection.queries]) + logger.debug( + f"queries total execution time: {queries_time} Num queries: {len(connection.queries)}" + ) + reset_queries() + logger.debug( + f"get_or_create_row_cached {get_or_create_row_cached.cache_info()}" + ) + logger.debug( + f"get_or_create_hour_row_cached {get_or_create_hour_row_cached.cache_info()}" + ) + logger.debug( + f"get_or_create_day_row_cached {get_or_create_day_row_cached.cache_info()}" + ) + logger.debug(f"get_row_cached {get_row_cached.cache_info()}") + logger.debug(f"get_year_cached {get_year_cached.cache_info()}") + logger.debug(f"get_year_cached {get_year_data_cached.cache_info()}") + logger.debug(f"get_month_cached {get_month_cached.cache_info()}") + logger.debug(f"get_month_cached {get_month_data_cached.cache_info()}") + logger.debug(f"get_week_cached {get_week_cached.cache_info()}") + logger.debug(f"get_day_cached {get_day_cached.cache_info()}") + logger.debug(f"get_parameter {get_parameter.cache_info()}") + + +def save_parameter_types(df, data_type, initial_import=False): + match data_type: + case DATA_TYPES.AIR_QUALITY: + descriptions = aq_constants.PARAMETER_DESCRIPTIONS + case DATA_TYPES.WEATHER_OBSERVATION: + descriptions = wo_constants.PARAMETER_DESCRIPTIONS + case _: + descriptions = aq_constants.PARAMETER_DESCRIPTIONS + + if initial_import: + Parameter.objects.filter(data_type=data_type).delete() + for station in Station.objects.filter(data_type=data_type): + for parameter_name in OBSERVABLE_PARAMETERS: + key = f"{station.name} 
{parameter_name}"
+            if key in df.columns:
+                parameter, _ = get_or_create_row(
+                    Parameter,
+                    {
+                        "name": parameter_name,
+                        "description": descriptions[parameter_name],
+                        "data_type": data_type,
+                    },
+                )
+                station.parameters.add(parameter)
+
+
+def save_stations(stations, data_type, initial_import_stations=False):
+    num_created = 0
+    if initial_import_stations:
+        Station.objects.filter(data_type=data_type).delete()
+    object_ids = list(
+        Station.objects.filter(data_type=data_type).values_list("id", flat=True)
+    )
+    for station in stations:
+        obj, created = get_or_create_row(
+            Station,
+            {
+                "name": station["name"],
+                "location": station["location"],
+                "geo_id": station["geoId"],
+                "data_type": data_type,
+            },
+        )
+        if obj.id in object_ids:
+            object_ids.remove(obj.id)
+        if created:
+            num_created += 1
+
+    Station.objects.filter(id__in=object_ids).delete()
+    logger.info(f"Deleted {len(object_ids)} obsolete environment data stations")
+    num_stations = Station.objects.filter(data_type=data_type).count()
+    logger.info(
+        f"Created {num_created} environment data stations of total {num_stations}."
+    )
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--initial-import",
+            type=str,
+            nargs="+",
+            default=False,
+            help="Delete all data and reset the import state before importing",
+        )
+        parser.add_argument(
+            "--initial-import-with-stations",
+            type=str,
+            nargs="+",
+            default=False,
+            help="Delete all data, all stations and reset the import state before importing",
+        )
+        parser.add_argument(
+            "--data-types",
+            type=str,
+            nargs="+",
+            default=False,
+            help=f"Import environment data, choices are: {DATA_TYPE_CHOICES}.",
+        )
+
+    def check_data_types_argument(self, data_types):
+        for data_type in data_types:
+            if data_type not in DATA_TYPES_LIST:
+                raise CommandError(
+                    f"Invalid data type, valid types are: {VALID_DATA_TYPE_CHOICES}."
+ ) + + def handle(self, *args, **options): + start_time = datetime.now() + initial_import = options.get("initial_import", False) + initial_import_stations = options.get("initial_import_with_stations", False) + + if initial_import_stations: + data_types = initial_import_stations + elif initial_import: + data_types = initial_import + else: + data_types = options.get("data_types", False) + if not data_types: + logger.info( + f"No data type provided, vlid types are: {VALID_DATA_TYPE_CHOICES}" + ) + return + self.check_data_types_argument(data_types) + + initial_import = bool(initial_import or initial_import_stations) + for data_type in data_types: + clear_cache() + if initial_import: + ImportState.objects.filter(data_type=data_type).delete() + start_year = None + match data_type: + case DATA_TYPES.AIR_QUALITY: + start_year = aq_constants.START_YEAR + case DATA_TYPES.WEATHER_OBSERVATION: + start_year = wo_constants.START_YEAR + case _: + start_year = 2010 + + import_state = ImportState.objects.create( + data_type=data_type, + year_number=start_year, + month_number=1, + ) + else: + import_state = ImportState.objects.get(data_type=data_type) + + match data_type: + case DATA_TYPES.AIR_QUALITY: + stations = get_stations(aq_constants.STATION_MATCH_STRINGS) + df = am_utils.get_dataframe( + stations, + import_state.year_number, + import_state.month_number, + initial_import, + ) + case DATA_TYPES.WEATHER_OBSERVATION: + stations = get_stations(wo_constants.STATION_MATCH_STRINGS) + df = wo_utils.get_dataframe( + stations, + import_state.year_number, + import_state.month_number, + initial_import, + ) + save_stations( + stations, data_type, initial_import_stations=initial_import_stations + ) + save_parameter_types(df, data_type, initial_import) + save_measurements(df, data_type, initial_import) + logger.info( + f"Imported {DATA_TYPES_FULL_NAME[data_type]} observations until:{str(df.index[-1])}" + ) + + end_time = datetime.now() + duration = end_time - start_time + logger.info(f"Imported environment data in: {duration}") diff --git a/environment_data/management/commands/utils.py b/environment_data/management/commands/utils.py new file mode 100644 index 000000000..0a51fbc96 --- /dev/null +++ b/environment_data/management/commands/utils.py @@ -0,0 +1,176 @@ +import logging +import xml.etree.ElementTree as Et +from functools import lru_cache + +from django.contrib.gis.geos import Point, Polygon + +from environment_data.constants import REQUEST_SESSION +from environment_data.models import Day, Hour, Month, MonthData, Week, Year, YearData +from mobility_data.importers.constants import ( + SOUTHWEST_FINLAND_BOUNDARY, + SOUTHWEST_FINLAND_BOUNDARY_SRID, +) + +from .constants import NAMESPACES, SOURCE_DATA_SRID, STATION_URL + +logger = logging.getLogger(__name__) + + +def get_stations(match_strings: list): + response = REQUEST_SESSION.get(STATION_URL) + stations = [] + + if response.status_code == 200: + polygon = Polygon( + SOUTHWEST_FINLAND_BOUNDARY, srid=SOUTHWEST_FINLAND_BOUNDARY_SRID + ) + root = Et.fromstring(response.content) + monitoring_facilities = root.findall( + ".//ef:EnvironmentalMonitoringFacility", NAMESPACES + ) + for mf in monitoring_facilities: + belongs_to = mf.find("ef:belongsTo", NAMESPACES) + title = belongs_to.attrib["{http://www.w3.org/1999/xlink}title"] + if title in match_strings: + station = {} + positions = mf.find(".//gml:pos", NAMESPACES).text.split(" ") + location = Point( + float(positions[1]), float(positions[0]), srid=SOURCE_DATA_SRID + ) + if polygon.covers(location): + 
station["name"] = mf.find("gml:name", NAMESPACES).text + station["location"] = location + station["geoId"] = mf.find("gml:identifier", NAMESPACES).text + stations.append(station) + else: + logger.error( + f"Could not get stations from {STATION_URL}, {response.status_code} {response.content}" + ) + + logger.info(f"Fetched {len(stations)} stations in Southwest Finland.") + return stations + + +@lru_cache(maxsize=4069) +def get_or_create_row_cached(model, filter: tuple): + filter = {key: value for key, value in filter} + results = model.objects.filter(**filter) + if results.exists(): + return results.first(), False + else: + return model.objects.create(**filter), True + + +@lru_cache(maxsize=4096) +def get_or_create_hour_row_cached(day, hour_number): + results = Hour.objects.filter(day=day, hour_number=hour_number) + if results.exists(): + return results.first(), False + else: + return ( + Hour.objects.create(day=day, hour_number=hour_number), + True, + ) + + +def create_row(model, filter): + results = model.objects.filter(**filter) + if not results.exists(): + model.objects.create(**filter) + + +def get_or_create_row(model, filter): + results = model.objects.filter(**filter) + if results.exists(): + return results.first(), False + else: + return model.objects.create(**filter), True + + +@lru_cache(maxsize=4096) +def get_or_create_day_row_cached(date, year, month, week): + results = Day.objects.filter( + date=date, + weekday_number=date.weekday(), + year=year, + month=month, + week=week, + ) + if results.exists(): + return results.first(), False + else: + return ( + Day.objects.create( + date=date, + weekday_number=date.weekday(), + year=year, + month=month, + week=week, + ), + True, + ) + + +@lru_cache(maxsize=4096) +# Use tuple as it is immutable and is hashable for lru_cache +def get_row_cached(model, filter: tuple): + filter = {key: value for key, value in filter} + results = model.objects.filter(**filter) + if results.exists(): + return results.first() + else: + return None + + +@lru_cache(maxsize=64) +def get_year_cached(year_number): + qs = Year.objects.filter(year_number=year_number) + if qs.exists(): + return qs.first() + else: + return None + + +@lru_cache(maxsize=128) +def get_year_data_cached(station, year): + qs = YearData.objects.filter(station=station, year=year) + if qs.exists(): + return qs.first() + else: + return None + + +@lru_cache(maxsize=256) +def get_month_cached(year, month_number): + qs = Month.objects.filter(year=year, month_number=month_number) + if qs.exists(): + return qs.first() + else: + return None + + +@lru_cache(maxsize=256) +def get_month_data_cached(station, month): + qs = MonthData.objects.filter(station=station, month=month) + if qs.exists(): + return qs.first() + else: + return None + + +@lru_cache(maxsize=1024) +def get_week_cached(years, week_number): + qs = Week.objects.filter(years=years, week_number=week_number) + if qs.exists(): + return qs.first() + else: + return None + + +@lru_cache(maxsize=2048) +def get_day_cached(date): + qs = Day.objects.filter(date=date) + if qs.exists(): + return qs.first() + else: + return None diff --git a/environment_data/management/commands/weather_observation_constants.py b/environment_data/management/commands/weather_observation_constants.py new file mode 100644 index 000000000..d5fe0088a --- /dev/null +++ b/environment_data/management/commands/weather_observation_constants.py @@ -0,0 +1,52 @@ +START_YEAR = 2010 +STATION_MATCH_STRINGS = ["Automaattinen sääasema"] +AIR_TEMPERATURE = "TA_PT1H_AVG" +RELATIVE_HUMIDITY = 
"RH_PT1H_AVG" +WIND_SPEED = "WS_PT1H_AVG" +WIND_DIRECTION = "WD_PT1H_AVG" +PRECIPITATION_AMOUNT = "PRA_PT1H_ACC" +AIR_PRESSURE = "PA_PT1H_AVG" + +OBSERVABLE_PARAMETERS = [ + AIR_TEMPERATURE, + RELATIVE_HUMIDITY, + WIND_SPEED, + WIND_DIRECTION, + PRECIPITATION_AMOUNT, + AIR_PRESSURE, +] + +PARAMETER_DESCRIPTIONS = { + AIR_TEMPERATURE: "Air temperature - degC", + RELATIVE_HUMIDITY: "Relative humidity - %", + WIND_SPEED: "Wind speed - m/s", + WIND_DIRECTION: "Wind direction - deg", + PRECIPITATION_AMOUNT: "Precipitation amount - mm", + AIR_PRESSURE: "Air pressure - hPA", +} + +REQUEST_PARAMS = { + "service": "WFS", + "version": "2.0.0", + "request": "getFeature", + "storedquery_id": "fmi::observations::weather::hourly::timevaluepair", + "fmisid": None, + "parameters": None, + "startTime": None, + "endTime": None, + "timeStep": 60, +} +# Note no more than 744 hours is allowed per request +DATA_URL = "https://opendata.fmi.fi/wfs" + +""" +Info about parameters: +https://www.ilmatieteenlaitos.fi/neuvoja-havaintosuureisiin +Obtaining the paramter info +Go to url: +https://opendata.fmi.fi/wfs?service=WFS&version=2.0.0&request=getFeature&storedquery_id= +fmi%3A%3Aobservations%3A%3Aweather%3A%3Ahourly%3A%3Atimevaluepair& +fmisid=100908&startTime=2022-1-01T00%3A00Z&endTime=2022-1-31T23%3A00Z&timeStep=60 +Find observedProperty elements and go to the link defined in xlink:href + +""" diff --git a/environment_data/management/commands/weather_observation_utils.py b/environment_data/management/commands/weather_observation_utils.py new file mode 100644 index 000000000..3bde67721 --- /dev/null +++ b/environment_data/management/commands/weather_observation_utils.py @@ -0,0 +1,107 @@ +import logging +import xml.etree.ElementTree as Et +from datetime import datetime, timedelta + +import pandas as pd +from dateutil.relativedelta import relativedelta + +from environment_data.constants import ( + DATA_TYPES_FULL_NAME, + REQUEST_SESSION, + WEATHER_OBSERVATION, +) + +from .constants import NAMESPACES, TIME_FORMAT +from .weather_observation_constants import ( + DATA_URL, + OBSERVABLE_PARAMETERS, + REQUEST_PARAMS, + START_YEAR, +) + +logger = logging.getLogger(__name__) + + +def get_dataframe(stations, from_year=START_YEAR, from_month=1, initial_import=False): + current_date_time = datetime.now() + if from_year and from_month: + from_date_time = datetime.strptime( + f"{from_year}-{from_month}-01T00:00:00Z", TIME_FORMAT + ) + column_data = {} + # Import data for every station and all parameters. Fetch data in montly chunks as + # no more than 744 hours is allowed per request. 
+ for station in stations: + logger.info( + f"Fetching data for {DATA_TYPES_FULL_NAME[WEATHER_OBSERVATION]} station {station['name']}" + ) + for parameter in OBSERVABLE_PARAMETERS: + data = {} + start_date_time = from_date_time + while start_date_time <= current_date_time: + params = REQUEST_PARAMS + params["fmisid"] = station["geoId"] + params["parameters"] = parameter + if not initial_import and from_year == current_date_time.year: + params["startTime"] = f"{from_year}-{from_month}-01T00:00Z" + else: + params[ + "startTime" + ] = f"{start_date_time.year}-{start_date_time.month}-01T00:00Z" + if current_date_time - relativedelta(months=1) < start_date_time: + params["endTime"] = current_date_time.strftime(TIME_FORMAT) + else: + tmp_time = ( + start_date_time + + relativedelta(months=1) + - relativedelta(hours=1) + ) + params[ + "endTime" + ] = f"{tmp_time.year}-{tmp_time.month}-{tmp_time.day}T23:00Z" + + response = REQUEST_SESSION.get(DATA_URL, params=params) + logger.info(f"Requested data from: {response.url}") + if response.status_code == 200: + root = Et.fromstring(response.content) + observation_series = root.findall( + ".//omso:PointTimeSeriesObservation", + NAMESPACES, + ) + if len(observation_series) != 1: + logger.error( + f"Observation series length not 1, it is {len(observation_series)} " + ) + if start_date_time.month < current_date_time.month: + timestamp = start_date_time + end_timestamp = ( + start_date_time + + relativedelta(months=1) + - relativedelta(hours=1) + ) + while timestamp <= end_timestamp: + datetime_str = datetime.strftime(timestamp, TIME_FORMAT) + data[datetime_str] = float("nan") + timestamp += timedelta(hours=1) + start_date_time += relativedelta(months=1) + continue + + measurements = root.findall(".//wml2:MeasurementTVP", NAMESPACES) + logger.info(f"Fetched {len(measurements)} measurements.") + for measurement in measurements: + time = measurement.find("wml2:time", NAMESPACES).text + value = float(measurement.find("wml2:value", NAMESPACES).text) + data[time] = value + else: + logger.error( + f"Could not fetch data from {response.url}, {response.status_code} {response.content}" + ) + + start_date_time += relativedelta(months=1) + column_name = f"{station['name']} {params['parameters']}" + column_data[column_name] = data + + df = pd.DataFrame.from_dict(column_data) + df["Date"] = pd.to_datetime(df.index, format=TIME_FORMAT) + df = df.set_index("Date") + return df diff --git a/environment_data/migrations/0001_initial.py b/environment_data/migrations/0001_initial.py new file mode 100644 index 000000000..88ed91dd3 --- /dev/null +++ b/environment_data/migrations/0001_initial.py @@ -0,0 +1,595 @@ +# Generated by Django 4.1.10 on 2023-10-17 07:45 + +import django.contrib.gis.db.models.fields +import django.core.validators +import django.db.models.deletion +import django.utils.timezone +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="Day", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("date", models.DateField(default=django.utils.timezone.now)), + ( + "weekday_number", + models.PositiveSmallIntegerField( + default=1, + validators=[ + django.core.validators.MinValueValidator(1), + django.core.validators.MaxValueValidator(7), + ], + ), + ), + ], + options={ + "ordering": ["-date"], + }, + ), + migrations.CreateModel( + name="DayData", + fields=[ + 
( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ], + options={ + "ordering": ["-day__date"], + }, + ), + migrations.CreateModel( + name="Hour", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "hour_number", + models.PositiveSmallIntegerField( + default=0, + validators=[ + django.core.validators.MinValueValidator(0), + django.core.validators.MaxValueValidator(23), + ], + ), + ), + ], + options={ + "ordering": ["-day__date", "-hour_number"], + }, + ), + migrations.CreateModel( + name="HourData", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ], + options={ + "ordering": ["-hour__day__date", "-hour__hour_number"], + }, + ), + migrations.CreateModel( + name="ImportState", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "data_type", + models.CharField( + choices=[("AQ", "Air Quality"), ("WO", "Weather Observation")], + default="AQ", + max_length=2, + ), + ), + ("year_number", models.PositiveSmallIntegerField(default=2010)), + ( + "month_number", + models.PositiveSmallIntegerField( + default=1, + validators=[ + django.core.validators.MinValueValidator(1), + django.core.validators.MaxValueValidator(12), + ], + ), + ), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="Measurement", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("value", models.FloatField()), + ], + ), + migrations.CreateModel( + name="Month", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "month_number", + models.PositiveSmallIntegerField( + default=1, + validators=[ + django.core.validators.MinValueValidator(1), + django.core.validators.MaxValueValidator(12), + ], + ), + ), + ], + options={ + "ordering": ["-year__year_number", "-month_number"], + }, + ), + migrations.CreateModel( + name="MonthData", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ], + options={ + "ordering": ["-year__year_number", "-month__month_number"], + }, + ), + migrations.CreateModel( + name="Parameter", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "data_type", + models.CharField( + choices=[("AQ", "Air Quality"), ("WO", "Weather Observation")], + default="AQ", + max_length=2, + ), + ), + ("name", models.CharField(max_length=32)), + ("description", models.CharField(max_length=64, null=True)), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="Station", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "data_type", + models.CharField( + choices=[("AQ", "Air Quality"), ("WO", "Weather Observation")], + default="AQ", + max_length=2, + ), + ), + ("name", models.CharField(max_length=64)), + ("name_fi", models.CharField(max_length=64, null=True)), + ("name_sv", models.CharField(max_length=64, null=True)), + ("name_en", models.CharField(max_length=64, null=True)), + ("location", 
django.contrib.gis.db.models.fields.PointField(srid=4326)), + ("geo_id", models.IntegerField()), + ], + options={ + "ordering": ["id"], + }, + ), + migrations.CreateModel( + name="Week", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "week_number", + models.PositiveSmallIntegerField( + validators=[ + django.core.validators.MinValueValidator(1), + django.core.validators.MaxValueValidator(53), + ] + ), + ), + ], + options={ + "ordering": ["-week_number"], + }, + ), + migrations.CreateModel( + name="WeekData", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ], + options={ + "ordering": ["-week__week_number"], + }, + ), + migrations.CreateModel( + name="Year", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("year_number", models.PositiveSmallIntegerField(default=2023)), + ], + options={ + "ordering": ["-year_number"], + }, + ), + migrations.CreateModel( + name="YearData", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "measurements", + models.ManyToManyField( + related_name="year_datas", to="environment_data.measurement" + ), + ), + ( + "station", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="year_datas", + to="environment_data.station", + ), + ), + ( + "year", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="year_datas", + to="environment_data.year", + ), + ), + ], + options={ + "ordering": ["-year__year_number"], + }, + ), + migrations.AddIndex( + model_name="year", + index=models.Index( + fields=["year_number"], name="environment_year_nu_1d4079_idx" + ), + ), + migrations.AddField( + model_name="weekdata", + name="measurements", + field=models.ManyToManyField( + related_name="week_datas", to="environment_data.measurement" + ), + ), + migrations.AddField( + model_name="weekdata", + name="station", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="week_datas", + to="environment_data.station", + ), + ), + migrations.AddField( + model_name="weekdata", + name="week", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="week_datas", + to="environment_data.week", + ), + ), + migrations.AddField( + model_name="week", + name="years", + field=models.ManyToManyField( + related_name="weeks", to="environment_data.year" + ), + ), + migrations.AddField( + model_name="station", + name="parameters", + field=models.ManyToManyField( + related_name="stations", to="environment_data.parameter" + ), + ), + migrations.AddField( + model_name="monthdata", + name="measurements", + field=models.ManyToManyField( + related_name="month_datas", to="environment_data.measurement" + ), + ), + migrations.AddField( + model_name="monthdata", + name="month", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="month_datas", + to="environment_data.month", + ), + ), + migrations.AddField( + model_name="monthdata", + name="station", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="month_datas", + to="environment_data.station", + ), + ), + migrations.AddField( + 
model_name="monthdata", + name="year", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="month_datas", + to="environment_data.year", + ), + ), + migrations.AddField( + model_name="month", + name="year", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="months", + to="environment_data.year", + ), + ), + migrations.AddField( + model_name="measurement", + name="parameter", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="values", + to="environment_data.parameter", + ), + ), + migrations.AddField( + model_name="hourdata", + name="hour", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="hour_datas", + to="environment_data.hour", + ), + ), + migrations.AddField( + model_name="hourdata", + name="measurements", + field=models.ManyToManyField( + related_name="hour_datas", to="environment_data.measurement" + ), + ), + migrations.AddField( + model_name="hourdata", + name="station", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="hour_datas", + to="environment_data.station", + ), + ), + migrations.AddField( + model_name="hour", + name="day", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="hours", + to="environment_data.day", + ), + ), + migrations.AddField( + model_name="daydata", + name="day", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="day_datas", + to="environment_data.day", + ), + ), + migrations.AddField( + model_name="daydata", + name="measurements", + field=models.ManyToManyField( + related_name="day_datas", to="environment_data.measurement" + ), + ), + migrations.AddField( + model_name="daydata", + name="station", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="day_datas", + to="environment_data.station", + ), + ), + migrations.AddField( + model_name="day", + name="month", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="days", + to="environment_data.month", + ), + ), + migrations.AddField( + model_name="day", + name="week", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="days", + to="environment_data.week", + ), + ), + migrations.AddField( + model_name="day", + name="year", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="days", + to="environment_data.year", + ), + ), + migrations.AddIndex( + model_name="yeardata", + index=models.Index(fields=["year"], name="environment_year_id_5367a1_idx"), + ), + migrations.AddIndex( + model_name="weekdata", + index=models.Index( + fields=["station", "week"], name="environment_station_1097e4_idx" + ), + ), + migrations.AddIndex( + model_name="monthdata", + index=models.Index( + fields=["station", "month"], name="environment_station_62c407_idx" + ), + ), + migrations.AddIndex( + model_name="hourdata", + index=models.Index( + fields=["station", "hour"], name="environment_station_a052eb_idx" + ), + ), + migrations.AddIndex( + model_name="hour", + index=models.Index( + fields=["day", "hour_number"], name="environment_day_id_2eaccc_idx" + ), + ), + migrations.AddIndex( + model_name="daydata", + index=models.Index( + fields=["station", "day"], name="environment_station_2222b6_idx" + ), + ), + 
migrations.AddIndex( + model_name="day", + index=models.Index(fields=["date"], name="environment_date_a4fc96_idx"), + ), + ] diff --git a/environment_data/migrations/__init__.py b/environment_data/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/environment_data/models.py b/environment_data/models.py new file mode 100644 index 000000000..24ef01c7c --- /dev/null +++ b/environment_data/models.py @@ -0,0 +1,211 @@ +from django.contrib.gis.db import models +from django.core.validators import MaxValueValidator, MinValueValidator +from django.utils.timezone import now + +from .constants import AIR_QUALITY, DATA_TYPE_CHOICES + + +class DataTypeModel(models.Model): + class Meta: + abstract = True + + data_type = models.CharField( + max_length=2, + choices=DATA_TYPE_CHOICES, + default=AIR_QUALITY, + ) + + +class ImportState(DataTypeModel): + year_number = models.PositiveSmallIntegerField(default=2010) + month_number = models.PositiveSmallIntegerField( + validators=[MinValueValidator(1), MaxValueValidator(12)], + default=1, + ) + + +class Parameter(DataTypeModel): + name = models.CharField(max_length=32) + description = models.CharField(max_length=64, null=True) + + def __str__(self): + return self.name + + +class Measurement(models.Model): + value = models.FloatField() + parameter = models.ForeignKey( + "Parameter", on_delete=models.CASCADE, related_name="values" + ) + + def __str__(self): + return "%s %s" % (self.parameter.name, self.value) + + +class Station(DataTypeModel): + name = models.CharField(max_length=64) + location = models.PointField(srid=4326) + geo_id = models.IntegerField() + parameters = models.ManyToManyField("Parameter", related_name="stations") + + def __str__(self): + return "%s %s" % (self.name, self.location) + + class Meta: + ordering = ["id"] + + +class Year(models.Model): + year_number = models.PositiveSmallIntegerField(default=2023) + + class Meta: + ordering = ["-year_number"] + indexes = [models.Index(fields=["year_number"])] + + @property + def num_days(self): + return self.days.count() + + def __str__(self): + return "%s" % (self.year_number) + + +class Month(models.Model): + year = models.ForeignKey("Year", on_delete=models.CASCADE, related_name="months") + month_number = models.PositiveSmallIntegerField( + validators=[MinValueValidator(1), MaxValueValidator(12)], default=1 + ) + + @property + def num_days(self): + return self.days.count() + + def __str__(self): + return "%s" % (self.month_number) + + class Meta: + ordering = ["-year__year_number", "-month_number"] + + +class Week(models.Model): + week_number = models.PositiveSmallIntegerField( + validators=[MinValueValidator(1), MaxValueValidator(53)] + ) + years = models.ManyToManyField(Year, related_name="weeks") + + @property + def num_days(self): + return self.days.count() + + def __str__(self): + return "%s" % (self.week_number) + + class Meta: + ordering = ["-week_number"] + + +class Day(models.Model): + date = models.DateField(default=now) + weekday_number = models.PositiveSmallIntegerField( + validators=[MinValueValidator(1), MaxValueValidator(7)], default=1 + ) + week = models.ForeignKey( + "Week", on_delete=models.CASCADE, related_name="days", null=True + ) + month = models.ForeignKey( + "Month", on_delete=models.CASCADE, related_name="days", null=True + ) + year = models.ForeignKey( + "Year", on_delete=models.CASCADE, related_name="days", null=True + ) + + class Meta: + ordering = ["-date"] + indexes = [models.Index(fields=["date"])] + + +class Hour(models.Model): + day 
= models.ForeignKey( + "Day", on_delete=models.CASCADE, related_name="hours", null=True, db_index=True + ) + hour_number = models.PositiveSmallIntegerField( + validators=[MinValueValidator(0), MaxValueValidator(23)], default=0 + ) + + class Meta: + ordering = ["-day__date", "-hour_number"] + indexes = [models.Index(fields=["day", "hour_number"])] + + +class YearData(models.Model): + station = models.ForeignKey( + "Station", on_delete=models.CASCADE, related_name="year_datas", null=True + ) + year = models.ForeignKey( + "Year", on_delete=models.CASCADE, related_name="year_datas", null=True + ) + measurements = models.ManyToManyField("Measurement", related_name="year_datas") + + class Meta: + ordering = ["-year__year_number"] + indexes = [models.Index(fields=["year"])] + + +class MonthData(models.Model): + station = models.ForeignKey( + "Station", on_delete=models.CASCADE, related_name="month_datas", null=True + ) + month = models.ForeignKey( + "Month", on_delete=models.CASCADE, related_name="month_datas", null=True + ) + year = models.ForeignKey( + "Year", on_delete=models.CASCADE, related_name="month_datas", null=True + ) + measurements = models.ManyToManyField("Measurement", related_name="month_datas") + + class Meta: + ordering = ["-year__year_number", "-month__month_number"] + indexes = [models.Index(fields=["station", "month"])] + + +class WeekData(models.Model): + station = models.ForeignKey( + "Station", on_delete=models.CASCADE, related_name="week_datas", null=True + ) + week = models.ForeignKey( + "Week", on_delete=models.CASCADE, related_name="week_datas", null=True + ) + measurements = models.ManyToManyField("Measurement", related_name="week_datas") + + class Meta: + ordering = ["-week__week_number"] + indexes = [models.Index(fields=["station", "week"])] + + +class DayData(models.Model): + station = models.ForeignKey( + "Station", on_delete=models.CASCADE, related_name="day_datas", null=True + ) + day = models.ForeignKey( + "Day", on_delete=models.CASCADE, related_name="day_datas", null=True + ) + measurements = models.ManyToManyField("Measurement", related_name="day_datas") + + class Meta: + ordering = ["-day__date"] + indexes = [models.Index(fields=["station", "day"])] + + +# Hourly data for a day +class HourData(models.Model): + station = models.ForeignKey( + "Station", on_delete=models.CASCADE, related_name="hour_datas", null=True + ) + hour = models.ForeignKey( + "Hour", on_delete=models.CASCADE, related_name="hour_datas", null=True + ) + measurements = models.ManyToManyField("Measurement", related_name="hour_datas") + + class Meta: + ordering = ["-hour__day__date", "-hour__hour_number"] + indexes = [models.Index(fields=["station", "hour"])] diff --git a/environment_data/tasks.py b/environment_data/tasks.py new file mode 100644 index 000000000..aa4c30ece --- /dev/null +++ b/environment_data/tasks.py @@ -0,0 +1,25 @@ +from django.core import management + +from smbackend.utils import shared_task_email + + +@shared_task_email +def initial_import(args, name="initial_import"): + management.call_command("import_environment_data", "--initial-import", args) + + +@shared_task_email +def initial_import_with_stations(args, name="initial_import_with_stations"): + management.call_command( + "import_environment_data", "--initial-import-with-stations", args + ) + + +@shared_task_email +def incremental_import(args, name="incremental_import"): + management.call_command("import_environment_data", "--data-types", args) + + +@shared_task_email +def 
delete_all_data(name="delete_all_environment_data"): + management.call_command("delete_all_environment_data") diff --git a/environment_data/tests/conftest.py b/environment_data/tests/conftest.py new file mode 100644 index 000000000..b90fc7113 --- /dev/null +++ b/environment_data/tests/conftest.py @@ -0,0 +1,141 @@ +import pytest +from dateutil import parser +from rest_framework.test import APIClient + +from environment_data.constants import AIR_QUALITY, WEATHER_OBSERVATION +from environment_data.models import ( + Day, + DayData, + Hour, + HourData, + Measurement, + Month, + MonthData, + Parameter, + Station, + Week, + WeekData, + Year, + YearData, +) + + +@pytest.fixture +def api_client(): + return APIClient() + + +@pytest.mark.django_db +@pytest.fixture +def stations(parameters): + station = Station.objects.create( + id=1, geo_id=42, name="Test", data_type=AIR_QUALITY, location="POINT(60.1 22.2)" + ) + station.parameters.add(Parameter.objects.get(id=1)) + station.parameters.add(Parameter.objects.get(id=2)) + + station = Station.objects.create( + id=2, + geo_id=43, + name="Test2", + data_type=WEATHER_OBSERVATION, + location="POINT(60.1 22.2)", + ) + station.parameters.add(Parameter.objects.get(id=1)) + return Station.objects.all() + + +@pytest.mark.django_db +@pytest.fixture +def measurements(parameters): + Measurement.objects.create(id=1, parameter=Parameter.objects.get(id=1), value=1.5) + return Measurement.objects.all() + + +@pytest.mark.django_db +@pytest.fixture +def parameters(): + Parameter.objects.create(id=1, name="AQINDEX_PT1H_avg") + Parameter.objects.create(id=2, name="NO2_PT1H_avg") + return Parameter.objects.all() + + +@pytest.mark.django_db +@pytest.fixture +def years(): + Year.objects.create(id=1, year_number=2023) + return Year.objects.all() + + +@pytest.mark.django_db +@pytest.fixture +def months(years): + Month.objects.create(month_number=1, year=years[0]) + return Month.objects.all() + + +@pytest.mark.django_db +@pytest.fixture +def weeks(years): + week = Week.objects.create(week_number=1) + week.years.add(years[0]) + return Week.objects.all() + + +@pytest.mark.django_db +@pytest.fixture +def days(years, months, weeks): + Day.objects.create( + date=parser.parse("2023-01-01 00:00:00"), + year=years[0], + month=months[0], + week=weeks[0], + ) + return Day.objects.all() + + +@pytest.mark.django_db +@pytest.fixture +def hours(days): + Hour.objects.create(day=days[0], hour_number=0) + return Hour.objects.all() + + +@pytest.mark.django_db +@pytest.fixture +def year_datas(stations, years, measurements): + year_data = YearData.objects.create(station=stations[0], year=years[0]) + year_data.measurements.add(measurements[0]) + return YearData.objects.all() + + +@pytest.mark.django_db +@pytest.fixture +def month_datas(stations, months, measurements): + month_data = MonthData.objects.create(station=stations[0], month=months[0]) + month_data.measurements.add(measurements[0]) + return MonthData.objects.all() + + +@pytest.mark.django_db +@pytest.fixture +def week_datas(stations, weeks, measurements): + week_data = WeekData.objects.create(station=stations[0], week=weeks[0]) + week_data.measurements.add(measurements[0]) + return WeekData.objects.all() + + +@pytest.mark.django_db +@pytest.fixture +def day_datas(stations, days, measurements): + day_data = DayData.objects.create(station=stations[0], day=days[0]) + day_data.measurements.add(measurements[0]) + return DayData.objects.all() + + +@pytest.mark.django_db +@pytest.fixture +def hour_datas(stations, hours, measurements): + hour_data 
= HourData.objects.create(station=stations[0], hour=hours[0]) + hour_data.measurements.add(measurements[0]) + return HourData.objects.all() diff --git a/environment_data/tests/test_api.py b/environment_data/tests/test_api.py new file mode 100644 index 000000000..0db01fa8e --- /dev/null +++ b/environment_data/tests/test_api.py @@ -0,0 +1,99 @@ +import pytest +from rest_framework.reverse import reverse + +from environment_data.constants import AIR_QUALITY, DATA_TYPES_FULL_NAME + + +@pytest.mark.django_db +def test_station(api_client, stations, year_datas): + url = reverse("environment_data:stations-list") + response = api_client.get(url) + assert response.status_code == 200 + assert response.json()["count"] == 2 + url = reverse("environment_data:stations-list") + f"?data_type={AIR_QUALITY}" + response = api_client.get(url) + assert response.status_code == 200 + json_data = response.json() + assert json_data["count"] == 1 + result = json_data["results"][0] + assert result["data_type"] == AIR_QUALITY + assert result["data_type_verbose"] == DATA_TYPES_FULL_NAME[AIR_QUALITY] + assert result["name"] == "Test" + assert result["parameters_in_use"]["AQINDEX_PT1H_avg"] is True + assert result["parameters_in_use"]["NO2_PT1H_avg"] is False + + +@pytest.mark.django_db +def test_hour_data(api_client, hour_datas, parameters): + url = ( + reverse("environment_data:data-list") + + "?year=2023&start=01-01&end=02-01&station_id=1&type=hour" + ) + response = api_client.get(url) + assert response.status_code == 200 + json_data = response.json()["results"][0] + assert len(json_data["measurements"]) == 1 + assert json_data["measurements"][0]["value"] == 1.5 + assert json_data["measurements"][0]["parameter"] == parameters[0].name + assert json_data["hour_number"] == 0 + assert json_data["date"] == "2023-01-01" + + +@pytest.mark.django_db +def test_day_data(api_client, day_datas, parameters): + url = ( + reverse("environment_data:data-list") + + "?year=2023&start=01-01&end=02-01&station_id=1&type=day" + ) + response = api_client.get(url) + assert response.status_code == 200 + json_data = response.json()["results"][0] + assert len(json_data["measurements"]) == 1 + assert json_data["measurements"][0]["value"] == 1.5 + assert json_data["measurements"][0]["parameter"] == parameters[0].name + assert json_data["date"] == "2023-01-01" + + +@pytest.mark.django_db +def test_week_data(api_client, week_datas, parameters): + url = ( + reverse("environment_data:data-list") + + "?year=2023&start=1&end=1&station_id=1&type=week" + ) + response = api_client.get(url) + assert response.status_code == 200 + json_data = response.json()["results"][0] + assert len(json_data["measurements"]) == 1 + assert json_data["measurements"][0]["value"] == 1.5 + assert json_data["measurements"][0]["parameter"] == parameters[0].name + assert json_data["week_number"] == 1 + + +@pytest.mark.django_db +def test_month_data(api_client, month_datas, parameters): + url = ( + reverse("environment_data:data-list") + + "?year=2023&start=1&end=1&station_id=1&type=month" + ) + response = api_client.get(url) + assert response.status_code == 200 + json_data = response.json()["results"][0] + assert len(json_data["measurements"]) == 1 + assert json_data["measurements"][0]["value"] == 1.5 + assert json_data["measurements"][0]["parameter"] == parameters[0].name + assert json_data["month_number"] == 1 + + +@pytest.mark.django_db +def test_year_data(api_client, year_datas, parameters): + url = ( + reverse("environment_data:data-list") + + 
"?start=2023&end=2023&station_id=1&type=year" + ) + response = api_client.get(url) + assert response.status_code == 200 + json_data = response.json()["results"][0] + assert len(json_data["measurements"]) == 1 + assert json_data["measurements"][0]["value"] == 1.5 + assert json_data["measurements"][0]["parameter"] == parameters[0].name + assert json_data["year_number"] == 2023 diff --git a/environment_data/tests/test_importer.py b/environment_data/tests/test_importer.py new file mode 100644 index 000000000..dc1093d32 --- /dev/null +++ b/environment_data/tests/test_importer.py @@ -0,0 +1,386 @@ +import logging + +import dateutil.parser +import pandas as pd +import pytest +from django.contrib.gis.geos import Point + +import environment_data.management.commands.air_quality_constants as aq_constants +import environment_data.management.commands.weather_observation_constants as wo_constants +from environment_data.constants import AIR_QUALITY, WEATHER_OBSERVATION +from environment_data.management.commands.import_environment_data import clear_cache +from environment_data.models import ( + Day, + DayData, + Hour, + HourData, + ImportState, + Measurement, + Month, + MonthData, + Parameter, + Station, + Week, + WeekData, + Year, + YearData, +) + +logger = logging.getLogger(__name__) + + +KAARINA_STATION = "Kaarina Kaarina" +NAANTALI_STATION = "Naantali keskusta Asematori" +STATION_NAMES = [KAARINA_STATION, NAANTALI_STATION] + + +def get_stations(): + stations = [] + for i, name in enumerate(STATION_NAMES): + station = {"name": name} + station["geoId"] = i + station["location"] = Point(0, 0) + stations.append(station) + return stations + + +def get_test_dataframe( + columns, start_time, end_time, time_stamp_column="index", min_value=2, max_value=4 +): + """ + Generates test Dataframe for a given timespan, + """ + df = pd.DataFrame() + timestamps = pd.date_range(start=start_time, end=end_time, freq="1h") + for col in columns: + vals = [] + for i in range(len(timestamps)): + if i % 2 == 0: + vals.append(min_value) + else: + vals.append(max_value) + df.insert(0, col, vals) + + df.insert(0, time_stamp_column, timestamps) + df["Date"] = pd.to_datetime(df["index"]) + df = df.drop("index", axis=1) + df = df.set_index("Date") + return df + + +@pytest.mark.django_db +def test_importer(): + from environment_data.management.commands.import_environment_data import ( + save_measurements, + save_parameter_types, + save_stations, + ) + + data_type = AIR_QUALITY + options = {"initial_import": True} + ImportState.objects.create( + data_type=data_type, year_number=aq_constants.START_YEAR, month_number=1 + ) + clear_cache() + stations = get_stations() + save_stations(stations, data_type, options["initial_import"]) + num_stations = Station.objects.all().count() + assert num_stations == 2 + kaarina_station = Station.objects.get(name=KAARINA_STATION) + + # Always start at the beginning of the month as the incremental + # importer imports data monthly + start_time = dateutil.parser.parse("2021-11-01T00:00:00Z") + end_time = dateutil.parser.parse("2021-12-4T23:45:00Z") + columns = [] + for station_name in STATION_NAMES: + for parameter in aq_constants.OBSERVABLE_PARAMETERS: + columns.append(f"{station_name} {parameter}") + df = get_test_dataframe(columns, start_time, end_time) + save_parameter_types(df, data_type, options["initial_import"]) + num_parameters = Parameter.objects.all().count() + assert num_parameters == len(aq_constants.OBSERVABLE_PARAMETERS) + aqindex_parameter = 
Parameter.objects.get(name=aq_constants.AIR_QUALITY_INDEX) + assert ( + Parameter.objects.filter(name=aq_constants.PARTICULATE_MATTER_10).exists() + is True + ) + save_measurements(df, data_type, options["initial_import"]) + import_state = ImportState.objects.get(data_type=data_type) + assert import_state.year_number == 2021 + assert import_state.month_number == 12 + # Test year data + year = Year.objects.get(year_number=2021) + year_data = YearData.objects.get(station=kaarina_station, year=year) + measurement = year_data.measurements.get(parameter=aqindex_parameter) + assert round(measurement.value, 1) == 3.0 + assert measurement.parameter.name == aq_constants.AIR_QUALITY_INDEX + assert year_data.measurements.all().count() == num_parameters + assert Year.objects.all().count() == 1 + assert YearData.objects.all().count() == Station.objects.all().count() + # Test month data + november = Month.objects.get(year=year, month_number=11) + month_data = MonthData.objects.get(station=kaarina_station, month=november) + measurement = month_data.measurements.get(parameter=aqindex_parameter) + assert round(measurement.value, 1) == 3.0 + assert measurement.parameter.name == aq_constants.AIR_QUALITY_INDEX + assert Month.objects.all().count() == 2 + assert MonthData.objects.all().count() == 4 + assert month_data.measurements.all().count() == num_parameters + # Test week data + week_46 = Week.objects.get(week_number=46, years=year) + week_data = WeekData.objects.get(station=kaarina_station, week=week_46) + measurement = week_data.measurements.get(parameter=aqindex_parameter) + assert round(measurement.value, 1) == 3.0 + assert measurement.parameter.name == aq_constants.AIR_QUALITY_INDEX + assert Week.objects.all().count() == 5 + assert WeekData.objects.all().count() == 10 + assert week_data.measurements.all().count() == num_parameters + + # Test day + day = Day.objects.get(date=dateutil.parser.parse("2021-11-02T00:00:00Z")) + day_data = DayData.objects.get(station=kaarina_station, day=day) + measurement = day_data.measurements.get(parameter=aqindex_parameter) + assert round(measurement.value, 1) == 3.0 + assert measurement.parameter.name == aq_constants.AIR_QUALITY_INDEX + assert Day.objects.all().count() == 34 + assert DayData.objects.all().count() == 34 * num_stations + assert day_data.measurements.all().count() == num_parameters + + # Test hours + assert Hour.objects.all().count() == 34 * 24 + assert HourData.objects.all().count() == 34 * 24 * num_stations + hour = Hour.objects.get(day=day, hour_number=0) + hour_data = HourData.objects.get(station=kaarina_station, hour=hour) + measurement = hour_data.measurements.get(parameter=aqindex_parameter) + assert round(measurement.value, 1) == 2.0 + assert measurement.parameter.name == aq_constants.AIR_QUALITY_INDEX + assert hour_data.measurements.all().count() == num_parameters + + hour = Hour.objects.get(day=day, hour_number=1) + hour_data = HourData.objects.get(station=kaarina_station, hour=hour) + measurement = hour_data.measurements.get(parameter=aqindex_parameter) + assert round(measurement.value, 1) == 4.0 + assert measurement.parameter.name == aq_constants.AIR_QUALITY_INDEX + + # Test measurements + num_measurements = ( + YearData.objects.all().count() + + MonthData.objects.all().count() + + WeekData.objects.all().count() + + DayData.objects.all().count() + + HourData.objects.all().count() + ) * num_parameters + assert Measurement.objects.all().count() == num_measurements + # Test incremental import + clear_cache() + options = {"initial_import": False} + 
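+    # The incremental run re-imports 1.12.-15.12.2021 on top of the initial import;
+    # the counts asserted below therefore cover November's 30 days plus 15 days of
+    # December, and the overlapping days 1.12.-4.12. are reused rather than duplicated.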
start_time = dateutil.parser.parse("2021-12-01T00:00:00Z") + end_time = dateutil.parser.parse("2021-12-15T23:45:00Z") + columns = [] + for station_name in STATION_NAMES: + for parameter in aq_constants.OBSERVABLE_PARAMETERS: + columns.append(f"{station_name} {parameter}") + df = get_test_dataframe(columns, start_time, end_time) + save_parameter_types(df, data_type, options["initial_import"]) + assert Parameter.objects.all().count() == len(aq_constants.OBSERVABLE_PARAMETERS) + aqindex_parameter = Parameter.objects.get(name=aq_constants.AIR_QUALITY_INDEX) + assert ( + Parameter.objects.filter(name=aq_constants.PARTICULATE_MATTER_10).exists() + is True + ) + save_measurements(df, data_type, options["initial_import"]) + year_data = YearData.objects.get(station=kaarina_station, year=year) + measurement = year_data.measurements.get(parameter=aqindex_parameter) + assert round(measurement.value, 1) == 3.0 + assert Year.objects.count() == 1 + assert Month.objects.count() == 2 + assert Week.objects.count() == 7 + assert Day.objects.count() == 30 + 15 + assert Hour.objects.count() == (30 + 15) * 24 + num_measurements = ( + YearData.objects.all().count() + + MonthData.objects.all().count() + + WeekData.objects.all().count() + + DayData.objects.all().count() + + HourData.objects.all().count() + ) * num_parameters + assert Measurement.objects.all().count() == num_measurements + # Test incremental import when year changes + clear_cache() + start_time = dateutil.parser.parse("2021-12-01T00:00:00Z") + end_time = dateutil.parser.parse("2022-01-15T23:45:00Z") + columns = [] + for station_name in STATION_NAMES: + for parameter in aq_constants.OBSERVABLE_PARAMETERS: + columns.append(f"{station_name} {parameter}") + df = get_test_dataframe(columns, start_time, end_time) + save_parameter_types(df, data_type, options["initial_import"]) + assert Parameter.objects.all().count() == len(aq_constants.OBSERVABLE_PARAMETERS) + aqindex_parameter = Parameter.objects.get(name=aq_constants.AIR_QUALITY_INDEX) + assert ( + Parameter.objects.filter(name=aq_constants.PARTICULATE_MATTER_10).exists() + is True + ) + save_measurements(df, data_type, options["initial_import"]) + year = Year.objects.get(year_number=2022) + year_data = YearData.objects.get(station=kaarina_station, year=year) + measurement = year_data.measurements.get(parameter=aqindex_parameter) + assert round(measurement.value, 1) == 3.0 + assert Year.objects.all().count() == 2 + assert YearData.objects.all().count() == Year.objects.all().count() * num_stations + assert Year.objects.get(year_number=2022) + assert Month.objects.all().count() == 3 + assert MonthData.objects.all().count() == Month.objects.all().count() * num_stations + assert Week.objects.all().count() == 12 + assert WeekData.objects.all().count() == Week.objects.all().count() * num_stations + assert Day.objects.all().count() == 30 + 31 + 15 + assert DayData.objects.all().count() == Day.objects.all().count() * num_stations + assert Hour.objects.all().count() == Day.objects.all().count() * 24 + assert ( + HourData.objects.all().count() == Day.objects.all().count() * 24 * num_stations + ) + # Test measurements after incremental imports + num_measurements = ( + YearData.objects.all().count() + + MonthData.objects.all().count() + + WeekData.objects.all().count() + + DayData.objects.all().count() + + HourData.objects.all().count() + ) * num_parameters + assert Measurement.objects.all().count() == num_measurements + import_state = ImportState.objects.get(data_type=data_type) + assert import_state.year_number 
== 2022 + assert import_state.month_number == 1 + + # Test initial import + clear_cache() + options = {"initial_import": True} + stations = get_stations() + save_stations(stations, data_type, options["initial_import"]) + assert Station.objects.all().count() == 2 + columns = [] + for station_name in STATION_NAMES: + for parameter in aq_constants.OBSERVABLE_PARAMETERS[0:2]: + columns.append(f"{station_name} {parameter}") + start_time = dateutil.parser.parse("2023-01-01T00:00:00Z") + end_time = dateutil.parser.parse("2023-01-10T23:45:00Z") + df = get_test_dataframe(columns, start_time, end_time) + save_parameter_types(df, data_type, options["initial_import"]) + assert Parameter.objects.all().count() == len( + aq_constants.OBSERVABLE_PARAMETERS[0:2] + ) + save_measurements(df, data_type, options["initial_import"]) + assert Year.objects.count() == 1 + assert Month.objects.count() == 1 + assert ( + Week.objects.count() == 3 + ) # 1.12.2023 is a Sunday, so three different weeks in 10 days + assert Day.objects.count() == 10 + assert Hour.objects.count() == 10 * 24 + num_measurements = ( + YearData.objects.all().count() + + MonthData.objects.all().count() + + WeekData.objects.all().count() + + DayData.objects.all().count() + + HourData.objects.all().count() + ) * Parameter.objects.all().count() + assert Measurement.objects.all().count() == num_measurements + import_state = ImportState.objects.get(data_type=data_type) + assert import_state.year_number == 2023 + assert import_state.month_number == 1 + # Test initial import also stations + clear_cache() + options = {"initial_import_also_stations": True} + stations = get_stations()[0:1] + save_stations(stations, data_type, options["initial_import_also_stations"]) + assert ( + Station.objects.all().count() + == Station.objects.filter(data_type=data_type).count() + ) + + +@pytest.mark.django_db +def test_cumulative_value(): + from environment_data.management.commands.import_environment_data import ( + save_measurements, + save_parameter_types, + save_stations, + ) + + data_type = WEATHER_OBSERVATION + options = {"initial_import": True} + ImportState.objects.create( + data_type=data_type, year_number=wo_constants.START_YEAR, month_number=1 + ) + clear_cache() + stations = get_stations() + save_stations(stations, data_type, options["initial_import"]) + num_stations = Station.objects.all().count() + assert num_stations == 2 + naantali_station = Station.objects.get(name=NAANTALI_STATION) + start_time = dateutil.parser.parse("2022-9-01T00:00:00Z") + end_time = dateutil.parser.parse("2022-10-4T23:45:00Z") + columns = [] + for station_name in STATION_NAMES: + for parameter in wo_constants.OBSERVABLE_PARAMETERS: + columns.append(f"{station_name} {parameter}") + df = get_test_dataframe(columns, start_time, end_time, min_value=1, max_value=1) + save_parameter_types(df, data_type, options["initial_import"]) + num_parameters = Parameter.objects.all().count() + assert num_parameters == len(wo_constants.OBSERVABLE_PARAMETERS) + precipitation_amount = Parameter.objects.get(name=wo_constants.PRECIPITATION_AMOUNT) + temperature = Parameter.objects.get(name=wo_constants.AIR_TEMPERATURE) + save_measurements(df, data_type, options["initial_import"]) + + import_state = ImportState.objects.get(data_type=data_type) + assert import_state.year_number == 2022 + assert import_state.month_number == 10 + year = Year.objects.get(year_number=2022) + year_data = YearData.objects.get(station=naantali_station, year=year) + measurement = 
year_data.measurements.get(parameter=precipitation_amount) + # imported days * hours imported + assert round(measurement.value, 0) == 34 * 24 + measurement = year_data.measurements.get(parameter=temperature) + assert round(measurement.value, 0) == 1 + + month = Month.objects.get(month_number=9) + month_data = MonthData.objects.get(station=naantali_station, month=month) + measurement = month_data.measurements.get(parameter=precipitation_amount) + # days in September * hours in day + assert round(measurement.value, 0) == 30 * 24 + measurement = month_data.measurements.get(parameter=temperature) + assert round(measurement.value, 0) == 1 + + week = Week.objects.get(week_number=36) + week_data = WeekData.objects.get(station=naantali_station, week=week) + measurement = week_data.measurements.get(parameter=precipitation_amount) + # days in week * hours in day + assert round(measurement.value, 0) == 7 * 24 + measurement = year_data.measurements.get(parameter=temperature) + assert round(measurement.value, 0) == 1 + + day = Day.objects.get(date=dateutil.parser.parse("2022-9-02T00:00:00Z")) + day_data = DayData.objects.get(station=naantali_station, day=day) + measurement = day_data.measurements.get(parameter=precipitation_amount) + assert round(measurement.value, 0) == 24 + measurement = day_data.measurements.get(parameter=temperature) + assert round(measurement.value, 0) == 1 + + hour = Hour.objects.get(day=day, hour_number=2) + hour_data = HourData.objects.get(station=naantali_station, hour=hour) + measurement = hour_data.measurements.get(parameter=precipitation_amount) + assert round(measurement.value, 0) == 1 + measurement = hour_data.measurements.get(parameter=temperature) + assert round(measurement.value, 0) == 1 + + # Test negative values + clear_cache() + df = get_test_dataframe(columns, start_time, end_time, min_value=-1, max_value=-1) + save_parameter_types(df, data_type, options["initial_import"]) + save_measurements(df, data_type, options["initial_import"]) + precipitation_amount = Parameter.objects.get(name=wo_constants.PRECIPITATION_AMOUNT) + year = Year.objects.get(year_number=2022) + year_data = YearData.objects.get(station=naantali_station, year=year) + measurement = year_data.measurements.get(parameter=precipitation_amount) + assert round(measurement.value, 0) == 0 diff --git a/environment_data/translation.py b/environment_data/translation.py new file mode 100644 index 000000000..60ef89b0e --- /dev/null +++ b/environment_data/translation.py @@ -0,0 +1,10 @@ +from modeltranslation.translator import TranslationOptions, translator + +from environment_data.models import Station + + +class StationTranslationOptions(TranslationOptions): + fields = ("name",) + + +translator.register(Station, StationTranslationOptions) diff --git a/mobility_data/importers/data/wfs_importer_config.yml b/mobility_data/importers/data/wfs_importer_config.yml index 723f60fb0..665e1db58 100644 --- a/mobility_data/importers/data/wfs_importer_config.yml +++ b/mobility_data/importers/data/wfs_importer_config.yml @@ -512,5 +512,5 @@ features: create_multipolygon: True extra_fields: speed_limit: - wfs_field: voimassa_a + wfs_field: rajoitus wfs_type: int diff --git a/mobility_data/importers/loading_unloading_places.py b/mobility_data/importers/loading_unloading_places.py index 0e6e9d7ca..b378564ff 100644 --- a/mobility_data/importers/loading_unloading_places.py +++ b/mobility_data/importers/loading_unloading_places.py @@ -26,6 +26,9 @@ class LoadingPlace(MobileUnitDataBase): "Saavutettavuus": { "type": 
FieldTypes.MULTILANG_STRING, }, + "rajoitukset": { + "type": FieldTypes.MULTILANG_STRING, + }, "lastauspiste": { "type": FieldTypes.MULTILANG_STRING, }, @@ -33,9 +36,25 @@ class LoadingPlace(MobileUnitDataBase): "type": FieldTypes.MULTILANG_STRING, }, "paikkoja_y": {"type": FieldTypes.INTEGER}, - "Lisätieto": { + "max_aika_h": {"type": FieldTypes.STRING}, + "max_aika_m": {"type": FieldTypes.STRING}, + "rajoitus_m": {"type": FieldTypes.STRING}, + "rajoitus_l": {"type": FieldTypes.STRING}, + "rajoitus_s": {"type": FieldTypes.STRING}, + "rajoitettu_ark": {"type": FieldTypes.STRING}, + "rajoitettu_l": {"type": FieldTypes.STRING}, + "rajoitettu_s": {"type": FieldTypes.STRING}, + "voimassaol": {"type": FieldTypes.STRING}, + "varattu_tie_ajoon": {"type": FieldTypes.MULTILANG_STRING}, + "erityisluv": {"type": FieldTypes.MULTILANG_STRING}, + "vuoropys": {"type": FieldTypes.STRING}, + "päiväys": {"type": FieldTypes.STRING}, + "lisätieto": { "type": FieldTypes.MULTILANG_STRING, }, + "maksuvyöh": {"type": FieldTypes.STRING}, + "rajoit_lis": {"type": FieldTypes.MULTILANG_STRING}, + "talvikunno": {"type": FieldTypes.STRING}, } def __init__(self, feature): @@ -102,6 +121,8 @@ def __init__(self, feature): self.extra[field_name][lang] = strings[i].strip() case FieldTypes.INTEGER: self.extra[field_name] = feature[field].as_int() + case FieldTypes.STRING: + self.extra[field_name] = feature[field].as_string() def get_geojson_file_name(): diff --git a/mobility_data/tests/data/loading_and_unloading_places.geojson b/mobility_data/tests/data/loading_and_unloading_places.geojson index a12105499..8ffe403f3 100644 --- a/mobility_data/tests/data/loading_and_unloading_places.geojson +++ b/mobility_data/tests/data/loading_and_unloading_places.geojson @@ -5,7 +5,7 @@ "features": [ { "type": "Feature", "properties": { "id": null, "Osoite": "Puolalankatu 8 20100 Turku / Puolalagatan 8 2010 Åbo", "Saavutettavuus": "Kadunvarsipysäköinti / Parkering på gata/ On-street parking", "rajoitukset": null, "lastauspiste": "Lastausalue / Lastningsplats / Loading zone", "paikkoja_y": "3", "rajoitustyyppi": "Erityisalue/ Specialområde/ Special area", "max_aika_h": null, "max_aika_m": null, "rajoitus_m": null, "rajoitus_l": null, "rajoitus_s": null, "rajoitettu_ark": null, "rajoitettu_l": null, "rajoitettu_s": null, "voimassaol": null, "varattu_tie_ajon": null, "erityisluv": null, "vuoropys": null, "päiväys": "1.7.2022", "lisätieto": "Pysäköintikielto/ Parkering förbjuden/ No parking", "maksuvyöh": "1", "rajoit_lis": "Lisäkilpi:Lastausalue/ Tilläggsskylt: Lastningsområde/ Additonal panel: Loading place", "talvikunno": null }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 23459333.231340952217579, 6704747.81081931386143 ], [ 23459337.514146234840155, 6704750.456081400625408 ], [ 23459341.985898811370134, 6704743.528014029376209 ], [ 23459337.640111096203327, 6704740.630822218954563 ], [ 23459333.231340952217579, 6704747.81081931386143 ] ] ] ] } }, { "type": "Feature", "properties": { "id": null, "Osoite": "Käsityöläiskatu 2 20100 Turku / Hantverkaregatan 2 20100 Åbo", "Saavutettavuus": "Kadunvarsipysäköinti / Parkering på gata/ On-street parking", "rajoitukset": null, "lastauspiste": "Lastausalue / Lastningsplats / Loading zone", "paikkoja_y": "2", "rajoitustyyppi": "Erityisalue/ Specialområde/ Special area", "max_aika_h": null, "max_aika_m": null, "rajoitus_m": null, "rajoitus_l": null, "rajoitus_s": null, "rajoitettu_ark": null, "rajoitettu_l": null, "rajoitettu_s": null, "voimassaol": null, "varattu_tie_ajon": null, 
"erityisluv": null, "vuoropys": null, "päiväys": "1.7.2022", "lisätieto": "Pysäköintikielto/ Parkering förbjuden/ No parking", "maksuvyöh": "1", "rajoit_lis": null, "talvikunno": null }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 23459212.746626514941454, 6704292.149668938480318 ], [ 23459214.571645777672529, 6704293.45325412787497 ], [ 23459222.58869469165802, 6704281.85134594514966 ], [ 23459220.307420611381531, 6704280.612940015271306 ], [ 23459212.746626514941454, 6704292.149668938480318 ] ] ] ] } }, - { "type": "Feature", "properties": { "id": null, "Osoite": "Läntinen Rantakatu 13 20700 Turku/ Östra Strandgatan 13 20700 Åbo", "Saavutettavuus": "Kadunvarsipysäköinti / Parkering på gata/ On-street parking", "rajoitukset": null, "lastauspiste": "Lastausalue / Lastningsplats / Loading zone", "paikkoja_y": "2", "rajoitustyyppi": "Erityisalue/ Specialområde/ Special area", "max_aika_h": null, "max_aika_m": null, "rajoitus_m": null, "rajoitus_l": null, "rajoitus_s": null, "rajoitettu_ark": null, "rajoitettu_l": null, "rajoitettu_s": null, "voimassaol": null, "varattu_tie_ajon": null, "erityisluv": null, "vuoropys": null, "päiväys": "21.7.2022", "lisätieto": "Pysäköintikieltoalue/ Parkeringsförbudszon/ No parking zone", "maksuvyöh": "2", "rajoit_lis": null, "talvikunno": null }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 23459733.336723871529102, 6704195.096125483512878 ], [ 23459733.484385836869478, 6704193.250350917689502 ], [ 23459724.034020062536001, 6704191.626069300808012 ], [ 23459708.824837647378445, 6704182.397196475416422 ], [ 23459707.126725047826767, 6704184.0953090749681 ], [ 23459722.852724343538284, 6704193.767167796380818 ], [ 23459733.336723871529102, 6704195.096125483512878 ] ] ] ] } } + { "type": "Feature", "properties": { "id": null, "Osoite": "Läntinen Rantakatu 13 20700 Turku/ Östra Strandgatan 13 20700 Åbo", "Saavutettavuus": "Kadunvarsipysäköinti / Parkering på gata/ On-street parking", "rajoitukset": null, "lastauspiste": "Lastausalue / Lastningsplats / Loading zone", "paikkoja_y": "2", "rajoitustyyppi": "Erityisalue/ Specialområde/ Special area", "max_aika_h":"3", "max_aika_m": null, "rajoitus_m": null, "rajoitus_l": "test", "rajoitus_s": null, "rajoitettu_ark": "4", "rajoitettu_l": null, "rajoitettu_s": null, "voimassaol": null, "varattu_tie_ajon": null, "erityisluv": null, "vuoropys": null, "päiväys": "21.7.2022", "lisätieto": "Pysäköintikieltoalue/ Parkeringsförbudszon/ No parking zone", "maksuvyöh": "2", "rajoit_lis": null, "talvikunno": null }, "geometry": { "type": "MultiPolygon", "coordinates": [ [ [ [ 23459733.336723871529102, 6704195.096125483512878 ], [ 23459733.484385836869478, 6704193.250350917689502 ], [ 23459724.034020062536001, 6704191.626069300808012 ], [ 23459708.824837647378445, 6704182.397196475416422 ], [ 23459707.126725047826767, 6704184.0953090749681 ], [ 23459722.852724343538284, 6704193.767167796380818 ], [ 23459733.336723871529102, 6704195.096125483512878 ] ] ] ] } } ] } \ No newline at end of file diff --git a/mobility_data/tests/data/speed_limits.gfs b/mobility_data/tests/data/speed_limits.gfs index 13651fd83..65df96ed8 100644 --- a/mobility_data/tests/data/speed_limits.gfs +++ b/mobility_data/tests/data/speed_limits.gfs @@ -13,8 +13,8 @@ 6707840.58200 - voimassa_a - voimassa_a + rajoitus + rajoitus Integer diff --git a/mobility_data/tests/data/speed_limits.gml b/mobility_data/tests/data/speed_limits.gml index fe6b708ce..16efb4a0f 100644 --- a/mobility_data/tests/data/speed_limits.gml +++ 
b/mobility_data/tests/data/speed_limits.gml @@ -11,7 +11,7 @@ - 20 + 20 20 @@ -26,7 +26,7 @@ - 40 + 40 40 @@ -41,7 +41,7 @@ - 80 + 80 80 diff --git a/mobility_data/tests/test_import_loading_and_unloading_places.py b/mobility_data/tests/test_import_loading_and_unloading_places.py index 7b17af605..1d4494ad8 100644 --- a/mobility_data/tests/test_import_loading_and_unloading_places.py +++ b/mobility_data/tests/test_import_loading_and_unloading_places.py @@ -57,3 +57,7 @@ def test_import(get_geojson_file_name_mock, municipalities): assert lantinen_rantakatu.extra["rajoitustyyppi"]["en"] == "Special area" assert lantinen_rantakatu.extra["paikkoja_y"] == 2 + assert lantinen_rantakatu.extra["max_aika_h"] == "3" + assert lantinen_rantakatu.extra["rajoitus_l"] == "test" + assert lantinen_rantakatu.extra["rajoitettu_ark"] == "4" + assert lantinen_rantakatu.extra["päiväys"] == "21.7.2022" diff --git a/requirements.txt b/requirements.txt index 06c388b79..71b681a26 100644 --- a/requirements.txt +++ b/requirements.txt @@ -248,7 +248,7 @@ uritemplate==4.1.1 # via drf-spectacular url-normalize==1.4.3 # via requests-cache -urllib3==1.26.16 +urllib3==1.26.18 # via # requests # requests-cache diff --git a/smbackend/settings.py b/smbackend/settings.py index 1615fb9bd..6e72fcc9f 100644 --- a/smbackend/settings.py +++ b/smbackend/settings.py @@ -78,6 +78,7 @@ MOBILITY_DATA_LOG_LEVEL=(str, "INFO"), BICYCLE_NETWORK_LOG_LEVEL=(str, "INFO"), STREET_MAINTENANCE_LOG_LEVEL=(str, "INFO"), + ENVIRONMENT_DATA_LOG_LEVEL=(str, "INFO"), ) @@ -101,6 +102,7 @@ MOBILITY_DATA_LOG_LEVEL = env("MOBILITY_DATA_LOG_LEVEL") BICYCLE_NETWORK_LOG_LEVEL = env("BICYCLE_NETWORK_LOG_LEVEL") STREET_MAINTENANCE_LOG_LEVEL = env("STREET_MAINTENANCE_LOG_LEVEL") +ENVIRONMENT_DATA_LOG_LEVEL = env("ENVIRONMENT_DATA_LOG_LEVEL") # Application definition INSTALLED_APPS = [ @@ -130,6 +132,7 @@ "bicycle_network.apps.BicycleNetworkConfig", "iot.apps.IotConfig", "street_maintenance.apps.StreetMaintenanceConfig", + "environment_data.apps.EnvironmentDataConfig", ] if env("ADDITIONAL_INSTALLED_APPS"): @@ -329,6 +332,10 @@ def gettext(s): "handlers": ["console"], "level": STREET_MAINTENANCE_LOG_LEVEL, }, + "environment_data": { + "handlers": ["console"], + "level": ENVIRONMENT_DATA_LOG_LEVEL, + }, }, } logging.config.dictConfig(LOGGING) @@ -338,6 +345,9 @@ def gettext(s): "/street_maintenance/geometry_history/", "/street_maintenance/maintenance_works/", "/street_maintenance/maintenance_units/", + "/environment_data/api/v1/stations/", + "/environment_data/api/v1/parameters/", + "/environment_data/api/v1/data/", ] diff --git a/smbackend/urls.py b/smbackend/urls.py index 08feada5a..b04e54fab 100644 --- a/smbackend/urls.py +++ b/smbackend/urls.py @@ -9,6 +9,7 @@ import bicycle_network.api.urls import eco_counter.api.urls +import environment_data.api.urls import mobility_data.api.urls import street_maintenance.api.urls from iot.api import IoTViewSet @@ -65,6 +66,11 @@ re_path( r"^bicycle_network/", include(bicycle_network.api.urls), name="bicycle_network" ), + re_path( + r"^environment_data/", + include(environment_data.api.urls), + name="environmet_data", + ), re_path( r"^street_maintenance/", include(street_maintenance.api.urls), diff --git a/smbackend_turku/importers/data/divisions_config.yml b/smbackend_turku/importers/data/divisions_config.yml index df6a866ce..13b17be50 100644 --- a/smbackend_turku/importers/data/divisions_config.yml +++ b/smbackend_turku/importers/data/divisions_config.yml @@ -12,7 +12,7 @@ divisions: fields: name: fi: Tunnus_FIN - sv: 
Tunnus_FIN + sv: Tunnus_SVE origin_id: Tunnus_FIN ocd_id: Tunnus_FIN