diff --git a/api/alembic/versions/f2e027a47a3f_bias_adjusted_precip.py b/api/alembic/versions/f2e027a47a3f_bias_adjusted_precip.py new file mode 100644 index 000000000..c81a0dd52 --- /dev/null +++ b/api/alembic/versions/f2e027a47a3f_bias_adjusted_precip.py @@ -0,0 +1,68 @@ +"""bias adjusted precip + +Revision ID: f2e027a47a3f +Revises: 5b745fe0bd7a +Create Date: 2023-10-30 11:34:21.603046 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = 'f2e027a47a3f' +down_revision = '5b745fe0bd7a' +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column('weather_station_model_predictions', sa.Column('bias_adjusted_precip_24h', sa.Float(), nullable=True)) + # Drop morecast_2_materialized view and recreate with the bias_adjusted_precip_24h field + op.execute("DROP MATERIALIZED VIEW morecast_2_materialized_view;") + op.execute(""" + CREATE MATERIALIZED VIEW morecast_2_materialized_view AS + SELECT weather_station_model_predictions.prediction_timestamp, prediction_models.abbreviation, weather_station_model_predictions.station_code, + weather_station_model_predictions.rh_tgl_2, weather_station_model_predictions.tmp_tgl_2, weather_station_model_predictions.bias_adjusted_temperature, + weather_station_model_predictions.bias_adjusted_rh, weather_station_model_predictions.precip_24h, weather_station_model_predictions.wdir_tgl_10, + weather_station_model_predictions.wind_tgl_10, weather_station_model_predictions.bias_adjusted_wind_speed, weather_station_model_predictions.bias_adjusted_wdir, + weather_station_model_predictions.bias_adjusted_precip_24h, weather_station_model_predictions.update_date, + weather_station_model_predictions.prediction_model_run_timestamp_id + FROM weather_station_model_predictions + JOIN prediction_model_run_timestamps + ON weather_station_model_predictions.prediction_model_run_timestamp_id = prediction_model_run_timestamps.id JOIN prediction_models + ON prediction_model_run_timestamps.prediction_model_id = prediction_models.id + JOIN ( + SELECT max(weather_station_model_predictions.prediction_timestamp) AS latest_prediction, weather_station_model_predictions.station_code AS station_code, + date(weather_station_model_predictions.prediction_timestamp) AS unique_day + FROM weather_station_model_predictions + WHERE date_part('hour', weather_station_model_predictions.prediction_timestamp) = 20 + GROUP BY weather_station_model_predictions.station_code, date(weather_station_model_predictions.prediction_timestamp) + ) AS latest + ON weather_station_model_predictions.prediction_timestamp = latest.latest_prediction AND weather_station_model_predictions.station_code = latest.station_code + ORDER BY weather_station_model_predictions.update_date DESC;""") + + +def downgrade(): + # Drop morecast_2_materialized view before dropping the column in the table in order to avoid dependency issues + op.execute("DROP MATERIALIZED VIEW morecast_2_materialized_view;") + op.drop_column('weather_station_model_predictions', 'bias_adjusted_precip_24h') + op.execute(""" + CREATE MATERIALIZED VIEW morecast_2_materialized_view AS + SELECT weather_station_model_predictions.prediction_timestamp, prediction_models.abbreviation, weather_station_model_predictions.station_code, + weather_station_model_predictions.rh_tgl_2, weather_station_model_predictions.tmp_tgl_2, weather_station_model_predictions.bias_adjusted_temperature, + weather_station_model_predictions.bias_adjusted_rh, weather_station_model_predictions.precip_24h, weather_station_model_predictions.wdir_tgl_10, + weather_station_model_predictions.wind_tgl_10, weather_station_model_predictions.bias_adjusted_wind_speed, weather_station_model_predictions.bias_adjusted_wdir, + weather_station_model_predictions.update_date + FROM weather_station_model_predictions + JOIN prediction_model_run_timestamps + ON weather_station_model_predictions.prediction_model_run_timestamp_id = prediction_model_run_timestamps.id JOIN prediction_models + ON prediction_model_run_timestamps.prediction_model_id = prediction_models.id + JOIN ( + SELECT max(weather_station_model_predictions.prediction_timestamp) AS latest_prediction, weather_station_model_predictions.station_code AS station_code, + date(weather_station_model_predictions.prediction_timestamp) AS unique_day + FROM weather_station_model_predictions + WHERE date_part('hour', weather_station_model_predictions.prediction_timestamp) = 20 + GROUP BY weather_station_model_predictions.station_code, date(weather_station_model_predictions.prediction_timestamp) + ) AS latest + ON weather_station_model_predictions.prediction_timestamp = latest.latest_prediction AND weather_station_model_predictions.station_code = latest.station_code + ORDER BY weather_station_model_predictions.update_date DESC;""") \ No newline at end of file diff --git a/api/app/db/crud/observations.py b/api/app/db/crud/observations.py index 5f91f95d2..f3ee6eb0e 100644 --- a/api/app/db/crud/observations.py +++ b/api/app/db/crud/observations.py @@ -2,10 +2,11 @@ """ import datetime from typing import List -from sqlalchemy import and_, select +from sqlalchemy import and_, select, text from sqlalchemy.sql import func from sqlalchemy.orm import Session -from app.db.models.weather_models import ModelRunPrediction, PredictionModelRunTimestamp +from app.db.models.weather_models import (ModelRunPrediction, PredictionModel, PredictionModelRunTimestamp, + WeatherStationModelPrediction) from app.db.models.observations import HourlyActual @@ -69,3 +70,74 @@ def get_accumulated_precipitation(session: Session, station_code: int, start_dat if result is None: return 0 return result + + +def get_accumulated_precip_by_24h_interval(session: Session, station_code: int, start_datetime: datetime, end_datetime: datetime): + """ Get the accumulated precip for 24 hour intervals for a given station code within the specified time interval. + :param session: The ORM/database session. + :param station_code: The numeric code identifying the weather station of interest. + :param start_datetime: The earliest date and time of interest. + :param end_datetime: The latest date and time of interest. + + Note: I couldn't construct this query in SQLAlchemy, hence the need for the 'text' based query. + + generate_series(\'{}\', \'{}\', '24 hours'::interval) + + This gives us a one column table of dates separated by 24 hours between the start and end dates. For example, if start and end dates are 2023-10-31 20:00:00 to 2023-11-03 20:00:00 we would have a table like: + + day + 2023-10-31 20:00:00 + 2023-11-01 20:00:00 + 2023-11-02 20:00:00 + 2023-11-03 20:00:00 + + We then join the HourlyActuals table so that we can sum hourly precip in a 24 hour period. The join is based on the weather_date field in the HourlyActuals table being in a 24 hour range using this odd looking syntax: + + weather_date <@ tstzrange(day, day + '24 hours', '(]') + + Using 2023-10-31 20:00:00 as an example, rows with the following dates would match. The (] syntax means the lower bound is excluded but the upper bound is included. + + 2023-10-31 21:00:00 + 2023-10-31 22:00:00 + 2023-10-31 23:00:00 + 2023-11-01 00:00:00 + 2023-11-01 01:00:00 + .... + 2023-11-01 19:00:00 + 2023-11-01 20:00:00 + """ + stmt = """ + SELECT day, station_code, sum(precipitation) actual_precip_24h + FROM + generate_series(\'{}\', \'{}\', '24 hours'::interval) day + LEFT JOIN + hourly_actuals + ON + weather_date <@ tstzrange(day - INTERVAL '24 hours', day, '(]') + WHERE + station_code = {} + GROUP BY + day, station_code; + """.format(start_datetime, end_datetime, station_code) + result = session.execute(text(stmt)) + return result.all() + + +def get_predicted_daily_precip(session: Session, model: PredictionModel, station_code: int, start_datetime: datetime, end_datetime: datetime): + """ Gets rows from WeatherStationModelPrediction for the given model and station within the + specified time interval at 20:00:00 UTC each day. + :param session: The ORM/database session + :param model: The numeric weather prediction model + :param station_code: The code identifying the weather station. + :param start_datetime: The earliest date and time of interest. + :param end_datetime: The latest date and time of interest. + """ + result = session.query(WeatherStationModelPrediction)\ + .join(PredictionModelRunTimestamp, PredictionModelRunTimestamp.id == WeatherStationModelPrediction.prediction_model_run_timestamp_id)\ + .filter(PredictionModelRunTimestamp.prediction_model_id == model.id)\ + .filter(WeatherStationModelPrediction.station_code == station_code)\ + .filter(WeatherStationModelPrediction.prediction_timestamp >= start_datetime)\ + .filter(WeatherStationModelPrediction.prediction_timestamp < end_datetime)\ + .filter(func.date_part('hour', WeatherStationModelPrediction.prediction_timestamp) == 20)\ + .order_by(WeatherStationModelPrediction.prediction_timestamp) + return result.all() diff --git a/api/app/db/crud/weather_models.py b/api/app/db/crud/weather_models.py index ddf6920bd..150c88249 100644 --- a/api/app/db/crud/weather_models.py +++ b/api/app/db/crud/weather_models.py @@ -220,6 +220,7 @@ def get_latest_station_prediction_mat_view(session: Session, MoreCast2MaterializedView.bias_adjusted_wind_speed, MoreCast2MaterializedView.bias_adjusted_wdir, MoreCast2MaterializedView.precip_24h, + MoreCast2MaterializedView.bias_adjusted_precip_24h, MoreCast2MaterializedView.wdir_tgl_10, MoreCast2MaterializedView.wind_tgl_10, MoreCast2MaterializedView.update_date).\ diff --git a/api/app/db/models/weather_models.py b/api/app/db/models/weather_models.py index 9226cb032..f8687c5c6 100644 --- a/api/app/db/models/weather_models.py +++ b/api/app/db/models/weather_models.py @@ -252,11 +252,13 @@ class WeatherStationModelPrediction(Base): default=time_utils.get_utc_now(), index=True) # Precipitation predicted for the previous 24 hour period. precip_24h = Column(Float, nullable=True) + # Precipitation predicted for the previous 24 hour period adjusted for bias. + bias_adjusted_precip_24h = Column(Float, nullable=True) def __str__(self): return ('{self.station_code} {self.prediction_timestamp} {self.tmp_tgl_2} {self.bias_adjusted_temperature} ' '{self.rh_tgl_2} {self.bias_adjusted_rh} {self.wdir_tgl_10} {self.bias_adjusted_wdir} {self.wind_tgl_10} ' - '{self.bias_adjusted_wind_speed} {self.apcp_sfc_0} {self.delta_precip}').format(self=self) + '{self.bias_adjusted_wind_speed} {self.apcp_sfc_0} {self.delta_precip} {self.bias_adjusted_precip_24h}').format(self=self) class MoreCast2MaterializedView(Base): @@ -267,6 +269,7 @@ class MoreCast2MaterializedView(Base): primary_key=True, nullable=False, index=True) abbreviation = Column(String, nullable=False) apcp_sfc_0 = Column(Float, nullable=False) + bias_adjusted_precip_24h = Column(Float, nullable=False) bias_adjusted_rh = Column(Float, nullable=False) bias_adjusted_temperature = Column(Float, nullable=False) bias_adjusted_wind_speed = Column(Float, nullable=False) diff --git a/api/app/jobs/common_model_fetchers.py b/api/app/jobs/common_model_fetchers.py index 8ac07d163..276c61d78 100644 --- a/api/app/jobs/common_model_fetchers.py +++ b/api/app/jobs/common_model_fetchers.py @@ -247,7 +247,7 @@ def _process_prediction(self, station_prediction.apcp_sfc_0 = 0.0 else: station_prediction.apcp_sfc_0 = prediction.apcp_sfc_0 - # Calculate the delta_precipitation based on station's previous prediction_timestamp + # Calculate the delta_precipitation and 24 hour precip based on station's previous prediction_timestamp # for the same model run self.session.flush() station_prediction.precip_24h = self._calculate_past_24_hour_precip( @@ -280,6 +280,11 @@ def _process_prediction(self, station_prediction.wdir_tgl_10, station_prediction.prediction_timestamp ) + # Predict the 24 hour precipitation + station_prediction.bias_adjusted_precip_24h = machine.predict_precipitation( + station_prediction.precip_24h, + station_prediction.prediction_timestamp + ) # Update the update time (this might be an update) station_prediction.update_date = time_utils.get_utc_now() diff --git a/api/app/tests/fixtures/loader.py b/api/app/tests/fixtures/loader.py index fdde563c7..e4cf7d6e9 100644 --- a/api/app/tests/fixtures/loader.py +++ b/api/app/tests/fixtures/loader.py @@ -92,7 +92,7 @@ def guess_filename(self, url: str, params: dict): if params: filename = filename + '_' for key, value in params.items(): - if type(value) == str: + if type(value) == str: # noqa: E721 value = value.replace(' ', '_') filename = '{}_{}_{}'.format(filename, key, value) filename = filename[:200] + '.json' @@ -110,7 +110,7 @@ def lookup_fixture_filename(self, lookup = nested_defaultdict(lookup) fixture = lookup[url][verb][str(params)][str(data)] - if type(fixture) is not str: + if type(fixture) is not str: # noqa: E721 if config.get('AUTO_MAKE_FIXTURES'): logger.warning('inserting fixture into lookup') lookup[url][verb][str(params)][str(data)] = self.guess_filename(url, params) diff --git a/api/app/tests/weather_models/crud.py b/api/app/tests/weather_models/crud.py index 93d3430d9..63047052f 100644 --- a/api/app/tests/weather_models/crud.py +++ b/api/app/tests/weather_models/crud.py @@ -1,10 +1,19 @@ """ Some crud responses used to mock our calls to app.db.crud """ from datetime import datetime -from app.db.models.weather_models import ModelRunPrediction +from app.db.models.weather_models import ModelRunPrediction, WeatherStationModelPrediction from app.db.models.observations import HourlyActual +class MockActualPrecip: + day: datetime + actual_precip_24h: float + + def __init__(self, day, actual_precip_24h): + self.day=day + self.actual_precip_24h=actual_precip_24h + + def get_actuals_left_outer_join_with_predictions(*args): """ Fixed response as replacement for app.db.crud.observations.get_actuals_left_outer_join_with_predictions """ @@ -89,3 +98,33 @@ def get_actuals_left_outer_join_with_predictions(*args): prediction_timestamp=datetime(2020, 10, 11, 21))] ] return result + +def get_accumulated_precip_by_24h_interval(*args): + """ Fixed response as replacement for app.db.crud.observations.get_accumulated_precip_by_24h_interval + """ + return [ + MockActualPrecip( + day=datetime(2023,10,10,20,0,0), + actual_precip_24h=3 + + ), + MockActualPrecip( + day=datetime(2023,10,11,20,0,0), + actual_precip_24h=3 + ) + ] + + +def get_predicted_daily_precip(*args): + return [ + WeatherStationModelPrediction( + bias_adjusted_precip_24h=None, + precip_24h=3, + prediction_timestamp=datetime(2023,10,10,20,0,0) + ), + WeatherStationModelPrediction( + bias_adjusted_precip_24h=None, + precip_24h=3, + prediction_timestamp=datetime(2023,10,11,20,0,0) + ) + ] diff --git a/api/app/tests/weather_models/test_env_canada_gdps.py b/api/app/tests/weather_models/test_env_canada_gdps.py index d3e2a9eca..694bd0a76 100644 --- a/api/app/tests/weather_models/test_env_canada_gdps.py +++ b/api/app/tests/weather_models/test_env_canada_gdps.py @@ -8,7 +8,6 @@ import pytest import requests from sqlalchemy.orm import Session -from shapely import wkt from app.jobs import env_canada from app.jobs import common_model_fetchers import app.utils.time as time_utils @@ -48,9 +47,6 @@ def mock_get_actuals_left_outer_join_with_predictions(monkeypatch): @pytest.fixture() def mock_database(monkeypatch): """ Mocked out database queries """ - geom = ("POLYGON ((-120.525 50.77500000000001, -120.375 50.77500000000001,-120.375 50.62500000000001," - " -120.525 50.62500000000001, -120.525 50.77500000000001))") - shape = wkt.loads(geom) gdps_url = ('https://dd.weather.gc.ca/model_gem_global/15km/grib2/lat_lon/00/000/' 'CMC_glb_TMP_TGL_2_latlon.15x.15_2020052100_P000.grib2') gdps_processed_model_run = ProcessedModelRunUrl(url=gdps_url) diff --git a/api/app/tests/weather_models/test_env_canada_hrdps.py b/api/app/tests/weather_models/test_env_canada_hrdps.py index ce2a572b6..1aeace0d7 100644 --- a/api/app/tests/weather_models/test_env_canada_hrdps.py +++ b/api/app/tests/weather_models/test_env_canada_hrdps.py @@ -6,7 +6,6 @@ from typing import Optional import pytest import requests -import shapely import datetime from sqlalchemy.orm import Session from pytest_mock import MockerFixture @@ -29,10 +28,6 @@ @pytest.fixture() def mock_database(monkeypatch): """ Mocked out database queries """ - geom = ("POLYGON ((-120.525 50.77500000000001, -120.375 50.77500000000001,-120.375 50.62500000000001," - " -120.525 50.62500000000001, -120.525 50.77500000000001))") - shape = shapely.wkt.loads(geom) - hrdps_url = 'https://dd.weather.gc.ca/model_hrdps/continental/2.5km/' \ '00/001/20200521T00Z_MSC_HRDPS_RH_AGL-2m_RLatLon0.0225_PT001H.grib2' hrdps_processed_model_run = ProcessedModelRunUrl(url=hrdps_url) diff --git a/api/app/tests/weather_models/test_env_canada_rdps.py b/api/app/tests/weather_models/test_env_canada_rdps.py index 7cf2f389c..7874f43e7 100644 --- a/api/app/tests/weather_models/test_env_canada_rdps.py +++ b/api/app/tests/weather_models/test_env_canada_rdps.py @@ -6,7 +6,6 @@ import pytest import requests from typing import Optional -from shapely import wkt from sqlalchemy.orm import Session import app.utils.time as time_utils import app.weather_models.process_grib @@ -24,10 +23,6 @@ @pytest.fixture() def mock_database(monkeypatch): """ Mocked out database queries """ - geom = ("POLYGON ((-120.525 50.77500000000001, -120.375 50.77500000000001,-120.375 50.62500000000001," - " -120.525 50.62500000000001, -120.525 50.77500000000001))") - shape = wkt.loads(geom) - rdps_url = ('https://dd.weather.gc.ca/model_gem_regional/10km/grib2/00/034/' 'CMC_reg_RH_TGL_2_ps10km_2020052100_P034.grib2') rdps_processed_model_run = ProcessedModelRunUrl(url=rdps_url) diff --git a/api/app/tests/weather_models/test_machine_learning.py b/api/app/tests/weather_models/test_machine_learning.py index f577413ad..1b736360f 100644 --- a/api/app/tests/weather_models/test_machine_learning.py +++ b/api/app/tests/weather_models/test_machine_learning.py @@ -1,7 +1,9 @@ from datetime import datetime import pytest from app.weather_models import machine_learning -from app.tests.weather_models.crud import get_actuals_left_outer_join_with_predictions +from app.tests.weather_models.crud import (get_actuals_left_outer_join_with_predictions, + get_accumulated_precip_by_24h_interval, + get_predicted_daily_precip) from app.db.models.weather_models import PredictionModel from app.weather_models.machine_learning import StationMachineLearning @@ -11,9 +13,23 @@ def mock_get_actuals_left_outer_join_with_predictions(monkeypatch): """ Mock out call to DB returning actuals macthed with predictions """ monkeypatch.setattr(machine_learning, 'get_actuals_left_outer_join_with_predictions', get_actuals_left_outer_join_with_predictions) + +@pytest.fixture() +def mock_get_accumulated_precip_by_24h_interval(monkeypatch): + """ Mock out call to DB returning actual 24 hour precipitation data """ + monkeypatch.setattr(machine_learning, 'get_accumulated_precip_by_24h_interval', + get_accumulated_precip_by_24h_interval) + +@pytest.fixture() +def mock_get_predicted_daily_precip(monkeypatch): + """ Mock out call to DB returning modelled/predicted 24 hour precipitation data """ + monkeypatch.setattr(machine_learning, 'get_predicted_daily_precip', + get_predicted_daily_precip) -def test_bias_adjustment_with_samples(mock_get_actuals_left_outer_join_with_predictions): +def test_bias_adjustment_with_samples(mock_get_actuals_left_outer_join_with_predictions, + mock_get_accumulated_precip_by_24h_interval, + mock_get_predicted_daily_precip): predict_date_with_samples = datetime.fromisoformat("2020-09-03T21:14:51.939836+00:00") machine_learner = StationMachineLearning( @@ -30,12 +46,16 @@ def test_bias_adjustment_with_samples(mock_get_actuals_left_outer_join_with_pred temp_result = machine_learner.predict_temperature(20, predict_date_with_samples) rh_result = machine_learner.predict_rh(50, predict_date_with_samples) wdir_result = machine_learner.predict_wind_direction(10, 120, predict_date_with_samples) + precip_result = machine_learner.predict_precipitation(3, datetime(2023,10,26,20,0,0)) assert temp_result == 30 assert rh_result == 100 assert wdir_result == 120 + assert precip_result == 3 -def test_bias_adjustment_without_samples(mock_get_actuals_left_outer_join_with_predictions): +def test_bias_adjustment_without_samples(mock_get_actuals_left_outer_join_with_predictions, + mock_get_accumulated_precip_by_24h_interval, + mock_get_predicted_daily_precip): predict_date_without_samples = datetime.fromisoformat("2020-09-03T01:14:51.939836+00:00") machine_learner = StationMachineLearning( @@ -52,6 +72,8 @@ def test_bias_adjustment_without_samples(mock_get_actuals_left_outer_join_with_p temp_result = machine_learner.predict_temperature(20, predict_date_without_samples) rh_result = machine_learner.predict_rh(50, predict_date_without_samples) wdir_result = machine_learner.predict_wind_direction(10, 290, predict_date_without_samples) + precip_result = machine_learner.predict_precipitation(3, predict_date_without_samples) assert temp_result is None assert rh_result is None assert wdir_result is None + assert precip_result is None diff --git a/api/app/tests/weather_models/test_utils.py b/api/app/tests/weather_models/test_utils.py new file mode 100644 index 000000000..c550514ce --- /dev/null +++ b/api/app/tests/weather_models/test_utils.py @@ -0,0 +1,28 @@ +from app.weather_models.utils import construct_dictionary_from_list_by_property + + +class TestObject: + id: int + name: str + + def __init__(self, id, name): + self.id = id + self.name = name + + +object_1 = TestObject(1, "foo") +object_2 = TestObject(2, "bar") +object_3 = TestObject(3, "foo") + +object_list = [object_1, object_2, object_3] + + +def test_construct_dictionary_from_list_by_property(): + result = construct_dictionary_from_list_by_property(object_list, "name") + assert result is not None + assert len(result.keys()) == 2 + assert len(result["foo"]) == 2 + assert len(result["bar"]) == 1 + assert object_1 in result["foo"] + assert object_2 in result["bar"] + assert object_3 in result["foo"] \ No newline at end of file diff --git a/api/app/tests/wildfire_one/test_schema_parsers.py b/api/app/tests/wildfire_one/test_schema_parsers.py index 7669fbef5..0c695f61c 100644 --- a/api/app/tests/wildfire_one/test_schema_parsers.py +++ b/api/app/tests/wildfire_one/test_schema_parsers.py @@ -1,5 +1,5 @@ from typing import List -from app.schemas.morecast_v2 import StationDailyFromWF1, WeatherDeterminate, WeatherIndeterminate +from app.schemas.morecast_v2 import StationDailyFromWF1, WeatherDeterminate from app.wildfire_one.schema_parsers import (WF1RecordTypeEnum, parse_noon_forecast, parse_hourly_actual, unique_weather_stations_mapper, weather_indeterminate_list_mapper, diff --git a/api/app/weather_models/__init__.py b/api/app/weather_models/__init__.py index 951c92bb9..b9c1f5046 100644 --- a/api/app/weather_models/__init__.py +++ b/api/app/weather_models/__init__.py @@ -11,7 +11,7 @@ # Key values on ModelRunGridSubsetPrediction. # Wind direction (wdir_tgl_10_b) is handled slightly differently, so not included here. -SCALAR_MODEL_VALUE_KEYS = ('tmp_tgl_2', 'rh_tgl_2', 'wind_tgl_10', 'apcp_sfc_0') +SCALAR_MODEL_VALUE_KEYS = ('tmp_tgl_2', 'rh_tgl_2', 'wind_tgl_10') class ModelEnum(str, Enum): diff --git a/api/app/weather_models/fetch/predictions.py b/api/app/weather_models/fetch/predictions.py index d39c13600..1426ed85a 100644 --- a/api/app/weather_models/fetch/predictions.py +++ b/api/app/weather_models/fetch/predictions.py @@ -140,7 +140,7 @@ async def fetch_latest_model_run_predictions_by_station_code_and_date_range(sess daily_result = get_latest_station_prediction_mat_view( session, active_station_codes, day_start, day_end) - for timestamp, model_abbrev, station_code, rh, temp, bias_adjusted_temp, bias_adjusted_rh, bias_adjusted_wind_speed, bias_adjusted_wdir, precip_24hours, wind_dir, wind_speed, update_date in daily_result: + for timestamp, model_abbrev, station_code, rh, temp, bias_adjusted_temp, bias_adjusted_rh, bias_adjusted_wind_speed, bias_adjusted_wdir, precip_24hours, bias_adjusted_precip_24h, wind_dir, wind_speed, update_date in daily_result: # Create two WeatherIndeterminates, one for model predictions and one for bias corrected predictions results.append( @@ -163,6 +163,7 @@ async def fetch_latest_model_run_predictions_by_station_code_and_date_range(sess utc_timestamp=timestamp, temperature=bias_adjusted_temp, relative_humidity=bias_adjusted_rh, + precipitation=bias_adjusted_precip_24h, wind_speed=bias_adjusted_wind_speed, wind_direction=bias_adjusted_wdir )) diff --git a/api/app/weather_models/machine_learning.py b/api/app/weather_models/machine_learning.py index 7c415231c..3cb079503 100644 --- a/api/app/weather_models/machine_learning.py +++ b/api/app/weather_models/machine_learning.py @@ -1,7 +1,7 @@ """ Module for calculating the bias for a weather station use basic Machine Learning through Linear Regression. """ -from datetime import datetime, timedelta +from datetime import date, datetime, timedelta, timezone from collections import defaultdict from typing import List from logging import getLogger @@ -11,9 +11,11 @@ from app.weather_models import SCALAR_MODEL_VALUE_KEYS, construct_interpolated_noon_prediction from app.db.models.weather_models import (PredictionModel, ModelRunPrediction) from app.db.models.observations import HourlyActual -from app.db.crud.observations import get_actuals_left_outer_join_with_predictions -from app.weather_models.weather_models import RegressionModelsV2 +from app.db.crud.observations import (get_accumulated_precip_by_24h_interval, + get_actuals_left_outer_join_with_predictions, + get_predicted_daily_precip) from app.weather_models.sample import Samples +from app.weather_models.weather_models import RegressionModelsV2 from app.weather_models.wind_direction_model import compute_u_v from app.weather_models.wind_direction_utils import calculate_wind_dir_from_u_v @@ -71,8 +73,6 @@ def __init__(self, """ : param session: Database session. : param model: Prediction model, e.g. GDPS - : param grid: Grid in which the station is contained. - : param points: Grid represented as points. : param target_coordinate: Coordinate we're interested in . : param station_code: Code of the weather station. : param max_learn_date: Maximum date up to which to learn. @@ -142,13 +142,9 @@ def _collect_data(self, start_date: datetime): prev_actual = actual return sample_collection - def learn(self): + def _learn_models(self, start_date: datetime): """ Collect data and perform linear regression. """ - # Calculate the date to start learning from. - start_date = self.max_learn_date - \ - timedelta(days=self.max_days_to_learn) - # collect data data = self._collect_data(start_date) @@ -170,6 +166,31 @@ def learn(self): self.session, self.model.id, self.station_code, start_date, self.max_learn_date) self.regression_models_v2.collect_data(query) self.regression_models_v2.train() + + def _learn_precip_model(self, start_date): + """ Collect precip data and perform linear regression. + """ + # Precip is based on 24 hour periods at 20:00 hours UTC. Use the start_date + # parameter to calculate a start_datetime for the same date but at 20:00 UTC. + start_datetime = datetime(start_date.year, start_date.month, start_date.day, 20, tzinfo=timezone.utc) + # The end datetime is yesterday at 20:00 UTC. + end_date = date.today() - timedelta(days=-1) + end_datetime = datetime(end_date.year, end_date.month, end_date.day, 20, tzinfo=timezone.utc) + # Get the actual precip values + actual_daily_precip = get_accumulated_precip_by_24h_interval( + self.session, self.station_code, start_datetime, end_datetime) + # Get the model predicted values + predicted_daily_precip = get_predicted_daily_precip( + self.session, self.model, self.station_code, start_datetime, end_datetime) + self.regression_models_v2.add_precip_samples(actual_daily_precip, predicted_daily_precip) + self.regression_models_v2.train_precip() + + def learn(self): + # Calculate the date to start learning from. + start_date = self.max_learn_date - \ + timedelta(days=self.max_days_to_learn) + self._learn_models(start_date) + self._learn_precip_model(start_date) def predict_temperature(self, model_temperature: float, timestamp: datetime): """ Predict the bias adjusted temperature for a given point in time, given a corresponding model @@ -230,3 +251,19 @@ def predict_wind_direction(self, model_wind_speed: float, model_wind_dir: int, t assert len(predicted_wind_dir) == 2 predicted_wind_dir_deg = calculate_wind_dir_from_u_v(u_v[0], u_v[1]) return predicted_wind_dir_deg + + def predict_precipitation(self, model_precipitation: float, timestamp: datetime): + """ Predict the 24 hour precipitation for a given point in time, given a + corresponding model precipitation. + : param model_precipitation: Precipitation as provided by the model + : param timestamp: Datetime value for the predicted value. + : return: The bias adjusted 24 hour precipitation as predicted by the linear regression model. + """ + if model_precipitation is None: + logger.warning('model precipitation for %s was None', timestamp) + return None + hour = timestamp.hour + predicted_precip_24h = self.regression_models_v2._precip_model.predict(hour, [[model_precipitation]]) + if predicted_precip_24h is None or len(predicted_precip_24h) == 0: + return None + return max(0, predicted_precip_24h[0]) diff --git a/api/app/weather_models/precip_model.py b/api/app/weather_models/precip_model.py new file mode 100644 index 000000000..a96684e18 --- /dev/null +++ b/api/app/weather_models/precip_model.py @@ -0,0 +1,26 @@ +import logging +from datetime import datetime +from app.weather_models.linear_model import LinearModel +from app.weather_models.regression_model import RegressionModelBase + +logger = logging.getLogger(__name__) + +class PrecipModel(RegressionModelBase): + """ + 24 hour accumulated precipitation regression model. + """ + + def __init__(self, linear_model: LinearModel): + self._linear_model = linear_model + + def add_sample(self, actual_value: float, model_value: float, timestamp: datetime): + if actual_value is None: + logger.warning('no actual value for model key: "precip"') + return + if model_value is None: + logger.warning('no model value for model key: "precip"') + return + # Add to the data we're going to learn from: + # Using two variables, the actual or modelled precip value, and the hour of the day. + self._linear_model.append_x_y([model_value], [actual_value], timestamp) + diff --git a/api/app/weather_models/regression_model.py b/api/app/weather_models/regression_model.py index aa7210868..4d9c137ab 100644 --- a/api/app/weather_models/regression_model.py +++ b/api/app/weather_models/regression_model.py @@ -23,11 +23,6 @@ class RegressionModelProto(Protocol): _models: defaultdict[int, LinearRegression] _samples: Samples - @abstractmethod - def add_sample(self, - prediction: ModelRunPrediction, - actual: HourlyActual): raise NotImplementedError - @abstractmethod def train(self): raise NotImplementedError @@ -35,7 +30,19 @@ def train(self): raise NotImplementedError def predict(self, hour: int, model_wind_dir: List[List[int]]): raise NotImplementedError -class RegressionModel(RegressionModelProto): +class RegressionModelBase(RegressionModelProto): + """ + Base class for all weather model regression models. + """ + + def train(self): + return self._linear_model.train() + + def predict(self, hour: int, value: List[List[int]]): + return self._linear_model.predict(hour, value) + + +class RegressionModel(RegressionModelBase): """ Default class to manage a regression dataset """ @@ -44,12 +51,6 @@ def __init__(self, model_key: str, linear_model: LinearModel): self._key = model_key self._linear_model = linear_model - def train(self): - return self._linear_model.train() - - def predict(self, hour: int, model_wind_dir: List[List[int]]): - return self._linear_model.predict(hour, model_wind_dir) - def add_sample(self, prediction: ModelRunPrediction, actual: HourlyActual): diff --git a/api/app/weather_models/utils.py b/api/app/weather_models/utils.py new file mode 100644 index 000000000..f39544e01 --- /dev/null +++ b/api/app/weather_models/utils.py @@ -0,0 +1,15 @@ +def construct_dictionary_from_list_by_property(source: list[any], property: any): + """ Create a dictionary from a list where the dictionary keys are defined by the list item values of the provided + property. + :param source: The source list. + :param property: A property name of the objects in the list. + """ + result = {} + for item in source: + key = getattr(item, property) + if key is None: + continue + if key not in result: + result[key] = [] + result[key].append(item) + return result \ No newline at end of file diff --git a/api/app/weather_models/weather_models.py b/api/app/weather_models/weather_models.py index b49f847ab..a93e8140e 100644 --- a/api/app/weather_models/weather_models.py +++ b/api/app/weather_models/weather_models.py @@ -1,11 +1,14 @@ import logging +import math from typing import List from app.db.models.observations import HourlyActual from app.db.models.weather_models import ModelRunPrediction from app.weather_models import construct_interpolated_noon_prediction from app.weather_models.linear_model import LinearModel +from app.weather_models.precip_model import PrecipModel from app.weather_models.regression_model import RegressionModelProto, model_2_actual_keys from app.weather_models.sample import Samples +from app.weather_models.utils import construct_dictionary_from_list_by_property from app.weather_models.wind_direction_model import WindDirectionModel logger = logging.getLogger(__name__) @@ -21,11 +24,25 @@ def __init__(self): self._models: List[RegressionModelProto] = [ WindDirectionModel(linear_model=LinearModel(samples=Samples())) ] + self._precip_model: RegressionModelProto = PrecipModel(linear_model=LinearModel(samples=Samples())) def add_samples(self, prediction: ModelRunPrediction, actual: HourlyActual): for model in self._models: model.add_sample(prediction, actual) + def add_precip_samples(self, actual_values, predicted_values): + predicted_values_dict = construct_dictionary_from_list_by_property(predicted_values, "prediction_timestamp") + for actual in actual_values: + daily_actual = actual.actual_precip_24h + timestamp = actual.day + daily_predicted_list = predicted_values_dict.get(timestamp) + if daily_predicted_list is None: + continue + for daily_predicted in daily_predicted_list: + precip_24h = daily_predicted.precip_24h + if daily_actual is not None and not math.isnan(daily_actual) and precip_24h is not None and not math.isnan(precip_24h): + self._precip_model.add_sample(daily_actual, daily_predicted.precip_24h, timestamp) + def collect_data(self, query): # We need to keep track of previous so that we can do interpolation for the global model. prev_actual = None @@ -52,3 +69,6 @@ def train(self): # and each hour. for model in self._models: model.train() + + def train_precip(self): + self._precip_model.train() diff --git a/api/app/weather_models/wind_direction_model.py b/api/app/weather_models/wind_direction_model.py index 3a263288d..d68305501 100644 --- a/api/app/weather_models/wind_direction_model.py +++ b/api/app/weather_models/wind_direction_model.py @@ -1,10 +1,9 @@ import logging import math -from typing import List from app.db.models.observations import HourlyActual from app.db.models.weather_models import ModelRunPrediction from app.weather_models.linear_model import LinearModel -from app.weather_models.regression_model import RegressionModelProto +from app.weather_models.regression_model import RegressionModelBase from app.weather_models.wind_direction_utils import compute_u_v logger = logging.getLogger(__name__) @@ -17,7 +16,7 @@ def any_none_or_nan(prediction: ModelRunPrediction, actual: HourlyActual): actual.wind_speed is None or math.isnan(actual.wind_speed) -class WindDirectionModel(RegressionModelProto): +class WindDirectionModel(RegressionModelBase): """ Wind direction regression model that decomposes degree direction into the u, v vectors that make it up. See: http://colaweb.gmu.edu/dev/clim301/lectures/wind/wind-uv @@ -27,12 +26,6 @@ class WindDirectionModel(RegressionModelProto): def __init__(self, linear_model: LinearModel): self._linear_model = linear_model - def train(self): - return self._linear_model.train() - - def predict(self, hour: int, model_wind_dir: List[List[int]]): - return self._linear_model.predict(hour, model_wind_dir) - def add_sample(self, prediction: ModelRunPrediction, actual: HourlyActual): diff --git a/web/src/features/moreCast2/components/MoreCast2Column.tsx b/web/src/features/moreCast2/components/MoreCast2Column.tsx index 9ee8ac2a8..c3058c36e 100644 --- a/web/src/features/moreCast2/components/MoreCast2Column.tsx +++ b/web/src/features/moreCast2/components/MoreCast2Column.tsx @@ -111,7 +111,7 @@ export const tempForecastField = new IndeterminateField('temp', 'Temp', 'number' export const rhForecastField = new IndeterminateField('rh', 'RH', 'number', 0, true) export const windDirForecastField = new IndeterminateField('windDirection', 'Wind Dir', 'number', 0, true) export const windSpeedForecastField = new IndeterminateField('windSpeed', 'Wind Speed', 'number', 1, true) -export const precipForecastField = new IndeterminateField('precip', 'Precip', 'number', 1, false) +export const precipForecastField = new IndeterminateField('precip', 'Precip', 'number', 1, true) export const buiField = new IndeterminateField('bui', 'BUI', 'number', 0, false) export const isiField = new IndeterminateField('isi', 'ISI', 'number', 1, false) export const fwiField = new IndeterminateField('fwi', 'FWI', 'number', 0, false)