From 7f7d450b2b58b859e5b4358b7f0e8d903fcf8cca Mon Sep 17 00:00:00 2001 From: Darren Boss Date: Fri, 1 Sep 2023 15:14:44 -0700 Subject: [PATCH 01/11] Use 12km NAM Calculate cumulative precip --- api/app/jobs/common_model_fetchers.py | 21 +++++-- api/app/jobs/model_data.md | 85 ++++++++++++++++++++++++--- api/app/jobs/noaa.py | 27 ++++++--- 3 files changed, 114 insertions(+), 19 deletions(-) diff --git a/api/app/jobs/common_model_fetchers.py b/api/app/jobs/common_model_fetchers.py index 857d64fb1..a0215c7de 100644 --- a/api/app/jobs/common_model_fetchers.py +++ b/api/app/jobs/common_model_fetchers.py @@ -172,13 +172,16 @@ class ModelValueProcessor: """ Iterate through model runs that have completed, and calculate the interpolated weather predictions. """ + # The NAM reports cumulative precipitation in three hour intervals. + NAM_ACCUMULATION_INTERVAL = 3 + def __init__(self, session, station_source: StationSourceEnum = StationSourceEnum.UNSPECIFIED): """ Prepare variables we're going to use throughout """ self.session = session self.stations = get_stations_synchronously(station_source) self.station_count = len(self.stations) - def _process_model_run(self, model_run: PredictionModelRunTimestamp): + def _process_model_run(self, model_run: PredictionModelRunTimestamp, model_type: ModelEnum): """ Interpolate predictions in the provided model run for all stations. """ logger.info('Interpolating values for model run: %s', model_run) # Iterate through stations. @@ -188,7 +191,7 @@ def _process_model_run(self, model_run: PredictionModelRunTimestamp): index, self.station_count, station.code, station.name) # Process this model run for station. - self._process_model_run_for_station(model_run, station) + self._process_model_run_for_station(model_run, station, model_type) # Commit all the weather station model predictions (it's fast if we line them all up and commit # them in one go.) 
logger.info('commit to database...') @@ -320,7 +323,8 @@ def _calculate_delta_precip(self, station, model_run, prediction, station_predic def _process_model_run_for_station(self, model_run: PredictionModelRunTimestamp, - station: WeatherStation): + station: WeatherStation, + model_type: ModelEnum): """ Process the model run for the provided station. """ # Extract the coordinate. @@ -353,9 +357,18 @@ def _process_model_run_for_station(self, query = get_model_run_predictions_for_grid( self.session, model_run, grid) + cumulative_precip = [0.0, 0.0, 0.0, 0.0] # Iterate through all the predictions. prev_prediction = None for prediction in query: + if model_type == ModelEnum.NAM: + # Add up the cumulative precipitation over the course of the model run + if prediction.apcp_sfc_0 is None: + prediction.apcp_sfc_0 = [0.0, 0.0, 0.0, 0.0] + temp_precip = numpy.add(cumulative_precip, prediction.apcp_sfc_0) + if prediction.prediction_timestamp.hour % self.NAM_ACCUMULATION_INTERVAL == 0: + cumulative_precip = temp_precip + prediction.apcp_sfc_0 = temp_precip if (prev_prediction is not None and prev_prediction.prediction_timestamp.hour == 18 and prediction.prediction_timestamp.hour == 21): @@ -384,7 +397,7 @@ def process(self, model_type: ModelEnum): logger.info('model %s', model) logger.info('model_run %s', model_run) # Process the model run. - self._process_model_run(model_run) + self._process_model_run(model_run, model_type) # Mark the model run as interpolated. 
self._mark_model_run_interpolated(model_run) refresh_morecast2_materialized_view(self.session) diff --git a/api/app/jobs/model_data.md b/api/app/jobs/model_data.md index a4a4f8f56..6e3189b90 100644 --- a/api/app/jobs/model_data.md +++ b/api/app/jobs/model_data.md @@ -23,6 +23,7 @@ - [Linear Interpolation](#linear-interpolation) - [From grids to fire weather stations](#from-grids-to-fire-weather-stations) - [To 12:00 PST (13:00 PDT)](#to-1200-pst-1300-pdt) + - [Calculating predicted 24 hour precipitation](#calculating-predicted-24-hour-precipitation) - [Cronjobs](#cronjobs) @@ -66,7 +67,7 @@ The GDPS, RDPS, and HRDPS models have different cycle lengths, different resolut #### Relevant Weather Variables -The GDPS, RDPS, and HRDPS weather variables that we currently fetch for use in Morecast are `TMP_TGL_2m` (temperature), `RELH_TGL_2m` (relative humidity), `WIND_TGL_10m` (wind speed), `WDIR_TGL_10m` (wind direction), and `APCP_SFC_0` (accumulated precipitation). +The GDPS, RDPS, and HRDPS weather variables that we currently fetch for use in Morecast are `TMP_TGL_2m` (temperature), `RELH_TGL_2m` (relative humidity), `WIND_TGL_10m` (wind speed), `WDIR_TGL_10m` (wind direction), and `APCP_SFC_0` (accumulated precipitation since the start of the model run). ### NOAA @@ -105,25 +106,83 @@ NAM model data is published on a Polar Stereographic projection at a 32km resolu The NAM model makes predictions for every hour for the first 36 hours, and then on a 3-hourly schedule to 84 hours [https://mag.ncep.noaa.gov/model-guidance-model-parameter.php?group=Model%20Guidance&model=NAM&area=NAMER&ps=area#]. As with the GFS model, we sometimes need to fetch data for 2 different hour offsets and then perform linear interpolation in order to have a weather prediction for 12:00 PST. The file naming convention that NAM uses indicates the model runtime as hours in UTC, and indexes that hour as 00. All subsequent hours in the model run are referenced as offsets of the 00 hour index. 
This means that for the 00:00 UTC model cycle, 20:00 UTC (or 12:00 PST) is referenced as hour 20; but for the 12:00 UTC model cycle, 20:00 UTC is referenced as hour 8 (20:00 - 12:00). Therefore, to fetch the noon PST data we need, the NAM cronjob pulls the following model hours: - For the 00:00 UTC model run time: + - Accumulation hours: 12, 24, 36, 48, 60 - Day 0: hour 20 (20:00 UTC = 12:00 PST) - Day 1: hours 42 and 45, to interpolate for hour 44 (20:00 UTC) - Day 2: hours 66 and 69, to interpolate for hour 68 (20:00 UTC) - For the 06:00 UTC model run time: + - Accumulation hours: 6, 12, 18, 24, 30, 36, 42, 48, 60 - Day 0: hour 14 (6+14 = 20:00 UTC) - Day 1: hours 36 and 39, to interpolate for hour 38 (38-24=14 hours, 14+06:00 = 20:00 UTC of the next day) - Day 2: hours 60 and 63, to interpolate for hour 62 (20:00 UTC of 2 days later) - For the 12:00 UTC model run time: + - Accumulation hours: 12, 24, 36, 48, 60, 72 - Day 0: hour 8 (12+8 = 20:00 UTC) - Day 1: hour 32 (12+32-24 = 20:00 UTC of the next day) - Day 2: hours 54 and 57, to interpolate for hour 56 (56-48 hours = 8 hours, 8+12:00 = 20:00 UTC of 2 days later) - For the 18:00 UTC model run time: + - Accumulation hours: 6, 12, 18, 24, 30, 36, 42, 48, 60, 66, 72 - Day 0: hour 2 (18+2 = 20:00 UTC) - Day 1: hour 26 (18+26-24 = 20:00 UTC of the next day) - Day 2: hours 48 and 51, to interpolate for hour 50 (50-48 hours = 2 hours, 2 + 18:00 = 20:00 UTC of 2 days later) +Note that the accumulation hours are required in order for us to calculate the cumulative precipitation across the model run which is needed when we subsequently calculate cumulative 24 hour precipitation. + #### Relevant Weather Variables -To be consistent with the weather variables that we pull from the Environment Canada models, for GFS we use comparable weather variables. 
There is a slight exception for wind direction and speed: Env Canada models output the separate U and V components for wind, but they also output the calculated wind speed and direction using those U, V components. NOAA only outputs the U,V components for wind - we must do the wind direction and wind speed calculations ourselves. +To be consistent with the weather variables that we pull from the Environment Canada models, for GFS we use comparable weather variables. There is a slight exception for wind direction and speed: Env Canada models output the separate U and V components for wind, but they also output the calculated wind speed and direction using those U, V components. NOAA only outputs the U,V components for wind - we must do the wind direction and wind speed calculations ourselves. The NOAA grib files for GFS contain two layers related to cumulative precipitation. Both layers are named 'apcp_sfc_0' but accumulate precipitation differently. Raster band 6 accumulates precip in 3 hour increments for odd hours and six hour increments for even hours, but we actually ignore this band. We use band 7 which accumulats precipitation across the model run. + +The NAM grib file does not contain a band for cumulative precipitation since the start of the model. This requires us to use band 6 which accumulates precipitation slightly differently for model run hours 00 and 12 versus 06 and 18. The 00 and 12 hour model runs contain cumulative precipitation in 12 hour cycles. + +For the 00 and 12 hour model runs: + +| Hour from start | Cumulative precip hours | +| --------------- | ----------------------- | +| 1 | 0 - 1 | +| 2 | 0 - 2 | +| 3 | 0 - 3 | +| ... | ... - ... | +| 12 | 0 - 12 | +| 13 | 12 - 13 | +| 14 | 12 - 14 | +| ... | ... - ... | +| 24 | 12 - 24 | +| 25 | 24 - 25 | +| ... | ... - ... | +| 36 | 24 - 36 | +| 39 | 36 - 39 | +| 42 | 36 - 42 | +| 45 | 36 - 45 | +| 48 | 36 - 48 | +| 51 | 48 - 51 | +| ... | ... - ... 
| + +For the 06 and 18 hour model runs: + +| Hour from start | Cumulative precip hours | +| --------------- | ----------------------- | +| 1 | 0 - 1 | +| 2 | 0 - 2 | +| 3 | 0 - 3 | +| 4 | 3 - 4 | +| 5 | 3 - 5 | +| 6 | 0 - 6 | +| 7 | 6 - 7 | +| 8 | 6 - 8 | +| 9 | 6 - 9 | +| 10 | 9 - 10 | +| 11 | 9 - 11 | +| 12 | 6 - 12 | +| 13 | 12 - 13 | +| ... | ... - ... | +| 36 | 30 - 36 | +| 39 | 36 - 39 | +| 42 | 36 - 42 | +| 45 | 42 - 45 | +| 48 | 42 - 48 | +| ... | ... - ... | + + ## Downloading Data @@ -135,15 +194,21 @@ There is a lot of data to download and analyze for these weather models, so to e Once model data has been downloaded, we store the model data for "grid subsets" relevant to our use in our `wps` Postgres database in the `model_run_grid_subset_predictions` table. Each row in this table stores temperature, RH, precipitation, wind speed, and wind direction data, as we need for Morecast. For each of these weather variables, each row contains an array of 4 data points, corresponding to the 4 corners of one square of the grid. The `prediction_model_grid_subset_id` in this table corresponds to a grid cell that contains one of BC's fire weather station locations, as stored in the `prediction_model_grid_subsets` table. When our backend inserts data into the `model_run_grid_subset_predictions` table, it also adds metadata pertinent to the specific model run. -Once the `model_run_grid_subset_predictions` table has been populated for a given model run, our backend then performs linear interpolation based on the geographic location of each weather station to calculate an approximated value for each weather variable. +Our back-end used to perform spatial interpolation using the four values in the grid subset. We no longer follow this process. In order to align with SpotWX, we now use the value from the grid cell in the grib file in which the weather station is located. These values are stored in the `weather_station_model_predictions` table. 
+ +TODO - We need to amend our back-end code to only pull the necessary value from the grib files instead of pulling these grid subsets (4 values). + +~~Once the `model_run_grid_subset_predictions` table has been populated for a given model run, our backend then performs linear interpolation based on the geographic location of each weather station to calculate an approximated value for each weather variable.~~ -For example, a small portion of the grid for a temperature raster band of a GRIB file might look like the image below. In this example, each point on the grid is spaced 5km apart. You can see that a fire weather station is located within one cell of this grid, where the 4 corners of the cell have temperature values of 3.1°, 2.4°, 2.7°, and 2.7°C. In order to approximate the predicted temperature at the weather station's exact location, we perform linear interpolation of the 4 temperature values listed above, where the weight of each data point is inversely proportional to the distance from the weather station to that point on the grid. Consequently, the 2.4°C from the top right corner of the cell will have the highest weight as it is closest to the weather station, and the 2.7°C from the bottom left corner will have the lowest weight as it is the furthest away. +~~For example, a small portion of the grid for a temperature raster band of a GRIB file might look like the image below. In this example, each point on the grid is spaced 5km apart. You can see that a fire weather station is located within one cell of this grid, where the 4 corners of the cell have temperature values of 3.1°, 2.4°, 2.7°, and 2.7°C. In order to approximate the predicted temperature at the weather station's exact location, we perform linear interpolation of the 4 temperature values listed above, where the weight of each data point is inversely proportional to the distance from the weather station to that point on the grid. 
Consequently, the 2.4°C from the top right corner of the cell will have the highest weight as it is closest to the weather station, and the 2.7°C from the bottom left corner will have the lowest weight as it is the furthest away.~~ -![Location-based linear interpolation for a weather station](../../../docs/images/Grid_wx_station.png) +~~![Location-based linear interpolation for a weather station](../../../docs/images/Grid_wx_station.png)~~ -This linear interpolation process is repeated for each of the weather variables in a model run, and for each of the weather stations in BC. These calculations are stored in the `weather_station_model_predictions` table. +~~This linear interpolation process is repeated for each of the weather variables in a model run, and for each of the weather stations in BC. These calculations are stored in the `weather_station_model_predictions` table.~~ -At the present time we are only performing linear interpolation from grid subsets to weather stations based on geographic distance (because it is the simplest method of interpolation). Forecasters have requested that we also perform linear interpolation based on station elevation as the results may be more accurate - this is a pending story in our backlog. +~~At the present time we are only performing linear interpolation from grid subsets to weather stations based on geographic distance (because it is the simplest method of interpolation).~~ + +Forecasters have requested that we also perform linear interpolation based on station elevation as the results may be more accurate - this is a pending story in our backlog. ### To 12:00 PST (13:00 PDT) @@ -151,6 +216,12 @@ Fire weather forecasters in the BC Wildfire Service create daily "noon forecasts This time-based linear interpolation is done as part of the data analysis process performed when the model data is retrieved from our third-party sources. 
See `process_grib.py` +### Calculating predicted 24 hour precipitation + +For any given date, Morecast 2 displays the precipitation predicted to fall in the 24 hours previous to that date. For example, for August 30, 2023, the predicted precip covers a time period from August 29, 2023 20:00 UTC to August 30, 2023 20:00 UTC. The first 24 hours of a numerical weather model run present a challenge because there is no way to calculate the predicted 24 hour precip purely from the model run data. Consider a HRDPS model run on August 30 at 12:00 UTC. Morecast 2 needs to display the predicted precipitation for August 30 at 20:00, but at that point we only have 8 hours of accumulated precipitation (from 12:00 to 20:00) from our model run, we are potentially missing precipitation that could have fallen from Aug 29 20:00 to August 30 12:00 which means we're missing 16 hours of data. To work around this, we pull actual rainfall values for that 16 hour period from our hourly_actuals table which scrapes observed values from the WF1 API. + +Continuing on with the above example, we also need to display 24 hour predicted cumulative precip for August 31 20:00. Since our model run contains data from more than 24 hours ago, we can perform simple subtraction of accumulated precipitation to get our 24 hour predicted cumulative precip (Aug 31 20:00 - Aug 30 20:00). + ## Cronjobs We have created a separate cronjob for each model, to retrieve the data from source at various times of day, depending on how often each model is run. Our cronjobs are set to execute at randomly selected minutes within the set hour, as a courtesy to our third-party data sources so that their servers are not overwhelmed with many simultaneous requests.
\ No newline at end of file diff --git a/api/app/jobs/noaa.py b/api/app/jobs/noaa.py index c9e4ba4e8..d49078c7a 100644 --- a/api/app/jobs/noaa.py +++ b/api/app/jobs/noaa.py @@ -37,7 +37,7 @@ # ------- NAM static variables ----------- # -NAM_BASE_URL = 'https://nomads.ncep.noaa.gov/cgi-bin/filter_nam_na.pl?' +NAM_BASE_URL = 'https://nomads.ncep.noaa.gov/cgi-bin/filter_nam.pl?' # -------------------------------------- # @@ -90,7 +90,13 @@ def get_noaa_subregion_filter_str() -> str: def get_nam_model_run_download_urls(download_date: datetime.datetime, model_cycle: str) -> Generator[str, None, None]: - """ Yield URLs to download NAM North America model runs """ + """ Yield URLs to download NAM North America model runs. """ + # The NAM does not accumulate precipitation throughout the model run; rather, for simplicity sake, they + # accumulate precipitation at 3 hour intervals. base_accumulation_hours are the three hour intervals that all + # four model runs have in common. The accumulation_hours are the additional three hour intervals we need to process + # for each model run in order to calculate a running total of cumulative precipitation throughout the course of a + # model run. There is some overlap between base_accumulation_hours and before_noon/after_noon, so we create a set + # which is converted back to a list. 
# for model_cycle 00: # for first day, need hour 20:00 UTC (13:00 PST) # Day 2: need hours 42 and 45 to interpolate for hour 44 (13:00 PST) @@ -110,24 +116,29 @@ def get_nam_model_run_download_urls(download_date: datetime.datetime, model_cycl # for first day, need hour 2 (18 + 2 = 20:00 UTC) # for second day, need hour 26 (18 + (26-24) = 20:00 UTC) # Day 3: need hours 48 and 51 to interpolate for hour 50 (13:00 PST) + base_accumulation_hours = [hour for hour in range(0, 67, 3)] if model_cycle == '00': + accumulation_hours = base_accumulation_hours noon = [20] before_noon = [42, 66] after_noon = [45, 69] elif model_cycle == '06': + accumulation_hours = base_accumulation_hours noon = [14] before_noon = [36, 60] after_noon = [39, 63] elif model_cycle == '12': + accumulation_hours = base_accumulation_hours + [66, 69, 72, 75] noon = [8, 32] - before_noon = [54] - after_noon = [57] + before_noon = [54, 78] + after_noon = [57, 81] elif model_cycle == '18': + accumulation_hours = base_accumulation_hours + [66, 69] noon = [2, 26] - before_noon = [48] - after_noon = [51] + before_noon = [48, 72] + after_noon = [51, 75] - all_hours = noon + before_noon + after_noon + all_hours = list(set(accumulation_hours + noon + before_noon + after_noon)) # sort list purely for human convenience when debugging. 
Functionally it doesn't matter all_hours.sort() @@ -138,7 +149,7 @@ def get_nam_model_run_download_urls(download_date: datetime.datetime, model_cycl for fcst_hour in all_hours: hh = format(fcst_hour, '02d') - filter_str = f'dir=%2Fnam.{year_mo_date}&file=nam.t{model_cycle}z.awip32{hh}.tm00.grib2&' + filter_str = f'dir=%2Fnam.{year_mo_date}&file=nam.t{model_cycle}z.awphys{hh}.tm00.grib2&' wx_vars_filter_str = get_noaa_wx_variables_filter_str() levels_filter_str = get_noaa_levels_filter_str() subregion_filter_str = get_noaa_subregion_filter_str() From 431489f288594fb02f430fbd38506ae18216c897 Mon Sep 17 00:00:00 2001 From: Darren Boss Date: Tue, 5 Sep 2023 16:18:52 -0700 Subject: [PATCH 02/11] Fix NAM accumulation Fix 00 and 12 hour accumulation interval --- api/app/jobs/noaa.py | 56 ++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 38 deletions(-) diff --git a/api/app/jobs/noaa.py b/api/app/jobs/noaa.py index d49078c7a..c87026970 100644 --- a/api/app/jobs/noaa.py +++ b/api/app/jobs/noaa.py @@ -91,52 +91,32 @@ def get_noaa_subregion_filter_str() -> str: def get_nam_model_run_download_urls(download_date: datetime.datetime, model_cycle: str) -> Generator[str, None, None]: """ Yield URLs to download NAM North America model runs. """ - # The NAM does not accumulate precipitation throughout the model run; rather, for simplicity sake, they - # accumulate precipitation at 3 hour intervals. base_accumulation_hours are the three hour intervals that all - # four model runs have in common. The accumulation_hours are the additional three hour intervals we need to process - # for each model run in order to calculate a running total of cumulative precipitation throughout the course of a - # model run. There is some overlap between base_accumulation_hours and before_noon/after_noon, so we create a set - # which is converted back to a list. 
- # for model_cycle 00: - # for first day, need hour 20:00 UTC (13:00 PST) - # Day 2: need hours 42 and 45 to interpolate for hour 44 (13:00 PST) - # Day 3: need hours 66 and 69 to interpolate for hour 68 (13:00 PST) - # - # for model_cycle 06: - # for first day, need hour 14 (14+6 = 20:00 UTC) - # Day 2: need hours 36 and 39 to interpolate for hour 38 (13:00 PST) - # Day 3: need hours 60 and 63 to interpolate for hour 62 (13:00 PST) - # - # for model_cycle 12: - # for first day, need hour 8 (12+8 = 20:00 UTC) - # for second day, need hour 32 (12 + (32-24) = 20:00 UTC) - # Day 3: need hours 54 and 57 to interpolate for hour 56 (13:00 PST) - # - # for model_cycle 18: - # for first day, need hour 2 (18 + 2 = 20:00 UTC) - # for second day, need hour 26 (18 + (26-24) = 20:00 UTC) - # Day 3: need hours 48 and 51 to interpolate for hour 50 (13:00 PST) - base_accumulation_hours = [hour for hour in range(0, 67, 3)] + # The NAM does not accumulate precipitation throughout the model run. The 00 and 12 hour model runs accumulate + # precip at 12 hour intervals and the 06 and 18 hour model runs accumulate precip at 3 hour intervals. + # The accumulation_hours represent the hours needed in order to calculate accumulated precipitation for the model + # run. The noon variables contain a list of 20:00 UTC times for which a prediction exists for the NAM. The before_noon + # and after_noon variables contain lists of 18:00 UTC times and 21:00 UTC times needed for interpolating to 20:00 + # UTC as exact 20:00 UTC predictions do not exist beyond hour 36 of the model run.
if model_cycle == '00': - accumulation_hours = base_accumulation_hours + accumulation_hours = [hour for hour in range(0, 61, 12)] noon = [20] - before_noon = [42, 66] - after_noon = [45, 69] + before_noon = [18, 42, 66] + after_noon = [21, 45, 69] elif model_cycle == '06': - accumulation_hours = base_accumulation_hours + accumulation_hours = [hour for hour in range(0, 67, 3)] noon = [14] - before_noon = [36, 60] - after_noon = [39, 63] + before_noon = [12, 36, 60] + after_noon = [15, 39, 63] elif model_cycle == '12': - accumulation_hours = base_accumulation_hours + [66, 69, 72, 75] + accumulation_hours = [hour for hour in range(0, 73, 12)] noon = [8, 32] - before_noon = [54, 78] - after_noon = [57, 81] + before_noon = [6, 30, 54, 78] + after_noon = [9, 33, 57, 81] elif model_cycle == '18': - accumulation_hours = base_accumulation_hours + [66, 69] + accumulation_hours = [hour for hour in range(0, 70, 3)] noon = [2, 26] - before_noon = [48, 72] - after_noon = [51, 75] + before_noon = [0, 24, 48, 72] + after_noon = [3, 27, 51, 75] all_hours = list(set(accumulation_hours + noon + before_noon + after_noon)) # sort list purely for human convenience when debugging. Functionally it doesn't matter From e805185d358c2027a5423358c3fd4d151b039697 Mon Sep 17 00:00:00 2001 From: Darren Boss Date: Tue, 5 Sep 2023 16:22:35 -0700 Subject: [PATCH 03/11] Missed a file --- api/app/jobs/common_model_fetchers.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/api/app/jobs/common_model_fetchers.py b/api/app/jobs/common_model_fetchers.py index a0215c7de..9550ece88 100644 --- a/api/app/jobs/common_model_fetchers.py +++ b/api/app/jobs/common_model_fetchers.py @@ -360,13 +360,18 @@ def _process_model_run_for_station(self, cumulative_precip = [0.0, 0.0, 0.0, 0.0] # Iterate through all the predictions. 
prev_prediction = None + + # 00 and 12 hour model runs accumulate precipitation in 12 hour intervals, 06 and 18 hour accumulate in + # 3 hour intervals + model_run_hour = model_run.prediction_run_timestamp.hour + nam_accumulation_interval = 3 if model_run_hour == 6 or model_run_hour == 18 else 12 for prediction in query: if model_type == ModelEnum.NAM: # Add up the cumulative precipitation over the course of the model run if prediction.apcp_sfc_0 is None: prediction.apcp_sfc_0 = [0.0, 0.0, 0.0, 0.0] temp_precip = numpy.add(cumulative_precip, prediction.apcp_sfc_0) - if prediction.prediction_timestamp.hour % self.NAM_ACCUMULATION_INTERVAL == 0: + if prediction.prediction_timestamp.hour % nam_accumulation_interval == 0: cumulative_precip = temp_precip prediction.apcp_sfc_0 = temp_precip if (prev_prediction is not None From e436e454387a7c9ed6870c28f2d029b427f1f3ee Mon Sep 17 00:00:00 2001 From: Darren Boss Date: Fri, 1 Sep 2023 15:14:44 -0700 Subject: [PATCH 04/11] Use 12km NAM Calculate cumulative precip --- api/app/jobs/common_model_fetchers.py | 21 +++++-- api/app/jobs/model_data.md | 85 ++++++++++++++++++++++++--- api/app/jobs/noaa.py | 27 ++++++--- 3 files changed, 114 insertions(+), 19 deletions(-) diff --git a/api/app/jobs/common_model_fetchers.py b/api/app/jobs/common_model_fetchers.py index 857d64fb1..a0215c7de 100644 --- a/api/app/jobs/common_model_fetchers.py +++ b/api/app/jobs/common_model_fetchers.py @@ -172,13 +172,16 @@ class ModelValueProcessor: """ Iterate through model runs that have completed, and calculate the interpolated weather predictions. """ + # The NAM reports cumulative precipitation in three hour intervals.
+ NAM_ACCUMULATION_INTERVAL = 3 + def __init__(self, session, station_source: StationSourceEnum = StationSourceEnum.UNSPECIFIED): """ Prepare variables we're going to use throughout """ self.session = session self.stations = get_stations_synchronously(station_source) self.station_count = len(self.stations) - def _process_model_run(self, model_run: PredictionModelRunTimestamp): + def _process_model_run(self, model_run: PredictionModelRunTimestamp, model_type: ModelEnum): """ Interpolate predictions in the provided model run for all stations. """ logger.info('Interpolating values for model run: %s', model_run) # Iterate through stations. @@ -188,7 +191,7 @@ def _process_model_run(self, model_run: PredictionModelRunTimestamp): index, self.station_count, station.code, station.name) # Process this model run for station. - self._process_model_run_for_station(model_run, station) + self._process_model_run_for_station(model_run, station, model_type) # Commit all the weather station model predictions (it's fast if we line them all up and commit # them in one go.) logger.info('commit to database...') @@ -320,7 +323,8 @@ def _calculate_delta_precip(self, station, model_run, prediction, station_predic def _process_model_run_for_station(self, model_run: PredictionModelRunTimestamp, - station: WeatherStation): + station: WeatherStation, + model_type: ModelEnum): """ Process the model run for the provided station. """ # Extract the coordinate. @@ -353,9 +357,18 @@ def _process_model_run_for_station(self, query = get_model_run_predictions_for_grid( self.session, model_run, grid) + cumulative_precip = [0.0, 0.0, 0.0, 0.0] # Iterate through all the predictions. 
prev_prediction = None for prediction in query: + if model_type == ModelEnum.NAM: + # Add up the cumulative precipitation over the course of the model run + if prediction.apcp_sfc_0 is None: + prediction.apcp_sfc_0 = [0.0, 0.0, 0.0, 0.0] + temp_precip = numpy.add(cumulative_precip, prediction.apcp_sfc_0) + if prediction.prediction_timestamp.hour % self.NAM_ACCUMULATION_INTERVAL == 0: + cumulative_precip = temp_precip + prediction.apcp_sfc_0 = temp_precip if (prev_prediction is not None and prev_prediction.prediction_timestamp.hour == 18 and prediction.prediction_timestamp.hour == 21): @@ -384,7 +397,7 @@ def process(self, model_type: ModelEnum): logger.info('model %s', model) logger.info('model_run %s', model_run) # Process the model run. - self._process_model_run(model_run) + self._process_model_run(model_run, model_type) # Mark the model run as interpolated. self._mark_model_run_interpolated(model_run) refresh_morecast2_materialized_view(self.session) diff --git a/api/app/jobs/model_data.md b/api/app/jobs/model_data.md index a4a4f8f56..6e3189b90 100644 --- a/api/app/jobs/model_data.md +++ b/api/app/jobs/model_data.md @@ -23,6 +23,7 @@ - [Linear Interpolation](#linear-interpolation) - [From grids to fire weather stations](#from-grids-to-fire-weather-stations) - [To 12:00 PST (13:00 PDT)](#to-1200-pst-1300-pdt) + - [Calculating predicted 24 hour precipitation](#calculating-predicted-24-hour-precipitation) - [Cronjobs](#cronjobs) @@ -66,7 +67,7 @@ The GDPS, RDPS, and HRDPS models have different cycle lengths, different resolut #### Relevant Weather Variables -The GDPS, RDPS, and HRDPS weather variables that we currently fetch for use in Morecast are `TMP_TGL_2m` (temperature), `RELH_TGL_2m` (relative humidity), `WIND_TGL_10m` (wind speed), `WDIR_TGL_10m` (wind direction), and `APCP_SFC_0` (accumulated precipitation). 
+The GDPS, RDPS, and HRDPS weather variables that we currently fetch for use in Morecast are `TMP_TGL_2m` (temperature), `RELH_TGL_2m` (relative humidity), `WIND_TGL_10m` (wind speed), `WDIR_TGL_10m` (wind direction), and `APCP_SFC_0` (accumulated precipitation since the start of the model run). ### NOAA @@ -105,25 +106,83 @@ NAM model data is published on a Polar Stereographic projection at a 32km resolu The NAM model makes predictions for every hour for the first 36 hours, and then on a 3-hourly schedule to 84 hours [https://mag.ncep.noaa.gov/model-guidance-model-parameter.php?group=Model%20Guidance&model=NAM&area=NAMER&ps=area#]. As with the GFS model, we sometimes need to fetch data for 2 different hour offsets and then perform linear interpolation in order to have a weather prediction for 12:00 PST. The file naming convention that NAM uses indicates the model runtime as hours in UTC, and indexes that hour as 00. All subsequent hours in the model run are referenced as offsets of the 00 hour index. This means that for the 00:00 UTC model cycle, 20:00 UTC (or 12:00 PST) is referenced as hour 20; but for the 12:00 UTC model cycle, 20:00 UTC is referenced as hour 8 (20:00 - 12:00). 
Therefore, to fetch the noon PST data we need, the NAM cronjob pulls the following model hours: - For the 00:00 UTC model run time: + - Accumulation hours: 12, 24, 36, 48, 60 - Day 0: hour 20 (20:00 UTC = 12:00 PST) - Day 1: hours 42 and 45, to interpolate for hour 44 (20:00 UTC) - Day 2: hours 66 and 69, to interpolate for hour 68 (20:00 UTC) - For the 06:00 UTC model run time: + - Accumulation hours: 6, 12, 18, 24, 30, 36, 42, 48, 60 - Day 0: hour 14 (6+14 = 20:00 UTC) - Day 1: hours 36 and 39, to interpolate for hour 38 (38-24=14 hours, 14+06:00 = 20:00 UTC of the next day) - Day 2: hours 60 and 63, to interpolate for hour 62 (20:00 UTC of 2 days later) - For the 12:00 UTC model run time: + - Accumulation hours: 12, 24, 36, 48, 60, 72 - Day 0: hour 8 (12+8 = 20:00 UTC) - Day 1: hour 32 (12+32-24 = 20:00 UTC of the next day) - Day 2: hours 54 and 57, to interpolate for hour 56 (56-48 hours = 8 hours, 8+12:00 = 20:00 UTC of 2 days later) - For the 18:00 UTC model run time: + - Accumulation hours: 6, 12, 18, 24, 30, 36, 42, 48, 60, 66, 72 - Day 0: hour 2 (18+2 = 20:00 UTC) - Day 1: hour 26 (18+26-24 = 20:00 UTC of the next day) - Day 2: hours 48 and 51, to interpolate for hour 50 (50-48 hours = 2 hours, 2 + 18:00 = 20:00 UTC of 2 days later) +Note that the accumulation hours are required in order for us to calculate the cumulative precipitation across the model run which is needed when we subsequently calculate cumulative 24 hour precipitation. + #### Relevant Weather Variables -To be consistent with the weather variables that we pull from the Environment Canada models, for GFS we use comparable weather variables. There is a slight exception for wind direction and speed: Env Canada models output the separate U and V components for wind, but they also output the calculated wind speed and direction using those U, V components. NOAA only outputs the U,V components for wind - we must do the wind direction and wind speed calculations ourselves. 
+To be consistent with the weather variables that we pull from the Environment Canada models, for GFS we use comparable weather variables. There is a slight exception for wind direction and speed: Env Canada models output the separate U and V components for wind, but they also output the calculated wind speed and direction using those U, V components. NOAA only outputs the U,V components for wind - we must do the wind direction and wind speed calculations ourselves. The NOAA grib files for GFS contain two layers related to cumulative precipitation. Both layers are named 'apcp_sfc_0' but accumulate precipitation differently. Raster band 6 accumulates precip in 3 hour increments for odd hours and six hour increments for even hours, but we actually ignore this band. We use band 7 which accumulates precipitation across the model run. + +The NAM grib file does not contain a band for cumulative precipitation since the start of the model. This requires us to use band 6 which accumulates precipitation slightly differently for model run hours 00 and 12 versus 06 and 18. The 00 and 12 hour model runs contain cumulative precipitation in 12 hour cycles. + +For the 00 and 12 hour model runs: + +| Hour from start | Cumulative precip hours | +| --------------- | ----------------------- | +| 1 | 0 - 1 | +| 2 | 0 - 2 | +| 3 | 0 - 3 | +| ... | ... - ... | +| 12 | 0 - 12 | +| 13 | 12 - 13 | +| 14 | 12 - 14 | +| ... | ... - ... | +| 24 | 12 - 24 | +| 25 | 24 - 25 | +| ... | ... - ... | +| 36 | 24 - 36 | +| 39 | 36 - 39 | +| 42 | 36 - 42 | +| 45 | 36 - 45 | +| 48 | 36 - 48 | +| 51 | 48 - 51 | +| ... | ... - ... | + +For the 06 and 18 hour model runs: + +| Hour from start | Cumulative precip hours | +| --------------- | ----------------------- | +| 1 | 0 - 1 | +| 2 | 0 - 2 | +| 3 | 0 - 3 | +| 4 | 3 - 4 | +| 5 | 3 - 5 | +| 6 | 0 - 6 | +| 7 | 6 - 7 | +| 8 | 6 - 8 | +| 9 | 6 - 9 | +| 10 | 9 - 10 | +| 11 | 9 - 11 | +| 12 | 6 - 12 | +| 13 | 12 - 13 | +| ... | ... - ...
| +| 36 | 30 - 36 | +| 39 | 36 - 39 | +| 42 | 36 - 42 | +| 45 | 42 - 45 | +| 48 | 42 - 48 | +| ... | ... - ... | + + ## Downloading Data @@ -135,15 +194,21 @@ There is a lot of data to download and analyze for these weather models, so to e Once model data has been downloaded, we store the model data for "grid subsets" relevant to our use in our `wps` Postgres database in the `model_run_grid_subset_predictions` table. Each row in this table stores temperature, RH, precipitation, wind speed, and wind direction data, as we need for Morecast. For each of these weather variables, each row contains an array of 4 data points, corresponding to the 4 corners of one square of the grid. The `prediction_model_grid_subset_id` in this table corresponds to a grid cell that contains one of BC's fire weather station locations, as stored in the `prediction_model_grid_subsets` table. When our backend inserts data into the `model_run_grid_subset_predictions` table, it also adds metadata pertinent to the specific model run. -Once the `model_run_grid_subset_predictions` table has been populated for a given model run, our backend then performs linear interpolation based on the geographic location of each weather station to calculate an approximated value for each weather variable. +Our back-end used to perform spatial interpolation using the four values in the grid subset. We no longer follow this process. In order to align with SpotWX, we now use the value from the grid cell in the grib file in which the weather station is located. These values are stored in the `weather_station_model_predictions` table. + +TODO - We need to amend our back-end code to only pull the necessary value from the grib files instead of pulling these grid subsets (4 values). 
+ +~~Once the `model_run_grid_subset_predictions` table has been populated for a given model run, our backend then performs linear interpolation based on the geographic location of each weather station to calculate an approximated value for each weather variable.~~ -For example, a small portion of the grid for a temperature raster band of a GRIB file might look like the image below. In this example, each point on the grid is spaced 5km apart. You can see that a fire weather station is located within one cell of this grid, where the 4 corners of the cell have temperature values of 3.1°, 2.4°, 2.7°, and 2.7°C. In order to approximate the predicted temperature at the weather station's exact location, we perform linear interpolation of the 4 temperature values listed above, where the weight of each data point is inversely proportional to the distance from the weather station to that point on the grid. Consequently, the 2.4°C from the top right corner of the cell will have the highest weight as it is closest to the weather station, and the 2.7°C from the bottom left corner will have the lowest weight as it is the furthest away. +~~For example, a small portion of the grid for a temperature raster band of a GRIB file might look like the image below. In this example, each point on the grid is spaced 5km apart. You can see that a fire weather station is located within one cell of this grid, where the 4 corners of the cell have temperature values of 3.1°, 2.4°, 2.7°, and 2.7°C. In order to approximate the predicted temperature at the weather station's exact location, we perform linear interpolation of the 4 temperature values listed above, where the weight of each data point is inversely proportional to the distance from the weather station to that point on the grid. 
Consequently, the 2.4°C from the top right corner of the cell will have the highest weight as it is closest to the weather station, and the 2.7°C from the bottom left corner will have the lowest weight as it is the furthest away.~~ -![Location-based linear interpolation for a weather station](../../../docs/images/Grid_wx_station.png) +~~![Location-based linear interpolation for a weather station](../../../docs/images/Grid_wx_station.png)~~ -This linear interpolation process is repeated for each of the weather variables in a model run, and for each of the weather stations in BC. These calculations are stored in the `weather_station_model_predictions` table. +~~This linear interpolation process is repeated for each of the weather variables in a model run, and for each of the weather stations in BC. These calculations are stored in the `weather_station_model_predictions` table.~~ -At the present time we are only performing linear interpolation from grid subsets to weather stations based on geographic distance (because it is the simplest method of interpolation). Forecasters have requested that we also perform linear interpolation based on station elevation as the results may be more accurate - this is a pending story in our backlog. +~~At the present time we are only performing linear interpolation from grid subsets to weather stations based on geographic distance (because it is the simplest method of interpolation).~~ + +Forecasters have requested that we also perform linear interpolation based on station elevation as the results may be more accurate - this is a pending story in our backlog. ### To 12:00 PST (13:00 PDT) @@ -151,6 +216,12 @@ Fire weather forecasters in the BC Wildfire Service create daily "noon forecasts This time-based linear interpolation is done as part of the data analysis process performed when the model data is retrieved from our third-party sources. 
See `process_grib.py` +### Calculating predicted 24 hour precipitation + +For any given date, Morecast 2 displays the precipitation predicted to fall in the 24 hours previous to that date. For example, for August 30, 2023, the predicted precip covers a time period from August 29, 2023 20:00 UTC to August 30, 2023 20:00 UTC. The first 24 hours of a numerical weather model run present a challenge because there is no way to calculate the predicted 24 hour precip purely from the model run data. Consider a HRDPS model run on August 30 at 12:00 UTC. Morecast 2 needs to display the predicted precipitation for August 30 at 20:00, but at that point we only have 8 hours of accumulated precipitation (from 12:00 to 20:00) from our model run; we are potentially missing precipitation that could have fallen from Aug 29 20:00 to August 30 12:00 which means we're missing 16 hours of data. To work around this, we pull actual rainfall values for that 16 hour period from our hourly_actuals table which scrapes observed values from the WF1 API. + +Continuing on with the above example, we also need to display 24 hour predicted cumulative precip for August 31 20:00. Since our model run contains data from more than 24 hours ago, we can perform simple subtraction of accumulated precipitation to get our 24 hour predicted cumulative precip (Aug 31 20:00 - Aug 30 20:00). + ## Cronjobs We have created a separate cronjob for each model, to retrieve the data from source at various times of day, depending on how often each model is run. Our cronjobs are set to execute at randomly selected minutes within the set hour, as a courtesy to our third-party data sources so that their servers are not overwhelmed with many simultaneous requests.
\ No newline at end of file diff --git a/api/app/jobs/noaa.py b/api/app/jobs/noaa.py index c9e4ba4e8..d49078c7a 100644 --- a/api/app/jobs/noaa.py +++ b/api/app/jobs/noaa.py @@ -37,7 +37,7 @@ # ------- NAM static variables ----------- # -NAM_BASE_URL = 'https://nomads.ncep.noaa.gov/cgi-bin/filter_nam_na.pl?' +NAM_BASE_URL = 'https://nomads.ncep.noaa.gov/cgi-bin/filter_nam.pl?' # -------------------------------------- # @@ -90,7 +90,13 @@ def get_noaa_subregion_filter_str() -> str: def get_nam_model_run_download_urls(download_date: datetime.datetime, model_cycle: str) -> Generator[str, None, None]: - """ Yield URLs to download NAM North America model runs """ + """ Yield URLs to download NAM North America model runs. """ + # The NAM does not accumulate precipitation throughout the model run; rather, for simplicity sake, they + # accumulate precipitation at 3 hour intervals. base_accumulation_hours are the three hour intervals that all + # four model runs have in common. The accumulation_hours are the additional three hour intervals we need to process + # for each model run in order to calculate a running total of cumulative precipitation throughout the course of a + # model run. There is some overlap between base_accumulation_hours and before_noon/after_noon, so we create a set + # which is converted back to a list. 
# for model_cycle 00: # for first day, need hour 20:00 UTC (13:00 PST) # Day 2: need hours 42 and 45 to interpolate for hour 44 (13:00 PST) @@ -110,24 +116,29 @@ def get_nam_model_run_download_urls(download_date: datetime.datetime, model_cycl # for first day, need hour 2 (18 + 2 = 20:00 UTC) # for second day, need hour 26 (18 + (26-24) = 20:00 UTC) # Day 3: need hours 48 and 51 to interpolate for hour 50 (13:00 PST) + base_accumulation_hours = [hour for hour in range(0, 67, 3)] if model_cycle == '00': + accumulation_hours = base_accumulation_hours noon = [20] before_noon = [42, 66] after_noon = [45, 69] elif model_cycle == '06': + accumulation_hours = base_accumulation_hours noon = [14] before_noon = [36, 60] after_noon = [39, 63] elif model_cycle == '12': + accumulation_hours = base_accumulation_hours + [66, 69, 72, 75] noon = [8, 32] - before_noon = [54] - after_noon = [57] + before_noon = [54, 78] + after_noon = [57, 81] elif model_cycle == '18': + accumulation_hours = base_accumulation_hours + [66, 69] noon = [2, 26] - before_noon = [48] - after_noon = [51] + before_noon = [48, 72] + after_noon = [51, 75] - all_hours = noon + before_noon + after_noon + all_hours = list(set(accumulation_hours + noon + before_noon + after_noon)) # sort list purely for human convenience when debugging. 
Functionally it doesn't matter all_hours.sort() @@ -138,7 +149,7 @@ def get_nam_model_run_download_urls(download_date: datetime.datetime, model_cycl for fcst_hour in all_hours: hh = format(fcst_hour, '02d') - filter_str = f'dir=%2Fnam.{year_mo_date}&file=nam.t{model_cycle}z.awip32{hh}.tm00.grib2&' + filter_str = f'dir=%2Fnam.{year_mo_date}&file=nam.t{model_cycle}z.awphys{hh}.tm00.grib2&' wx_vars_filter_str = get_noaa_wx_variables_filter_str() levels_filter_str = get_noaa_levels_filter_str() subregion_filter_str = get_noaa_subregion_filter_str() From 3472205c37f5ff57d0fd91599fae0f4c67e4d98d Mon Sep 17 00:00:00 2001 From: Darren Boss Date: Tue, 5 Sep 2023 16:18:52 -0700 Subject: [PATCH 05/11] Fix NAM accumulation Fix 00 and 12 hour accumulation interval --- api/app/jobs/noaa.py | 56 ++++++++++++++------------------------------ 1 file changed, 18 insertions(+), 38 deletions(-) diff --git a/api/app/jobs/noaa.py b/api/app/jobs/noaa.py index d49078c7a..c87026970 100644 --- a/api/app/jobs/noaa.py +++ b/api/app/jobs/noaa.py @@ -91,52 +91,32 @@ def get_noaa_subregion_filter_str() -> str: def get_nam_model_run_download_urls(download_date: datetime.datetime, model_cycle: str) -> Generator[str, None, None]: """ Yield URLs to download NAM North America model runs. """ - # The NAM does not accumulate precipitation throughout the model run; rather, for simplicity sake, they - # accumulate precipitation at 3 hour intervals. base_accumulation_hours are the three hour intervals that all - # four model runs have in common. The accumulation_hours are the additional three hour intervals we need to process - # for each model run in order to calculate a running total of cumulative precipitation throughout the course of a - # model run. There is some overlap between base_accumulation_hours and before_noon/after_noon, so we create a set - # which is converted back to a list. 
- # for model_cycle 00: - # for first day, need hour 20:00 UTC (13:00 PST) - # Day 2: need hours 42 and 45 to interpolate for hour 44 (13:00 PST) - # Day 3: need hours 66 and 69 to interpolate for hour 68 (13:00 PST) - # - # for model_cycle 06: - # for first day, need hour 14 (14+6 = 20:00 UTC) - # Day 2: need hours 36 and 39 to interpolate for hour 38 (13:00 PST) - # Day 3: need hours 60 and 63 to interpolate for hour 62 (13:00 PST) - # - # for model_cycle 12: - # for first day, need hour 8 (12+8 = 20:00 UTC) - # for second day, need hour 32 (12 + (32-24) = 20:00 UTC) - # Day 3: need hours 54 and 57 to interpolate for hour 56 (13:00 PST) - # - # for model_cycle 18: - # for first day, need hour 2 (18 + 2 = 20:00 UTC) - # for second day, need hour 26 (18 + (26-24) = 20:00 UTC) - # Day 3: need hours 48 and 51 to interpolate for hour 50 (13:00 PST) - base_accumulation_hours = [hour for hour in range(0, 67, 3)] + # The NAM does not accumulate precipitation throughout the model run. The 00 and 12 hour model runs accumulate + # precip at 12 hour intervals and the 06 and 18 hour model runs accumulate precip at 3 hour intervals. + # The accumulation_hours represent the hours needed in order to calculate accumulated precipitation for the model + # run. The noon variables contain a list of 20:00 UTC times for which a prediction exists for the NAM. The before_noon + # and after_noon variables contain lists of 18:00 UTC times and 21:00 UTC times needed for interpolating to 20:00 + # UTC as exact 20:00 UTC predictions do not exist beyond hour 36 of the model run.
if model_cycle == '00': - accumulation_hours = base_accumulation_hours + accumulation_hours = [hour for hour in range(0, 61, 12)] noon = [20] - before_noon = [42, 66] - after_noon = [45, 69] + before_noon = [18, 42, 66] + after_noon = [21, 45, 69] elif model_cycle == '06': - accumulation_hours = base_accumulation_hours + accumulation_hours = [hour for hour in range(0, 67, 3)] noon = [14] - before_noon = [36, 60] - after_noon = [39, 63] + before_noon = [12, 36, 60] + after_noon = [15, 39, 63] elif model_cycle == '12': - accumulation_hours = base_accumulation_hours + [66, 69, 72, 75] + accumulation_hours = [hour for hour in range(0, 73, 12)] noon = [8, 32] - before_noon = [54, 78] - after_noon = [57, 81] + before_noon = [6, 30, 54, 78] + after_noon = [9, 33, 57, 81] elif model_cycle == '18': - accumulation_hours = base_accumulation_hours + [66, 69] + accumulation_hours = [hour for hour in range(0, 70, 3)] noon = [2, 26] - before_noon = [48, 72] - after_noon = [51, 75] + before_noon = [0, 24, 48, 72] + after_noon = [3, 27, 51, 75] all_hours = list(set(accumulation_hours + noon + before_noon + after_noon)) # sort list purely for human convenience when debugging. Functionally it doesn't matter From 6e873d78728e9e9c660e83cfa3b935440b7bd86f Mon Sep 17 00:00:00 2001 From: Darren Boss Date: Tue, 5 Sep 2023 16:22:35 -0700 Subject: [PATCH 06/11] Missed a file --- api/app/jobs/common_model_fetchers.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/api/app/jobs/common_model_fetchers.py b/api/app/jobs/common_model_fetchers.py index a0215c7de..9550ece88 100644 --- a/api/app/jobs/common_model_fetchers.py +++ b/api/app/jobs/common_model_fetchers.py @@ -360,13 +360,18 @@ def _process_model_run_for_station(self, cumulative_precip = [0.0, 0.0, 0.0, 0.0] # Iterate through all the predictions. 
prev_prediction = None + + # 00 and 12 hour model runs accumulate precipitation in 12 hour intervals, 06 and 18 hour accumulate in + # 3 hour intervals + model_run_hour = model_run.prediction_run_timestamp.hour + nam_accumulation_interval = 3 if model_run_hour == 6 or model_run_hour == 18 else 12 for prediction in query: if model_type == ModelEnum.NAM: # Add up the cumulative precipitation over the course of the model run if prediction.apcp_sfc_0 is None: prediction.apcp_sfc_0 = [0.0, 0.0, 0.0, 0.0] temp_precip = numpy.add(cumulative_precip, prediction.apcp_sfc_0) - if prediction.prediction_timestamp.hour % self.NAM_ACCUMULATION_INTERVAL == 0: + if prediction.prediction_timestamp.hour % nam_accumulation_interval == 0: cumulative_precip = temp_precip prediction.apcp_sfc_0 = temp_precip if (prev_prediction is not None From c96efe0b450f551f0db4f521ed53abe6e747d2bd Mon Sep 17 00:00:00 2001 From: Darren Boss Date: Wed, 6 Sep 2023 09:37:55 -0700 Subject: [PATCH 07/11] Update unit tests --- api/app/jobs/noaa.py | 2 +- api/app/tests/weather_models/test_noaa_nam.py | 45 ++++++++++--------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/api/app/jobs/noaa.py b/api/app/jobs/noaa.py index c87026970..8c4fbcf82 100644 --- a/api/app/jobs/noaa.py +++ b/api/app/jobs/noaa.py @@ -203,7 +203,7 @@ def parse_gfs_url_for_timestamps(url: str): def parse_nam_url_for_timestamps(url: str): """ Interpret the model_run_timestamp and prediction_timestamp from a NAM model's URL """ - # sample URL: 'https://nomads.ncep.noaa.gov/cgi-bin/filter_nam_na.pl?dir=%2Fnam.20230414&file=nam.t00z.awip3220.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + # sample URL:
'https://nomads.ncep.noaa.gov/cgi-bin/filter_nam_na.pl?dir=%2Fnam.20230414&file=nam.t00z.awphys20.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' query = urlsplit(url).query params = parse_qs(query) model_run_date = params['dir'][0].split('.')[1] diff --git a/api/app/tests/weather_models/test_noaa_nam.py b/api/app/tests/weather_models/test_noaa_nam.py index cfbb910d6..63931d3fa 100644 --- a/api/app/tests/weather_models/test_noaa_nam.py +++ b/api/app/tests/weather_models/test_noaa_nam.py @@ -10,52 +10,55 @@ def test_get_nam_model_run_download_urls_for_00_utc(): # March 2 at 00:00 UTC is equivalent to March 1 in Eastern timezone - should return URL for previous day - # for the 00 UTC run, there should be 5 urls (for hours 20, 42, 45, 66, 69) - expected_num_of_urls = 5 + # for the 00 UTC run, there should be 13 urls (for hours 0, 12, 18, 20, 21, 24, 36, 42, 45, 48, 60, 66, 69) + expected_num_of_urls = 13 actual_urls = list(noaa.get_nam_model_run_download_urls(datetime(2023, 3, 2, 00, tzinfo=timezone.utc), '00')) assert len(actual_urls) == expected_num_of_urls - assert actual_urls[0] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230301&file=nam.t00z.awip3220.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230301&file=nam.t00z.awip3269.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[0] == noaa.NAM_BASE_URL + 
'dir=%2Fnam.20230301&file=nam.t00z.awphys00.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230301&file=nam.t00z.awphys69.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' def test_get_nam_model_run_download_urls_for_06_utc(): # Feb 15 at 06:00 UTC is still Feb 15 (at 02:00) in Eastern timezone - should return URL for same day - # for the 06 UTC run, there should be 5 urls (for hours 14, 36, 39, 60, 63) - expected_num_of_urls = 5 + # for the 06 UTC run, there should be 24 urls (for hours 0, 3, 6, 9, 12, 14, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, + # 45, 48, 51, 54, 57, 60, 63, 66) + expected_num_of_urls = 24 actual_urls = list(noaa.get_nam_model_run_download_urls(datetime(2023, 2, 15, 6, tzinfo=timezone.utc), '06')) assert len(actual_urls) == expected_num_of_urls assert actual_urls[0] == noaa.NAM_BASE_URL + \ - 'dir=%2Fnam.20230215&file=nam.t06z.awip3214.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230215&file=nam.t06z.awip3236.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230215&file=nam.t06z.awip3263.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + 
'dir=%2Fnam.20230215&file=nam.t06z.awphys00.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230215&file=nam.t06z.awphys03.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230215&file=nam.t06z.awphys66.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' def test_get_nam_model_run_download_urls_for_12_utc(): # March 9 at 12:00 UTC is still March 9 in Eastern timezone - should return URL for same day - # for the 12 UTC run, there should be 4 urls (for hours 08, 32, 54, 57) - expected_num_of_urls = 4 + # for the 12 UTC run, there should be 17 urls (for hours 0, 6, 8, 9, 12, 24, 30, 32, 33, 36, 48, 54, 57, 60, 72, 78, + # 81) + expected_num_of_urls = 17 actual_urls = list(noaa.get_nam_model_run_download_urls(datetime(2023, 3, 9, 12, tzinfo=timezone.utc), '12')) assert len(actual_urls) == expected_num_of_urls - assert actual_urls[0] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230309&file=nam.t12z.awip3208.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230309&file=nam.t12z.awip3232.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[-1] == noaa.NAM_BASE_URL + 
'dir=%2Fnam.20230309&file=nam.t12z.awip3257.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[0] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230309&file=nam.t12z.awphys00.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230309&file=nam.t12z.awphys06.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230309&file=nam.t12z.awphys81.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' def test_get_nam_model_run_download_urls_for_18_utc(): # Jan 27 at 18:00 UTC is still Jan 27 in Eastern timezone - should return URL for same day - # for the 18 UTC run, there should be 4 urls (for hours 02, 26, 48, 51) - expected_num_of_urls = 4 + # for the 18 UTC run, there should be 28 urls (for hours 0, 2, 3, 6, 9, 12, 15, 18, 21, 24, 26, 27, 30, 33, 36, 39, + # 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75) + expected_num_of_urls = 28 actual_urls = list(noaa.get_nam_model_run_download_urls(datetime(2023, 1, 27, 18, tzinfo=timezone.utc), '18')) assert len(actual_urls) == expected_num_of_urls - assert actual_urls[0] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230127&file=nam.t18z.awip3202.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert 
actual_urls[1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230127&file=nam.t18z.awip3226.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230127&file=nam.t18z.awip3251.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[0] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230127&file=nam.t18z.awphys00.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230127&file=nam.t18z.awphys02.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230127&file=nam.t18z.awphys75.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' def test_parse_url_for_timestamps_simple(): """ simple test case for noaa.parse_url_for_timestamps(): model_run_timestamp and prediction_timestamp are on the same day """ - url = noaa.NAM_BASE_URL + 'dir=%2Fnam.20230413&file=nam.t12z.awip3206.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + url = noaa.NAM_BASE_URL + 
'dir=%2Fnam.20230413&file=nam.t12z.awphys06.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' expected_model_run_timestamp = datetime(2023, 4, 13, 12, 0, tzinfo=timezone.utc) expected_prediction_timestamp = datetime(2023, 4, 13, 18, 0, tzinfo=timezone.utc) actual_model_run_timestamp, actual_prediction_timestamp = noaa.parse_nam_url_for_timestamps(url) @@ -66,7 +69,7 @@ def test_parse_url_for_timestamps_simple(): def test_parse_url_for_timestamps_complex(): """ more complex test case for noaa.parse_url_for_timestamps(): model_run_timestamp and prediction_timestamp are on different days """ - url = noaa.NAM_BASE_URL + 'dir=%2Fnam.20230413&file=nam.t18z.awip3227.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + url = noaa.NAM_BASE_URL + 'dir=%2Fnam.20230413&file=nam.t18z.awphys27.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' expected_model_run_timestamp = datetime(2023, 4, 13, 18, 0, tzinfo=timezone.utc) expected_prediction_timestamp = datetime(2023, 4, 14, 21, 0, tzinfo=timezone.utc) actual_model_run_timestamp, actual_prediction_timestamp = noaa.parse_nam_url_for_timestamps(url) From 0df815f47ceba6b12411cb79a71b67cbcbaf64e6 Mon Sep 17 00:00:00 2001 From: Darren Boss Date: Wed, 6 Sep 2023 10:17:31 -0700 Subject: [PATCH 08/11] Clean up and docs --- api/app/jobs/common_model_fetchers.py | 4 +-- api/app/jobs/model_data.md | 52 +++++++++++++-------------- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/api/app/jobs/common_model_fetchers.py b/api/app/jobs/common_model_fetchers.py index 9550ece88..3c72c7b78 100644 --- 
a/api/app/jobs/common_model_fetchers.py +++ b/api/app/jobs/common_model_fetchers.py @@ -172,9 +172,6 @@ class ModelValueProcessor: """ Iterate through model runs that have completed, and calculate the interpolated weather predictions. """ - # The NAM reports cumulative precipitation in three hour intervals. - NAM_ACCUMULATION_INTERVAL = 3 - def __init__(self, session, station_source: StationSourceEnum = StationSourceEnum.UNSPECIFIED): """ Prepare variables we're going to use throughout """ self.session = session @@ -372,6 +369,7 @@ def _process_model_run_for_station(self, prediction.apcp_sfc_0 = [0.0, 0.0, 0.0, 0.0] temp_precip = numpy.add(cumulative_precip, prediction.apcp_sfc_0) if prediction.prediction_timestamp.hour % nam_accumulation_interval == 0: + # If we're on an 'accumulation interval', update the cumulative precip cumulative_precip = temp_precip prediction.apcp_sfc_0 = temp_precip if (prev_prediction is not None diff --git a/api/app/jobs/model_data.md b/api/app/jobs/model_data.md index 6e3189b90..124fa6851 100644 --- a/api/app/jobs/model_data.md +++ b/api/app/jobs/model_data.md @@ -80,7 +80,7 @@ Bottom latitude: 48 #### GFS -We fetch GFS model data on a 0.25° scale (roughly equivalent to 24km at the centre of BC). +We fetch GFS model data on a 0.25° scale (roughly equivalent to 24km at the centre of BC). ##### Model Run Cycles @@ -103,28 +103,28 @@ NAM model data is published on a Polar Stereographic projection at a 32km resolu ##### Model Run Cycles -The NAM model makes predictions for every hour for the first 36 hours, and then on a 3-hourly schedule to 84 hours [https://mag.ncep.noaa.gov/model-guidance-model-parameter.php?group=Model%20Guidance&model=NAM&area=NAMER&ps=area#]. As with the GFS model, we sometimes need to fetch data for 2 different hour offsets and then perform linear interpolation in order to have a weather prediction for 12:00 PST. 
The file naming convention that NAM uses indicates the model runtime as hours in UTC, and indexes that hour as 00. All subsequent hours in the model run are referenced as offsets of the 00 hour index. This means that for the 00:00 UTC model cycle, 20:00 UTC (or 12:00 PST) is referenced as hour 20; but for the 12:00 UTC model cycle, 20:00 UTC is referenced as hour 8 (20:00 - 12:00). Therefore, to fetch the noon PST data we need, the NAM cronjob pulls the following model hours: +The NAM model makes predictions for every hour for the first 36 hours, and then on a 3-hourly schedule to 84 hours [https://mag.ncep.noaa.gov/model-guidance-model-parameter.php?group=Model%20Guidance&model=NAM&area=NAMER&ps=area#]. As with the GFS model, we sometimes need to fetch data for 2 different hour offsets and then perform linear interpolation in order to have a weather prediction for 12:00 PST. The file naming convention that NAM uses indicates the model runtime as hours in UTC, and indexes that hour as 00. All subsequent hours in the model run are referenced as offsets of the 00 hour index. This means that for the 00:00 UTC model cycle, 20:00 UTC (or 12:00 PST) is referenced as hour 20; but for the 12:00 UTC model cycle, 20:00 UTC is referenced as hour 8 (20:00 - 12:00). Therefore, to fetch the noon PST data we need, the NAM cronjob pulls the following model hours. noon hours are equivalent to 20:00 UTC. before_noon hours are 18:00 UTC and used for linear interpolation along with after_noon hours which are 21:00 UTC. Accumulation hours represent the prediction hours required in order to calculate cumulative precipitation throughout the model run.
- For the 00:00 UTC model run time: - - Accumulation hours: 12, 24, 36, 48, 60 - - Day 0: hour 20 (20:00 UTC = 12:00 PST) - - Day 1: hours 42 and 45, to interpolate for hour 44 (20:00 UTC) - - Day 2: hours 66 and 69, to interpolate for hour 68 (20:00 UTC) + - accumulation_hours = 0, 12, 24, 36, 48, 60 + - noon = 20 + - before_noon = 18, 42, 66 + - after_noon = 21, 45, 69 - For the 06:00 UTC model run time: - - Accumulation hours: 6, 12, 18, 24, 30, 36, 42, 48, 60 - - Day 0: hour 14 (6+14 = 20:00 UTC) - - Day 1: hours 36 and 39, to interpolate for hour 38 (38-24=14 hours, 14+06:00 = 20:00 UTC of the next day) - - Day 2: hours 60 and 63, to interpolate for hour 62 (20:00 UTC of 2 days later) + - accumulation_hours = 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66 + - noon = 14 + - before_noon = 12, 36, 60 + - after_noon = 15, 39, 63 - For the 12:00 UTC model run time: - - Accumulation hours: 12, 24, 36, 48, 60, 72 - - Day 0: hour 8 (12+8 = 20:00 UTC) - - Day 1: hour 32 (12+32-24 = 20:00 UTC of the next day) - - Day 2: hours 54 and 57, to interpolate for hour 56 (56-48 hours = 8 hours, 8+12:00 = 20:00 UTC of 2 days later) + - accumulation_hours = 0, 12, 24, 36, 48, 60, 72 + - noon = 8, 32 + - before_noon = 6, 30, 54, 78 + - after_noon = 9, 33, 57, 81 - For the 18:00 UTC model run time: - - Accumulation hours: 6, 12, 18, 24, 30, 36, 42, 48, 60, 66, 72 - - Day 0: hour 2 (18+2 = 20:00 UTC) - - Day 1: hour 26 (18+26-24 = 20:00 UTC of the next day) - - Day 2: hours 48 and 51, to interpolate for hour 50 (50-48 hours = 2 hours, 2 + 18:00 = 20:00 UTC of 2 days later) + - accumulation_hours = 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75 + - noon = [2, 26] + - before_noon = [0, 24, 48, 72] + - after_noon = [3, 27, 51, 75] Note that the accumulation hours are required in order for us to calculate the cumulative precipitation across the model run which is needed when we subsequently 
calculate cumulative 24 hour precipitation. @@ -132,7 +132,7 @@ Note that the accumulation hours are required in order for us to calculate the c To be consistent with the weather variables that we pull from the Environment Canada models, for GFS we use comparable weather variables. There is a slight exception for wind direction and speed: Env Canada models output the separate U and V components for wind, but they also output the calculated wind speed and direction using those U, V components. NOAA only outputs the U,V components for wind - we must do the wind direction and wind speed calculations ourselves. The NOAA grib files for GFS contain two layers related to cumulative precipitation. Both layers are named 'apcp_sfc_0' but accumulate precipitation differently. Raster band 6 accumulates precip in 3 hour increments for odd hours and six hour increments for even hours, but we actually ignore this band. We use band 7 which accumulats precipitation across the model run. -The NAM grib file does not contain a band for cumulative precipitation since the start of the model. This requires us to use band 6 which accumulates precipitation slightly differently for model run hours 00 and 12 versus 06 and 18. The 00 and 12 hour model runs contain cumulative precipitation in 12 hour cycles. +The NAM grib file does not contain a band for cumulative precipitation since the start of the model. This requires us to use band 6 which accumulates precipitation slightly differently for model run hours 00 and 12 versus 06 and 18. The 00 and 12 hour model runs contain cumulative precipitation in 12 hour cycles. For the 00 and 12 hour model runs: @@ -157,7 +157,7 @@ For the 00 and 12 hour model runs: | 51 | 48 - 51 | | ... | ... - ... 
| -For the 06 and 18 hour model runs: +For the 06 and 18 hour model runs precipitation accumlates in 3 hour intervals: | Hour from start | Cumulative precip hours | | --------------- | ----------------------- | @@ -166,20 +166,20 @@ For the 06 and 18 hour model runs: | 3 | 0 - 3 | | 4 | 3 - 4 | | 5 | 3 - 5 | -| 6 | 0 - 6 | +| 6 | 3 - 6 | | 7 | 6 - 7 | | 8 | 6 - 8 | | 9 | 6 - 9 | | 10 | 9 - 10 | | 11 | 9 - 11 | -| 12 | 6 - 12 | +| 12 | 9 - 12 | | 13 | 12 - 13 | | ... | ... - ... | -| 36 | 30 - 36 | -| 39 | 36 - 39 | -| 42 | 36 - 42 | +| 36 | 33 - 36 | +| 39 | 36 - 39 | +| 42 | 39 - 42 | | 45 | 42 - 45 | -| 48 | 42 - 48 | +| 48 | 45 - 48 | | ... | ... - ... | From d6305ae61b130f69ca702a67bd197336e3448dec Mon Sep 17 00:00:00 2001 From: Darren Boss Date: Thu, 7 Sep 2023 08:23:11 -0700 Subject: [PATCH 09/11] Typo --- api/app/jobs/model_data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/app/jobs/model_data.md b/api/app/jobs/model_data.md index 124fa6851..b062c38c3 100644 --- a/api/app/jobs/model_data.md +++ b/api/app/jobs/model_data.md @@ -218,7 +218,7 @@ This time-based linear interpolation is done as part of the data analysis proces ### Calculating predicted 24 hour precipitation -For any given date, Morecast 2 displays the precipitation predicted to fall in the 24 hours previous to that date. For example, for August 30, 2023, the predicted precip covers a time period from August 29, 2023 20:00 UTC to August 30, 2023 20:00 UTC. The first 24 hours of a numerical weather model run present a challenge because there is no way to calculate the predicted 24 hour precip purely from the model run data. Consider a HRDPS model run on August 30 at 12:00 UTC. 
Morecast 2 needs to display the predicted precipitation for August 30 at 20:00, but at that point we only have 8 hours of accumulated precipitation (from 12:00 to 20:00) from our model run, we are potentially missing precipitation that could have fallen from Aug 29 20:00 to August 30 12:00 which means we're missing 16 hours of data. To work around this, we pull actual rainfall values for that 16 hour period from our hourly_actuals table which scrapes observed values fromthe WF1 API. +For any given date, Morecast 2 displays the precipitation predicted to fall in the 24 hours previous to that date. For example, for August 30, 2023, the predicted precip covers a time period from August 29, 2023 20:00 UTC to August 30, 2023 20:00 UTC. The first 24 hours of a numerical weather model run present a challenge because there is no way to calculate the predicted 24 hour precip purely from the model run data. Consider a HRDPS model run on August 30 at 12:00 UTC. Morecast 2 needs to display the predicted precipitation for August 30 at 20:00, but at that point we only have 8 hours of accumulated precipitation (from 12:00 to 20:00) from our model run, we are potentially missing precipitation that could have fallen from Aug 29 20:00 to August 30 12:00 which means we're missing 16 hours of data. To work around this, we pull actual precip values for that 16 hour period from our hourly_actuals table which scrapes observed values from the WF1 API. Continuing on with the above example, we also need to display 24 hour predicted cumulative precip for August 31 20:00. Since our model run contains data from more than 24 hours ago, we can perform simple subtraction of accumulated precipitation to get our 24 hour predicted cumulative precip (Aug 31 20:00 - Aug 30 20:00). 
From 10627ee051f1deeffaaabafc7b0afd1970b2ca63 Mon Sep 17 00:00:00 2001 From: Darren Boss Date: Thu, 7 Sep 2023 10:47:58 -0700 Subject: [PATCH 10/11] Factor out NAM precip calcs Add unit tests --- api/app/jobs/common_model_fetchers.py | 31 +++++---- .../tests/jobs/test_common_model_fetchers.py | 64 +++++++++++++++++++ 2 files changed, 82 insertions(+), 13 deletions(-) create mode 100644 api/app/tests/jobs/test_common_model_fetchers.py diff --git a/api/app/jobs/common_model_fetchers.py b/api/app/jobs/common_model_fetchers.py index 3c72c7b78..b98b45363 100644 --- a/api/app/jobs/common_model_fetchers.py +++ b/api/app/jobs/common_model_fetchers.py @@ -168,6 +168,20 @@ def apply_data_retention_policy(): delete_weather_station_model_predictions(session, oldest_to_keep) +def accumulate_nam_precipitation(nam_cumulative_precip: list[float], prediction: ModelRunGridSubsetPrediction, model_run_hour: int): + """ Calculate overall cumulative precip and cumulative precip for the current prediction. """ + # 00 and 12 hour model runs accumulate precipitation in 12 hour intervals, 06 and 18 hour accumulate in + # 3 hour intervals + nam_accumulation_interval = 3 if model_run_hour == 6 or model_run_hour == 18 else 12 + cumulative_precip = nam_cumulative_precip + prediction_precip = prediction.apcp_sfc_0 or [0.0, 0.0, 0.0, 0.0] + current_precip = numpy.add(nam_cumulative_precip, prediction_precip) + if prediction.prediction_timestamp.hour % nam_accumulation_interval == 0: + # If we're on an 'accumulation interval', update the cumulative precip + cumulative_precip = current_precip + return (cumulative_precip, current_precip) + + class ModelValueProcessor: """ Iterate through model runs that have completed, and calculate the interpolated weather predictions. 
""" @@ -354,24 +368,15 @@ def _process_model_run_for_station(self, query = get_model_run_predictions_for_grid( self.session, model_run, grid) - cumulative_precip = [0.0, 0.0, 0.0, 0.0] + nam_cumulative_precip = [0.0, 0.0, 0.0, 0.0] # Iterate through all the predictions. prev_prediction = None - # 00 and 12 hour model runs accumulate precipitation in 12 hour intervals, 06 and 18 hour accumlate in - # 3 hour intervals - model_run_hour = model_run.prediction_run_timestamp.hour - nam_accumulation_interval = 3 if model_run_hour == 6 or model_run_hour == 18 else 12 for prediction in query: + # NAM model requires manual calculation of cumulative precip if model_type == ModelEnum.NAM: - # Add up the cumulative precipitation over the course of the model run - if prediction.apcp_sfc_0 is None: - prediction.apcp_sfc_0 = [0.0, 0.0, 0.0, 0.0] - temp_precip = numpy.add(cumulative_precip, prediction.apcp_sfc_0) - if prediction.prediction_timestamp.hour % nam_accumulation_interval == 0: - # If we're on an 'accumulation interval', update the cumulative precip - cumulative_precip = temp_precip - prediction.apcp_sfc_0 = temp_precip + nam_cumulative_precip, prediction.apcp_sfc_0 = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, model_run.prediction_run_timestamp.hour) if (prev_prediction is not None and prev_prediction.prediction_timestamp.hour == 18 and prediction.prediction_timestamp.hour == 21): diff --git a/api/app/tests/jobs/test_common_model_fetchers.py b/api/app/tests/jobs/test_common_model_fetchers.py new file mode 100644 index 000000000..333d755c7 --- /dev/null +++ b/api/app/tests/jobs/test_common_model_fetchers.py @@ -0,0 +1,64 @@ +from datetime import datetime +import numpy +from app.jobs.common_model_fetchers import accumulate_nam_precipitation +from app.db.models.weather_models import ModelRunGridSubsetPrediction + + +zero_hour_timestamp = datetime(2023, 9, 7, 0, 0, 0) +six_hour_timestamp = datetime(2023, 9, 7, 6, 0, 0) +non_accumulating_hour_timestamp 
= datetime(2023, 9, 7, 20, 0, 0) + + +def test_accumulator_is_zero_prediction_apcp_sfc_0_is_none(): + nam_cumulative_precip = numpy.array([0, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=None, prediction_timestamp=zero_hour_timestamp) + model_run_hour = 0 + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, model_run_hour) + assert (cumulative_precip == [0, 0, 0, 0]).all() + assert (prediction_precip == [0, 0, 0, 0]).all() + + +def test_accumulator_has_value_prediction_apcp_sfc_0_is_none(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=None, prediction_timestamp=zero_hour_timestamp) + model_run_hour = 0 + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, model_run_hour) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [1, 0, 0, 0]).all() + + +def test_accumulator_has_value_prediction_apcp_sfc_0_is_zero(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[0, 0, 0, 0], prediction_timestamp=zero_hour_timestamp) + model_run_hour = 0 + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, model_run_hour) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [1, 0, 0, 0]).all() + + +def test_accumulator_has_value_prediction_apcp_sfc_0_has_value(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=zero_hour_timestamp) + model_run_hour = 0 + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, model_run_hour) + assert (cumulative_precip == [2, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_non_accumulating_prediction_hour(): + 
nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=non_accumulating_hour_timestamp) + model_run_hour = 0 + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, model_run_hour) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() From 92b772d73d9ebc19885c3561e20040afd322d5e3 Mon Sep 17 00:00:00 2001 From: Darren Boss Date: Thu, 7 Sep 2023 10:59:30 -0700 Subject: [PATCH 11/11] More tests --- .../tests/jobs/test_common_model_fetchers.py | 117 +++++++++++++++--- 1 file changed, 98 insertions(+), 19 deletions(-) diff --git a/api/app/tests/jobs/test_common_model_fetchers.py b/api/app/tests/jobs/test_common_model_fetchers.py index 333d755c7..adec615db 100644 --- a/api/app/tests/jobs/test_common_model_fetchers.py +++ b/api/app/tests/jobs/test_common_model_fetchers.py @@ -4,18 +4,21 @@ from app.db.models.weather_models import ModelRunGridSubsetPrediction -zero_hour_timestamp = datetime(2023, 9, 7, 0, 0, 0) -six_hour_timestamp = datetime(2023, 9, 7, 6, 0, 0) -non_accumulating_hour_timestamp = datetime(2023, 9, 7, 20, 0, 0) +ZERO_HOUR_TIMESTAMP = datetime(2023, 9, 7, 0, 0, 0) +TWELVE_HOUR_TIMESTAMP = datetime(2023, 9, 7, 12, 0, 0) +NON_ACCUMULATING_HOUR_TIMESTAMP = datetime(2023, 9, 7, 20, 0, 0) +MODEL_RUN_ZERO_HOUR = 0 +MODEL_RUN_SIX_HOUR = 6 +MODEL_RUN_TWELVE_HOUR = 12 +MODEL_RUN_EIGHTEEN_HOUR = 18 def test_accumulator_is_zero_prediction_apcp_sfc_0_is_none(): nam_cumulative_precip = numpy.array([0, 0, 0, 0]) prediction = ModelRunGridSubsetPrediction( - apcp_sfc_0=None, prediction_timestamp=zero_hour_timestamp) - model_run_hour = 0 + apcp_sfc_0=None, prediction_timestamp=ZERO_HOUR_TIMESTAMP) cumulative_precip, prediction_precip = accumulate_nam_precipitation( - nam_cumulative_precip, prediction, model_run_hour) + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) assert 
(cumulative_precip == [0, 0, 0, 0]).all() assert (prediction_precip == [0, 0, 0, 0]).all() @@ -23,10 +26,9 @@ def test_accumulator_is_zero_prediction_apcp_sfc_0_is_none(): def test_accumulator_has_value_prediction_apcp_sfc_0_is_none(): nam_cumulative_precip = numpy.array([1, 0, 0, 0]) prediction = ModelRunGridSubsetPrediction( - apcp_sfc_0=None, prediction_timestamp=zero_hour_timestamp) - model_run_hour = 0 + apcp_sfc_0=None, prediction_timestamp=ZERO_HOUR_TIMESTAMP) cumulative_precip, prediction_precip = accumulate_nam_precipitation( - nam_cumulative_precip, prediction, model_run_hour) + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) assert (cumulative_precip == [1, 0, 0, 0]).all() assert (prediction_precip == [1, 0, 0, 0]).all() @@ -34,10 +36,9 @@ def test_accumulator_has_value_prediction_apcp_sfc_0_is_none(): def test_accumulator_has_value_prediction_apcp_sfc_0_is_zero(): nam_cumulative_precip = numpy.array([1, 0, 0, 0]) prediction = ModelRunGridSubsetPrediction( - apcp_sfc_0=[0, 0, 0, 0], prediction_timestamp=zero_hour_timestamp) - model_run_hour = 0 + apcp_sfc_0=[0, 0, 0, 0], prediction_timestamp=ZERO_HOUR_TIMESTAMP) cumulative_precip, prediction_precip = accumulate_nam_precipitation( - nam_cumulative_precip, prediction, model_run_hour) + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) assert (cumulative_precip == [1, 0, 0, 0]).all() assert (prediction_precip == [1, 0, 0, 0]).all() @@ -45,20 +46,98 @@ def test_accumulator_has_value_prediction_apcp_sfc_0_is_zero(): def test_accumulator_has_value_prediction_apcp_sfc_0_has_value(): nam_cumulative_precip = numpy.array([1, 0, 0, 0]) prediction = ModelRunGridSubsetPrediction( - apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=zero_hour_timestamp) - model_run_hour = 0 + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=ZERO_HOUR_TIMESTAMP) cumulative_precip, prediction_precip = accumulate_nam_precipitation( - nam_cumulative_precip, prediction, model_run_hour) + nam_cumulative_precip, prediction, 
MODEL_RUN_ZERO_HOUR) assert (cumulative_precip == [2, 0, 0, 0]).all() assert (prediction_precip == [2, 0, 0, 0]).all() -def test_non_accumulating_prediction_hour(): +def test_zero_hour_timstamp_with_accumulating_hour_timestamp(): nam_cumulative_precip = numpy.array([1, 0, 0, 0]) prediction = ModelRunGridSubsetPrediction( - apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=non_accumulating_hour_timestamp) - model_run_hour = 0 + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=NON_ACCUMULATING_HOUR_TIMESTAMP) cumulative_precip, prediction_precip = accumulate_nam_precipitation( - nam_cumulative_precip, prediction, model_run_hour) + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_zero_hour_model_run_with_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=TWELVE_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) + assert (cumulative_precip == [2, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_zero_hour_model_run_with_non_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=NON_ACCUMULATING_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_six_hour_model_run_with_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=TWELVE_HOUR_TIMESTAMP) + cumulative_precip, 
prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_SIX_HOUR) + assert (cumulative_precip == [2, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_six_hour_model_run_with_non_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=NON_ACCUMULATING_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_SIX_HOUR) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_twelve_hour_model_run_with_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=TWELVE_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_TWELVE_HOUR) + assert (cumulative_precip == [2, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_twelve_hour_model_run_with_non_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=NON_ACCUMULATING_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_TWELVE_HOUR) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_eighteen_hour_model_run_with_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=TWELVE_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, 
MODEL_RUN_EIGHTEEN_HOUR) + assert (cumulative_precip == [2, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_eighteen_hour_model_run_with_non_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=NON_ACCUMULATING_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_EIGHTEEN_HOUR) assert (cumulative_precip == [1, 0, 0, 0]).all() assert (prediction_precip == [2, 0, 0, 0]).all()