From c2a85a34f91af1aa8b3b815bcfde57470e5689b1 Mon Sep 17 00:00:00 2001 From: dgboss Date: Thu, 7 Sep 2023 12:05:30 -0700 Subject: [PATCH] NAM precipitation (#3096) --- api/app/jobs/common_model_fetchers.py | 29 +++- api/app/jobs/model_data.md | 113 +++++++++++--- api/app/jobs/noaa.py | 55 +++---- .../tests/jobs/test_common_model_fetchers.py | 143 ++++++++++++++++++ api/app/tests/weather_models/test_noaa_nam.py | 45 +++--- 5 files changed, 307 insertions(+), 78 deletions(-) create mode 100644 api/app/tests/jobs/test_common_model_fetchers.py diff --git a/api/app/jobs/common_model_fetchers.py b/api/app/jobs/common_model_fetchers.py index 857d64fb1..b98b45363 100644 --- a/api/app/jobs/common_model_fetchers.py +++ b/api/app/jobs/common_model_fetchers.py @@ -168,6 +168,20 @@ def apply_data_retention_policy(): delete_weather_station_model_predictions(session, oldest_to_keep) +def accumulate_nam_precipitation(nam_cumulative_precip: list[float], prediction: ModelRunGridSubsetPrediction, model_run_hour: int): + """ Calculate overall cumulative precip and cumulative precip for the current prediction. """ + # 00 and 12 hour model runs accumulate precipitation in 12 hour intervals, 06 and 18 hour accumulate in + # 3 hour intervals + nam_accumulation_interval = 3 if model_run_hour == 6 or model_run_hour == 18 else 12 + cumulative_precip = nam_cumulative_precip + prediction_precip = prediction.apcp_sfc_0 or [0.0, 0.0, 0.0, 0.0] + current_precip = numpy.add(nam_cumulative_precip, prediction_precip) + if prediction.prediction_timestamp.hour % nam_accumulation_interval == 0: + # If we're on an 'accumulation interval', update the cumulative precip + cumulative_precip = current_precip + return (cumulative_precip, current_precip) + + class ModelValueProcessor: """ Iterate through model runs that have completed, and calculate the interpolated weather predictions. 
""" @@ -178,7 +192,7 @@ def __init__(self, session, station_source: StationSourceEnum = StationSourceEnu self.stations = get_stations_synchronously(station_source) self.station_count = len(self.stations) - def _process_model_run(self, model_run: PredictionModelRunTimestamp): + def _process_model_run(self, model_run: PredictionModelRunTimestamp, model_type: ModelEnum): """ Interpolate predictions in the provided model run for all stations. """ logger.info('Interpolating values for model run: %s', model_run) # Iterate through stations. @@ -188,7 +202,7 @@ def _process_model_run(self, model_run: PredictionModelRunTimestamp): index, self.station_count, station.code, station.name) # Process this model run for station. - self._process_model_run_for_station(model_run, station) + self._process_model_run_for_station(model_run, station, model_type) # Commit all the weather station model predictions (it's fast if we line them all up and commit # them in one go.) logger.info('commit to database...') @@ -320,7 +334,8 @@ def _calculate_delta_precip(self, station, model_run, prediction, station_predic def _process_model_run_for_station(self, model_run: PredictionModelRunTimestamp, - station: WeatherStation): + station: WeatherStation, + model_type: ModelEnum): """ Process the model run for the provided station. """ # Extract the coordinate. @@ -353,9 +368,15 @@ def _process_model_run_for_station(self, query = get_model_run_predictions_for_grid( self.session, model_run, grid) + nam_cumulative_precip = [0.0, 0.0, 0.0, 0.0] # Iterate through all the predictions. 
prev_prediction = None + for prediction in query: + # NAM model requires manual calculation of cumulative precip + if model_type == ModelEnum.NAM: + nam_cumulative_precip, prediction.apcp_sfc_0 = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, model_run.prediction_run_timestamp.hour) if (prev_prediction is not None and prev_prediction.prediction_timestamp.hour == 18 and prediction.prediction_timestamp.hour == 21): @@ -384,7 +405,7 @@ def process(self, model_type: ModelEnum): logger.info('model %s', model) logger.info('model_run %s', model_run) # Process the model run. - self._process_model_run(model_run) + self._process_model_run(model_run, model_type) # Mark the model run as interpolated. self._mark_model_run_interpolated(model_run) refresh_morecast2_materialized_view(self.session) diff --git a/api/app/jobs/model_data.md b/api/app/jobs/model_data.md index a4a4f8f56..b062c38c3 100644 --- a/api/app/jobs/model_data.md +++ b/api/app/jobs/model_data.md @@ -23,6 +23,7 @@ - [Linear Interpolation](#linear-interpolation) - [From grids to fire weather stations](#from-grids-to-fire-weather-stations) - [To 12:00 PST (13:00 PDT)](#to-1200-pst-1300-pdt) + - [Calculating predicted 24 hour precipitation](#calculating-predicted-24-hour-precipitation) - [Cronjobs](#cronjobs) @@ -66,7 +67,7 @@ The GDPS, RDPS, and HRDPS models have different cycle lengths, different resolut #### Relevant Weather Variables -The GDPS, RDPS, and HRDPS weather variables that we currently fetch for use in Morecast are `TMP_TGL_2m` (temperature), `RELH_TGL_2m` (relative humidity), `WIND_TGL_10m` (wind speed), `WDIR_TGL_10m` (wind direction), and `APCP_SFC_0` (accumulated precipitation). +The GDPS, RDPS, and HRDPS weather variables that we currently fetch for use in Morecast are `TMP_TGL_2m` (temperature), `RELH_TGL_2m` (relative humidity), `WIND_TGL_10m` (wind speed), `WDIR_TGL_10m` (wind direction), and `APCP_SFC_0` (accumulated precipitation since the start of the model run). 
### NOAA @@ -79,7 +80,7 @@ Bottom latitude: 48 #### GFS -We fetch GFS model data on a 0.25° scale (roughly equivalent to 24km at the centre of BC). +We fetch GFS model data on a 0.25° scale (roughly equivalent to 24km at the centre of BC). ##### Model Run Cycles @@ -102,28 +103,86 @@ NAM model data is published on a Polar Stereographic projection at a 32km resolu ##### Model Run Cycles -The NAM model makes predictions for every hour for the first 36 hours, and then on a 3-hourly schedule to 84 hours [https://mag.ncep.noaa.gov/model-guidance-model-parameter.php?group=Model%20Guidance&model=NAM&area=NAMER&ps=area#]. As with the GFS model, we sometimes need to fetch data for 2 different hour offsets and then perform linear interpolation in order to have a weather prediction for 12:00 PST. The file naming convention that NAM uses indicates the model runtime as hours in UTC, and indexes that hour as 00. All subsequent hours in the model run are referenced as offsets of the 00 hour index. This means that for the 00:00 UTC model cycle, 20:00 UTC (or 12:00 PST) is referenced as hour 20; but for the 12:00 UTC model cycle, 20:00 UTC is referenced as hour 8 (20:00 - 12:00). Therefore, to fetch the noon PST data we need, the NAM cronjob pulls the following model hours: +The NAM model makes predictions for every hour for the first 36 hours, and then on a 3-hourly schedule to 84 hours [https://mag.ncep.noaa.gov/model-guidance-model-parameter.php?group=Model%20Guidance&model=NAM&area=NAMER&ps=area#]. As with the GFS model, we sometimes need to fetch data for 2 different hour offsets and then perform linear interpolation in order to have a weather prediction for 12:00 PST. The file naming convention that NAM uses indicates the model runtime as hours in UTC, and indexes that hour as 00. All subsequent hours in the model run are referenced as offsets of the 00 hour index. 
Accumulation hours represent the prediction hours required in order to calculate cumulative precipitation throughout the model run.
We use band 7 which accumulates precipitation across the model run.
For the 06 and 18 hour model runs precipitation accumulates in 3 hour intervals:
-Once the `model_run_grid_subset_predictions` table has been populated for a given model run, our backend then performs linear interpolation based on the geographic location of each weather station to calculate an approximated value for each weather variable. +Our back-end used to perform spatial interpolation using the four values in the grid subset. We no longer follow this process. In order to align with SpotWX, we now use the value from the grid cell in the grib file in which the weather station is located. These values are stored in the `weather_station_model_predictions` table. + +TODO - We need to amend our back-end code to only pull the necessary value from the grib files instead of pulling these grid subsets (4 values). + +~~Once the `model_run_grid_subset_predictions` table has been populated for a given model run, our backend then performs linear interpolation based on the geographic location of each weather station to calculate an approximated value for each weather variable.~~ -For example, a small portion of the grid for a temperature raster band of a GRIB file might look like the image below. In this example, each point on the grid is spaced 5km apart. You can see that a fire weather station is located within one cell of this grid, where the 4 corners of the cell have temperature values of 3.1°, 2.4°, 2.7°, and 2.7°C. In order to approximate the predicted temperature at the weather station's exact location, we perform linear interpolation of the 4 temperature values listed above, where the weight of each data point is inversely proportional to the distance from the weather station to that point on the grid. Consequently, the 2.4°C from the top right corner of the cell will have the highest weight as it is closest to the weather station, and the 2.7°C from the bottom left corner will have the lowest weight as it is the furthest away. 
+~~For example, a small portion of the grid for a temperature raster band of a GRIB file might look like the image below. In this example, each point on the grid is spaced 5km apart. You can see that a fire weather station is located within one cell of this grid, where the 4 corners of the cell have temperature values of 3.1°, 2.4°, 2.7°, and 2.7°C. In order to approximate the predicted temperature at the weather station's exact location, we perform linear interpolation of the 4 temperature values listed above, where the weight of each data point is inversely proportional to the distance from the weather station to that point on the grid. Consequently, the 2.4°C from the top right corner of the cell will have the highest weight as it is closest to the weather station, and the 2.7°C from the bottom left corner will have the lowest weight as it is the furthest away.~~ -![Location-based linear interpolation for a weather station](../../../docs/images/Grid_wx_station.png) +~~![Location-based linear interpolation for a weather station](../../../docs/images/Grid_wx_station.png)~~ -This linear interpolation process is repeated for each of the weather variables in a model run, and for each of the weather stations in BC. These calculations are stored in the `weather_station_model_predictions` table. +~~This linear interpolation process is repeated for each of the weather variables in a model run, and for each of the weather stations in BC. These calculations are stored in the `weather_station_model_predictions` table.~~ -At the present time we are only performing linear interpolation from grid subsets to weather stations based on geographic distance (because it is the simplest method of interpolation). Forecasters have requested that we also perform linear interpolation based on station elevation as the results may be more accurate - this is a pending story in our backlog. 
+~~At the present time we are only performing linear interpolation from grid subsets to weather stations based on geographic distance (because it is the simplest method of interpolation).~~ + +Forecasters have requested that we also perform linear interpolation based on station elevation as the results may be more accurate - this is a pending story in our backlog. ### To 12:00 PST (13:00 PDT) @@ -151,6 +216,12 @@ Fire weather forecasters in the BC Wildfire Service create daily "noon forecasts This time-based linear interpolation is done as part of the data analysis process performed when the model data is retrieved from our third-party sources. See `process_grib.py` +### Calculating predicted 24 hour precipitation + +For any given date, Morecast 2 displays the precipitation predicted to fall in the 24 hours previous to that date. For example, for August 30, 2023, the predicted precip covers a time period from August 29, 2023 20:00 UTC to August 30, 2023 20:00 UTC. The first 24 hours of a numerical weather model run present a challenge because there is no way to calculate the predicted 24 hour precip purely from the model run data. Consider a HRDPS model run on August 30 at 12:00 UTC. Morecast 2 needs to display the predicted precipitation for August 30 at 20:00, but at that point we only have 8 hours of accumulated precipitation (from 12:00 to 20:00) from our model run, we are potentially missing precipitation that could have fallen from Aug 29 20:00 to August 30 12:00 which means we're missing 16 hours of data. To work around this, we pull actual precip values for that 16 hour period from our hourly_actuals table which scrapes observed values from the WF1 API. + +Continuing on with the above example, we also need to display 24 hour predicted cumulative precip for August 31 20:00. 
Since our model run contains data from more than 24 hours ago, we can perform simple subtraction of accumulated precipitation to get our 24 hour predicted cumulative precip (Aug 31 20:00 - Aug 30 20:00). + ## Cronjobs We have created a separate cronjob for each model, to retrieve the data from source at various times of day, depending on how often each model is run. Our cronjobs are set to execute at randomly selected minutes within the set hour, as a courtesy to our third-party data sources so that their servers are not overwhelmed with many simultaneous requests. \ No newline at end of file diff --git a/api/app/jobs/noaa.py b/api/app/jobs/noaa.py index c9e4ba4e8..8c4fbcf82 100644 --- a/api/app/jobs/noaa.py +++ b/api/app/jobs/noaa.py @@ -37,7 +37,7 @@ # ------- NAM static variables ----------- # -NAM_BASE_URL = 'https://nomads.ncep.noaa.gov/cgi-bin/filter_nam_na.pl?' +NAM_BASE_URL = 'https://nomads.ncep.noaa.gov/cgi-bin/filter_nam.pl?' # -------------------------------------- # @@ -90,44 +90,35 @@ def get_noaa_subregion_filter_str() -> str: def get_nam_model_run_download_urls(download_date: datetime.datetime, model_cycle: str) -> Generator[str, None, None]: - """ Yield URLs to download NAM North America model runs """ - # for model_cycle 00: - # for first day, need hour 20:00 UTC (13:00 PST) - # Day 2: need hours 42 and 45 to interpolate for hour 44 (13:00 PST) - # Day 3: need hours 66 and 69 to interpolate for hour 68 (13:00 PST) - # - # for model_cycle 06: - # for first day, need hour 14 (14+6 = 20:00 UTC) - # Day 2: need hours 36 and 39 to interpolate for hour 38 (13:00 PST) - # Day 3: need hours 60 and 63 to interpolate for hour 62 (13:00 PST) - # - # for model_cycle 12: - # for first day, need hour 8 (12+8 = 20:00 UTC) - # for second day, need hour 32 (12 + (32-24) = 20:00 UTC) - # Day 3: need hours 54 and 57 to interpolate for hour 56 (13:00 PST) - # - # for model_cycle 18: - # for first day, need hour 2 (18 + 2 = 20:00 UTC) - # for second day, need hour 26 
The 00 and 12 hour model runs accumulate + # precip at 12 hour intervals and the 06 and 18 hour model runs accumulate precip at 3 hour intervals. + # The accumulation_hours represent the hours needed in order to calculate accumulated precipitation for the model + # run. The noon variables contain a list of 20:00 UTC times for which a prediction exists for the NAM.
Functionally it doesn't matter all_hours.sort() @@ -138,7 +129,7 @@ def get_nam_model_run_download_urls(download_date: datetime.datetime, model_cycl for fcst_hour in all_hours: hh = format(fcst_hour, '02d') - filter_str = f'dir=%2Fnam.{year_mo_date}&file=nam.t{model_cycle}z.awip32{hh}.tm00.grib2&' + filter_str = f'dir=%2Fnam.{year_mo_date}&file=nam.t{model_cycle}z.awphys{hh}.tm00.grib2&' wx_vars_filter_str = get_noaa_wx_variables_filter_str() levels_filter_str = get_noaa_levels_filter_str() subregion_filter_str = get_noaa_subregion_filter_str() @@ -212,7 +203,7 @@ def parse_gfs_url_for_timestamps(url: str): def parse_nam_url_for_timestamps(url: str): """ Interpret the model_run_timestamp and prediction_timestamp from a NAM model's URL """ - # sample URL: 'https://nomads.ncep.noaa.gov/cgi-bin/filter_nam_na.pl?dir=%2Fnam.20230414&file=nam.t00z.awip3220.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + # sample URL: 'https://nomads.ncep.noaa.gov/cgi-bin/filter_nam_na.pl?dir=%2Fnam.20230414&file=nam.t00z.awphys20.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' query = urlsplit(url).query params = parse_qs(query) model_run_date = params['dir'][0].split('.')[1] diff --git a/api/app/tests/jobs/test_common_model_fetchers.py b/api/app/tests/jobs/test_common_model_fetchers.py new file mode 100644 index 000000000..adec615db --- /dev/null +++ b/api/app/tests/jobs/test_common_model_fetchers.py @@ -0,0 +1,143 @@ +from datetime import datetime +import numpy +from app.jobs.common_model_fetchers import accumulate_nam_precipitation +from app.db.models.weather_models import ModelRunGridSubsetPrediction + + +ZERO_HOUR_TIMESTAMP = datetime(2023, 9, 7, 0, 0, 0) +TWELVE_HOUR_TIMESTAMP = datetime(2023, 
9, 7, 12, 0, 0) +NON_ACCUMULATING_HOUR_TIMESTAMP = datetime(2023, 9, 7, 20, 0, 0) +MODEL_RUN_ZERO_HOUR = 0 +MODEL_RUN_SIX_HOUR = 6 +MODEL_RUN_TWELVE_HOUR = 12 +MODEL_RUN_EIGHTEEN_HOUR = 18 + + +def test_accumulator_is_zero_prediction_apcp_sfc_0_is_none(): + nam_cumulative_precip = numpy.array([0, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=None, prediction_timestamp=ZERO_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) + assert (cumulative_precip == [0, 0, 0, 0]).all() + assert (prediction_precip == [0, 0, 0, 0]).all() + + +def test_accumulator_has_value_prediction_apcp_sfc_0_is_none(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=None, prediction_timestamp=ZERO_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [1, 0, 0, 0]).all() + + +def test_accumulator_has_value_prediction_apcp_sfc_0_is_zero(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[0, 0, 0, 0], prediction_timestamp=ZERO_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [1, 0, 0, 0]).all() + + +def test_accumulator_has_value_prediction_apcp_sfc_0_has_value(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=ZERO_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) + assert (cumulative_precip == [2, 0, 0, 0]).all() + assert 
(prediction_precip == [2, 0, 0, 0]).all() + + +def test_zero_hour_timstamp_with_accumulating_hour_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=NON_ACCUMULATING_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_zero_hour_model_run_with_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=TWELVE_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) + assert (cumulative_precip == [2, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_zero_hour_model_run_with_non_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=NON_ACCUMULATING_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_ZERO_HOUR) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_six_hour_model_run_with_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=TWELVE_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_SIX_HOUR) + assert (cumulative_precip == [2, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_six_hour_model_run_with_non_accumulating_timestamp(): + 
nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=NON_ACCUMULATING_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_SIX_HOUR) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_twelve_hour_model_run_with_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=TWELVE_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_TWELVE_HOUR) + assert (cumulative_precip == [2, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_twelve_hour_model_run_with_non_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=NON_ACCUMULATING_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_TWELVE_HOUR) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_eighteen_hour_model_run_with_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=TWELVE_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_EIGHTEEN_HOUR) + assert (cumulative_precip == [2, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() + + +def test_eighteen_hour_model_run_with_non_accumulating_timestamp(): + nam_cumulative_precip = numpy.array([1, 0, 0, 0]) + prediction = ModelRunGridSubsetPrediction( + 
apcp_sfc_0=[1, 0, 0, 0], prediction_timestamp=NON_ACCUMULATING_HOUR_TIMESTAMP) + cumulative_precip, prediction_precip = accumulate_nam_precipitation( + nam_cumulative_precip, prediction, MODEL_RUN_EIGHTEEN_HOUR) + assert (cumulative_precip == [1, 0, 0, 0]).all() + assert (prediction_precip == [2, 0, 0, 0]).all() diff --git a/api/app/tests/weather_models/test_noaa_nam.py b/api/app/tests/weather_models/test_noaa_nam.py index cfbb910d6..63931d3fa 100644 --- a/api/app/tests/weather_models/test_noaa_nam.py +++ b/api/app/tests/weather_models/test_noaa_nam.py @@ -10,52 +10,55 @@ def test_get_nam_model_run_download_urls_for_00_utc(): # March 2 at 00:00 UTC is equivalent to March 1 in Eastern timezone - should return URL for previous day - # for the 00 UTC run, there should be 5 urls (for hours 20, 42, 45, 66, 69) - expected_num_of_urls = 5 + # for the 00 UTC run, there should be 13 urls (for hours 0, 12, 18, 20, 21, 24, 36, 42, 45, 48, 60, 66, 69) + expected_num_of_urls = 13 actual_urls = list(noaa.get_nam_model_run_download_urls(datetime(2023, 3, 2, 00, tzinfo=timezone.utc), '00')) assert len(actual_urls) == expected_num_of_urls - assert actual_urls[0] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230301&file=nam.t00z.awip3220.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230301&file=nam.t00z.awip3269.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[0] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230301&file=nam.t00z.awphys00.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + 
assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230301&file=nam.t00z.awphys69.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' def test_get_nam_model_run_download_urls_for_06_utc(): # Feb 15 at 06:00 UTC is still Feb 15 (at 02:00) in Eastern timezone - should return URL for same day - # for the 06 UTC run, there should be 5 urls (for hours 14, 36, 39, 60, 63) - expected_num_of_urls = 5 + # for the 06 UTC run, there should be 24 urls (for hours 0, 3, 6, 9, 12, 14, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, + # 45, 48, 51, 54, 57, 60, 63, 66) + expected_num_of_urls = 24 actual_urls = list(noaa.get_nam_model_run_download_urls(datetime(2023, 2, 15, 6, tzinfo=timezone.utc), '06')) assert len(actual_urls) == expected_num_of_urls assert actual_urls[0] == noaa.NAM_BASE_URL + \ - 'dir=%2Fnam.20230215&file=nam.t06z.awip3214.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230215&file=nam.t06z.awip3236.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230215&file=nam.t06z.awip3263.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + 'dir=%2Fnam.20230215&file=nam.t06z.awphys00.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert 
actual_urls[1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230215&file=nam.t06z.awphys03.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230215&file=nam.t06z.awphys66.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' def test_get_nam_model_run_download_urls_for_12_utc(): # March 9 at 12:00 UTC is still March 9 in Eastern timezone - should return URL for same day - # for the 12 UTC run, there should be 4 urls (for hours 08, 32, 54, 57) - expected_num_of_urls = 4 + # for the 12 UTC run, there should be 17 urls (for hours 0, 6, 8, 9, 12, 24, 30, 32, 33, 36, 48, 54, 57, 60, 72, 78, + # 81) + expected_num_of_urls = 17 actual_urls = list(noaa.get_nam_model_run_download_urls(datetime(2023, 3, 9, 12, tzinfo=timezone.utc), '12')) assert len(actual_urls) == expected_num_of_urls - assert actual_urls[0] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230309&file=nam.t12z.awip3208.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230309&file=nam.t12z.awip3232.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230309&file=nam.t12z.awip3257.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert 
actual_urls[0] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230309&file=nam.t12z.awphys00.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230309&file=nam.t12z.awphys06.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230309&file=nam.t12z.awphys81.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' def test_get_nam_model_run_download_urls_for_18_utc(): # Jan 27 at 18:00 UTC is still Jan 27 in Eastern timezone - should return URL for same day - # for the 18 UTC run, there should be 4 urls (for hours 02, 26, 48, 51) - expected_num_of_urls = 4 + # for the 18 UTC run, there should be 28 urls (for hours 0, 2, 3, 6, 9, 12, 15, 18, 21, 24, 26, 27, 30, 33, 36, 39, + # 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75) + expected_num_of_urls = 28 actual_urls = list(noaa.get_nam_model_run_download_urls(datetime(2023, 1, 27, 18, tzinfo=timezone.utc), '18')) assert len(actual_urls) == expected_num_of_urls - assert actual_urls[0] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230127&file=nam.t18z.awip3202.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[1] == noaa.NAM_BASE_URL + 
'dir=%2Fnam.20230127&file=nam.t18z.awip3226.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' - assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230127&file=nam.t18z.awip3251.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[0] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230127&file=nam.t18z.awphys00.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230127&file=nam.t18z.awphys02.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + assert actual_urls[-1] == noaa.NAM_BASE_URL + 'dir=%2Fnam.20230127&file=nam.t18z.awphys75.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' def test_parse_url_for_timestamps_simple(): """ simple test case for noaa.parse_url_for_timestamps(): model_run_timestamp and prediction_timestamp are on the same day """ - url = noaa.NAM_BASE_URL + 'dir=%2Fnam.20230413&file=nam.t12z.awip3206.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + url = noaa.NAM_BASE_URL + 
'dir=%2Fnam.20230413&file=nam.t12z.awphys06.tm00.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' expected_model_run_timestamp = datetime(2023, 4, 13, 12, 0, tzinfo=timezone.utc) expected_prediction_timestamp = datetime(2023, 4, 13, 18, 0, tzinfo=timezone.utc) actual_model_run_timestamp, actual_prediction_timestamp = noaa.parse_nam_url_for_timestamps(url) @@ -66,7 +69,7 @@ def test_parse_url_for_timestamps_simple(): def test_parse_url_for_timestamps_complex(): """ more complex test case for noaa.parse_url_for_timestamps(): model_run_timestamp and prediction_timestamp are on different days """ - url = noaa.NAM_BASE_URL + 'dir=%2Fnam.20230413&file=nam.t18z.awip3227.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' + url = noaa.NAM_BASE_URL + 'dir=%2Fnam.20230413&file=nam.t18z.awphys27.grib2&var_APCP=on&var_RH=on&var_TMP=on&var_UGRD=on&var_VGRD=on&lev_surface=on&lev_2_m_above_ground=on&lev_10_m_above_ground=on&subregion=&toplat=60&leftlon=-139&rightlon=-114&bottomlat=48' expected_model_run_timestamp = datetime(2023, 4, 13, 18, 0, tzinfo=timezone.utc) expected_prediction_timestamp = datetime(2023, 4, 14, 21, 0, tzinfo=timezone.utc) actual_model_run_timestamp, actual_prediction_timestamp = noaa.parse_nam_url_for_timestamps(url)