PCMDI · lee1043 · Apr 28, 2021 · Jul 9, 2021 · Jul 9, 2021 · Jul 10, 2021
diff --git a/pcmdi_metrics/drought/README.md b/pcmdi_metrics/drought/README.md
@@ -0,0 +1 @@
+Drought metrics
diff --git a/pcmdi_metrics/drought/__init__.py b/pcmdi_metrics/drought/__init__.py
diff --git a/pcmdi_metrics/drought/drought_metric_driver.py b/pcmdi_metrics/drought/drought_metric_driver.py
@@ -0,0 +1,39 @@
+import xarray
+import pandas as pd
+from pcmdi_metrics.drought.lib import spi
+from pcmdi_metrics.drought.lib import cdd
+import matplotlib as mpl
+
+# driver to call SPI and CDD
+
+# PMP PARSER READIN
+
+# LOOP FOR OBS or MODELS
+ # LOOP FOR RUNS
+ # CALL SPI -- calculated for each grid --> output to nc file
+ # Input: monthly precipitation over land (OBS: CPC over CONUS)
+ # CALL CDD -- calculated for each grid --> output to nc file
+ # Input: daily precipitation over land
+
+# OUTPUT TO JSON (which output to JSON??)
+
+
+# The input data should be monthly mean precipitaion. The data used here can be downloaded at https://psl.noaa.gov/data/gridded/data.unified.daily.conus.html
+observe_path = "/Users/lee1043/Documents/Research/DATA/CPC/precip.V1.0.mon.mean.nc"
+observe = xarray.open_dataset(observe_path)
+observe_df = observe.to_dataframe().reset_index()
+observe_regional_mean = observe_df.groupby('time').mean().reset_index()
+date = observe_regional_mean.time
+
+# Build a dataframe to save the SPI3, SPI6, SPI12 and SPI36 of the regional mean monthly precipitation
+observe_regional_spi = pd.DataFrame(columns=['SPI3','SPI6','SPI12','SPI36'], index=date)
+for scale, col in zip([3,6,12,36], observe_regional_spi.columns):
+ observe_regional_spi[col] = spi(observe_regional_mean.precip,observe_regional_mean.precip, scale) 
+observe_regional_spi = observe_regional_spi.reset_index()
+
+# Here I plot the regional mean SPI6 over the total period
+fig, (ax) = plt.subplots(nrows=1, ncols=1, sharex=True,sharey=True,figsize=(20,8))
+ax.fill_between(observe_regional_spi.time, 0, observe_regional_spi.SPI6, where=observe_regional_spi.SPI6 >= 0, facecolor='blue', interpolate=True)
+ax.fill_between(observe_regional_spi.time, 0, observe_regional_spi.SPI6, where=observe_regional_spi.SPI6 < 0, facecolor='red', interpolate=True)
+ax.set_ylabel('Regional Mean SPI6 over CONUS',fontsize=22)
+ax.set_xlabel("Date",fontsize=22)
diff --git a/pcmdi_metrics/drought/lib/__init__.py b/pcmdi_metrics/drought/lib/__init__.py
@@ -0,0 +1,2 @@
+from .lib_spi import spi #noqa
+from .lib_cdd import cdd #noqa
diff --git a/pcmdi_metrics/drought/lib/lib_cdd.py b/pcmdi_metrics/drought/lib/lib_cdd.py
@@ -0,0 +1,44 @@
+import numpy as np
+import pandas as pd
+
+#cdd is the function to calculate the CDD, the output is a dataframe grouped by lat lon and time_period.
+
+# data_df should be the input daily precipitation dataframe consists of lat, lon, time_period and daily precipitation.
+# pr_name is the variable name of precipitation
+# period_name is the variable name of period used. like 'year' or 'month'.
+
+def cdd(data_df,pr_name,period_name):
+ data_df['Dryness'] = np.where(data_df[pr_name]>=1,'non-dry','dry')
+ dry_duration = pd.DataFrame(data_df.groupby(['lat','lon',period_name,data_df['Dryness'],(data_df['Dryness']!=data_df['Dryness'].shift()).cumsum()]).size().reset_index(level=4,drop=True).rename('duration'))
+ dry_duration_reset_index = dry_duration.reset_index()
+ dry_duration_reset_index = dry_duration_reset_index[dry_duration_reset_index.Dryness=='dry']
+ CDD_each_grid = pd.DataFrame(dry_duration_reset_index.groupby(['lat','lon',period_name]).duration.max())
+ CDD_each_grid = CDD_each_grid.rename(columns={'duration':"CDD"})
+ return CDD_each_grid.reset_index()
+
+#score_map is a function to plot a variable on map
+
+# data should be the input dataframe.
+# variable_name is varable name of the vairable used like CDD.
+
+def score_map(data,variable_name):
+ val_pivot_df = data.pivot(index='lat', columns='lon', values=variable_name)
+ lons = val_pivot_df.columns.values
+ lats = val_pivot_df.index.values
+ fig, ax = plt.subplots(1, figsize=(8, 8))
+ m = Basemap(projection='merc',
+ llcrnrlat=data.dropna().min().lat-2,
+ urcrnrlat=data.dropna().max().lat+2,
+ llcrnrlon=data.dropna().min().lon-2,
+ urcrnrlon=data.dropna().max().lon+2,
+ resolution='i', area_thresh=10000)
+ m.drawcoastlines()
+ m.drawstates()
+ m.drawcountries()
+ x, y = np.meshgrid(lons,lats) 
+ px, py = m(x,y) 
+ data_values = val_pivot_df.values
+ masked_data = np.ma.masked_invalid(data_values)
+ cmap = plt.cm.viridis
+ m.pcolormesh(px, py, masked_data, cmap=cmap, shading='flat')
+ m.colorbar(label=variable_name)
diff --git a/pcmdi_metrics/drought/lib/lib_spi.py b/pcmdi_metrics/drought/lib/lib_spi.py
@@ -0,0 +1,92 @@
+import numpy as np
+import scipy.stats
+# Note that the calibration and calculation data can be the same but they should both be 1-d monthly mean precipitation array and start from January.
+
+
+def accumulation(data, temporal_scale): 
+ # calculate the accumulated n months (temporal_scale) precipitation data
+ data_accumulated = np.convolve(data, np.ones(temporal_scale), 'valid')
+ # set the value to NANs for the first n-1 months because there isn't accumulated n months values at the first n-1 months
+ data_accumulated = np.concatenate([[np.nan]*(temporal_scale-1), data_accumulated])
+
+ # If the last year is incomplete, let's set NAN to the months without values in the last year
+ n_year_incomplete = len(data_accumulated)/12 - int(len(data_accumulated)/12)
+ n_month_incomplete = int(n_year_incomplete * 12)
+ data_accumulated = np.concatenate([data_accumulated,[np.nan]*(n_month_incomplete)])
+ # Then reshape the 1-d array to 2-d array with the shape of (the number of years, 12) becasue the SPI's distribution and calculation should be based on each month.
+ data_accumulated_reshape = data_accumulated.reshape(int(len(data_accumulated)/12), 12)
+ return data_accumulated_reshape
+
+
+def gamma_transformation(calibration,calculation, temporal_scale):
+ # calculate the alphas and betas at each month based on calibration data
+ means = np.nanmean(calibration, axis=0)
+ a = np.log(means) - np.nanmean(np.log(calibration), axis=0)
+ alphas = (1 + np.sqrt(1 + 4 * a / 3)) / (4 * a)
+ betas = means / alphas
+ # calculate the probability of 0 values
+ n_zero = (calculation ==0).sum()
+ n_values = np.count_nonzero(~np.isnan(calculation)) + n_zero
+ p_zero = n_zero / len(calculation)
+
+ # set the 0 value as NAN
+ calculation[calculation == 0] = np.NaN
+
+ # build gamma distribution based on the non-zero values of calibration data
+ # calculate the probability and the values of calculation data under gamma distribution built by calibration data
+ p_gamma_non_zero = scipy.stats.gamma.cdf(calculation, a=alphas, scale=betas)
+ p_gamma = p_zero + ( 1 - p_zero ) * p_gamma_non_zero
+ calculation_transformed = scipy.stats.norm.ppf(p_gamma)
+ # change the shape to 1-d array
+ calculation_transformed = calculation_transformed.flatten()
+ return calculation_transformed
+
+
+def spi(precipitation_calibration:np.ndarray,
+ precipitation_calculation:np.ndarray,
+ temporal_scale:int,
+ distribution='gamma',
+ upper_limit=3.09,
+ lower_limit=-3.09):
+
+ # precipitation_calibration: It should be the 1-d monthly precipitation array used to calibrate the distribution. It must start from January and can be the same as precipitation_calculate. 
+ # precipitation_calculate: It should be the 1-d monthly precipitation array used to calculate the SPI_n. It must start from Januaray too.
+ # temporal_scale: It's the temporal scale of SPI_n which indicates the number of n months used to calculate accumulated SPI. For example, to calculate SPI3, it should be 3.
+ # distribution: The distribution of SPI and right now it only supports gamma distribution which is the most commonly used one.
+ # upper_limit, lower_limit: The maximum and minimum SPI values, according to https://onlinelibrary.wiley.com/doi/abs/10.1111/j.1752-1688.1999.tb03592.x , the default values are 3.09 and -3.09.
+
+ """
+ # test if there is missing and negative value 
+ if np.all(np.isnan(precipitation_calibration)):
+ print("calibration data cannot be all NAN!")
+ if np.all(np.isnan(precipitation_calculation)):
+ print("calculation data cannot be all NAN!")
+ """
+
+ if precipitation_calibration[~np.isnan(precipitation_calibration)].min()<0:
+ print("Calibration data must be positive! All negative values will be assign to be 0")
+ precipitation_calibration.clip(min=0)
+
+ if precipitation_calculation[~np.isnan(precipitation_calculation)].min()<0:
+ print("Calculation data must be positive! All negative values will be assign to be 0")
+ precipitation_calculation.clip(min=0)
+
+ # calculate the length of input calculation data
+ length_input = len(precipitation_calibration)
+
+ # calculate the accumulated precipitation of calibration and calculation data, then reshape them to 2-d array.
+ precipitation_calibration_accumulated = accumulation(precipitation_calibration, temporal_scale)
+ precipitation_calculation_accumulated = accumulation(precipitation_calculation, temporal_scale)
+
+ # apply the gamma transformation to accumulated data
+ if distribution == 'gamma':
+ spi_n = gamma_transformation(precipitation_calibration_accumulated,
+ precipitation_calculation_accumulated,
+ temporal_scale)
+
+ # clip the spi values 
+ spi_n = np.clip(spi_n,a_min=lower_limit , a_max=upper_limit)
+
+ # change spi_n to its original length 
+ spi_n = spi_n[0:length_input]
+ return spi_n