Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drought metric #749

Draft
wants to merge 41 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
aabf4ca
first init
lee1043 Apr 28, 2021
56fd307
Merge branch 'master' into 679_ljw_drought
lee1043 Jul 9, 2021
1fd4fba
drafting...
lee1043 Jul 9, 2021
a1edda7
progress and clean up
lee1043 Jul 10, 2021
fbe789a
Merge branch 'master' into 679_ljw_drought
lee1043 Aug 10, 2021
26624e8
Merge branch 'master' into 679_ljw_drought
lee1043 Sep 3, 2021
ccfa9c0
Merge branch 'master' into 679_ljw_drought
lee1043 Sep 29, 2021
a891c7a
Merge branch 'master' into 679_ljw_drought
lee1043 Oct 5, 2021
bca98eb
Merge branch 'master' into 679_ljw_drought
lee1043 Oct 19, 2021
698a958
Merge branch 'master' into 679_ljw_drought
lee1043 Oct 21, 2021
448c85a
Merge branch 'master' into 679_ljw_drought
lee1043 Nov 3, 2021
8cf561e
Merge branch 'master' into 679_ljw_drought
lee1043 Nov 16, 2021
6f9731a
Merge branch 'main' into 679_ljw_drought
lee1043 Nov 18, 2021
4e5fdda
Merge branch 'main' into 679_ljw_drought
lee1043 Nov 30, 2021
74fce90
Merge branch 'main' into 679_ljw_drought
lee1043 Nov 30, 2021
0f79707
Merge branch 'main' into 679_ljw_drought
lee1043 Dec 15, 2021
5fecf96
Merge branch 'main' into 679_ljw_drought
lee1043 Apr 1, 2022
0d2d48b
Merge branch 'main' into 679_ljw_drought
lee1043 Apr 2, 2022
218c6d4
Merge branch 'main' into 679_ljw_drought
lee1043 Apr 29, 2022
512fa48
Merge branch 'main' into 679_ljw_drought
lee1043 May 3, 2022
f61f257
Merge branch 'main' into 679_ljw_drought
lee1043 Aug 1, 2022
dcfd7dc
Merge branch 'main' into 679_ljw_drought
lee1043 Aug 22, 2022
5abcec5
Merge branch 'main' into 679_ljw_drought
lee1043 Oct 11, 2022
31590ec
Merge branch 'main' into 679_ljw_drought
lee1043 Oct 12, 2022
b276a41
Merge branch 'main' into 679_ljw_drought
lee1043 Oct 13, 2022
5c99dd1
Merge branch 'main' into 679_ljw_drought
lee1043 Oct 15, 2022
d28a6a1
Merge branch 'main' into 679_ljw_drought
lee1043 Nov 8, 2022
c6f595a
Merge branch 'main' into 679_ljw_drought
lee1043 Nov 15, 2022
5e7d622
Merge branch 'main' into 679_ljw_drought
lee1043 Feb 16, 2023
2b41ef2
Merge branch 'main' into 679_ljw_drought
lee1043 Feb 23, 2023
1757e7d
Merge branch 'main' into 679_ljw_drought
lee1043 Apr 15, 2023
86433f3
Merge branch 'main' into 679_ljw_drought
lee1043 Apr 28, 2023
7196b7b
Merge branch 'main' into 679_ljw_drought
lee1043 Jun 1, 2023
00faf49
Merge branch 'main' into 679_ljw_drought
lee1043 Jun 29, 2023
4645d35
Merge branch 'main' into 679_ljw_drought
lee1043 Oct 27, 2023
c09be2a
Merge branch 'main' into 679_ljw_drought
lee1043 Nov 7, 2023
11fa0ff
Merge branch 'main' into 679_ljw_drought
lee1043 Nov 21, 2023
e7c1761
Merge branch 'main' into 679_ljw_drought
lee1043 Dec 18, 2023
58fd3cf
Merge branch 'main' into 679_ljw_drought
lee1043 Jan 26, 2024
77af309
Merge branch 'main' into 679_ljw_drought
lee1043 Feb 27, 2024
7a761bc
Merge branch 'main' into 679_ljw_drought
lee1043 Apr 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pcmdi_metrics/drought/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Drought metrics
Empty file.
39 changes: 39 additions & 0 deletions pcmdi_metrics/drought/drought_metric_driver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import xarray
import pandas as pd
from pcmdi_metrics.drought.lib import spi
from pcmdi_metrics.drought.lib import cdd
import matplotlib as mpl

# driver to call SPI and CDD

# PMP PARSER READIN

# LOOP FOR OBS or MODELS
# LOOP FOR RUNS
# CALL SPI -- calculated for each grid --> output to nc file
# Input: monthly precipitation over land (OBS: CPC over CONUS)
# CALL CDD -- calculated for each grid --> output to nc file
# Input: daily precipitation over land

# OUTPUT TO JSON (which output to JSON??)


# The input data should be monthly mean precipitaion. The data used here can be downloaded at https://psl.noaa.gov/data/gridded/data.unified.daily.conus.html
observe_path = "/Users/lee1043/Documents/Research/DATA/CPC/precip.V1.0.mon.mean.nc"
observe = xarray.open_dataset(observe_path)
observe_df = observe.to_dataframe().reset_index()
observe_regional_mean = observe_df.groupby('time').mean().reset_index()
date = observe_regional_mean.time

# Build a dataframe to save the SPI3, SPI6, SPI12 and SPI36 of the regional mean monthly precipitation
observe_regional_spi = pd.DataFrame(columns=['SPI3','SPI6','SPI12','SPI36'], index=date)
for scale, col in zip([3,6,12,36], observe_regional_spi.columns):
observe_regional_spi[col] = spi(observe_regional_mean.precip,observe_regional_mean.precip, scale)
observe_regional_spi = observe_regional_spi.reset_index()

# Here I plot the regional mean SPI6 over the total period
fig, (ax) = plt.subplots(nrows=1, ncols=1, sharex=True,sharey=True,figsize=(20,8))
ax.fill_between(observe_regional_spi.time, 0, observe_regional_spi.SPI6, where=observe_regional_spi.SPI6 >= 0, facecolor='blue', interpolate=True)
ax.fill_between(observe_regional_spi.time, 0, observe_regional_spi.SPI6, where=observe_regional_spi.SPI6 < 0, facecolor='red', interpolate=True)
ax.set_ylabel('Regional Mean SPI6 over CONUS',fontsize=22)
ax.set_xlabel("Date",fontsize=22)
2 changes: 2 additions & 0 deletions pcmdi_metrics/drought/lib/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .lib_spi import spi #noqa
from .lib_cdd import cdd #noqa
44 changes: 44 additions & 0 deletions pcmdi_metrics/drought/lib/lib_cdd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import numpy as np
import pandas as pd

#cdd is the function to calculate the CDD, the output is a dataframe grouped by lat lon and time_period.

# data_df should be the input daily precipitation dataframe consists of lat, lon, time_period and daily precipitation.
# pr_name is the variable name of precipitation
# period_name is the variable name of period used. like 'year' or 'month'.

def cdd(data_df,pr_name,period_name):
data_df['Dryness'] = np.where(data_df[pr_name]>=1,'non-dry','dry')
dry_duration = pd.DataFrame(data_df.groupby(['lat','lon',period_name,data_df['Dryness'],(data_df['Dryness']!=data_df['Dryness'].shift()).cumsum()]).size().reset_index(level=4,drop=True).rename('duration'))
dry_duration_reset_index = dry_duration.reset_index()
dry_duration_reset_index = dry_duration_reset_index[dry_duration_reset_index.Dryness=='dry']
CDD_each_grid = pd.DataFrame(dry_duration_reset_index.groupby(['lat','lon',period_name]).duration.max())
CDD_each_grid = CDD_each_grid.rename(columns={'duration':"CDD"})
return CDD_each_grid.reset_index()

#score_map is a function to plot a variable on map

# data should be the input dataframe.
# variable_name is varable name of the vairable used like CDD.

def score_map(data,variable_name):
val_pivot_df = data.pivot(index='lat', columns='lon', values=variable_name)
lons = val_pivot_df.columns.values
lats = val_pivot_df.index.values
fig, ax = plt.subplots(1, figsize=(8, 8))
m = Basemap(projection='merc',
llcrnrlat=data.dropna().min().lat-2,
urcrnrlat=data.dropna().max().lat+2,
llcrnrlon=data.dropna().min().lon-2,
urcrnrlon=data.dropna().max().lon+2,
resolution='i', area_thresh=10000)
m.drawcoastlines()
m.drawstates()
m.drawcountries()
x, y = np.meshgrid(lons,lats)
px, py = m(x,y)
data_values = val_pivot_df.values
masked_data = np.ma.masked_invalid(data_values)
cmap = plt.cm.viridis
m.pcolormesh(px, py, masked_data, cmap=cmap, shading='flat')
m.colorbar(label=variable_name)
92 changes: 92 additions & 0 deletions pcmdi_metrics/drought/lib/lib_spi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import numpy as np
import scipy.stats
# Note that the calibration and calculation data can be the same but they should both be 1-d monthly mean precipitation array and start from January.


def accumulation(data, temporal_scale):
# calculate the accumulated n months (temporal_scale) precipitation data
data_accumulated = np.convolve(data, np.ones(temporal_scale), 'valid')
# set the value to NANs for the first n-1 months because there isn't accumulated n months values at the first n-1 months
data_accumulated = np.concatenate([[np.nan]*(temporal_scale-1), data_accumulated])

# If the last year is incomplete, let's set NAN to the months without values in the last year
n_year_incomplete = len(data_accumulated)/12 - int(len(data_accumulated)/12)
n_month_incomplete = int(n_year_incomplete * 12)
data_accumulated = np.concatenate([data_accumulated,[np.nan]*(n_month_incomplete)])
# Then reshape the 1-d array to 2-d array with the shape of (the number of years, 12) becasue the SPI's distribution and calculation should be based on each month.
data_accumulated_reshape = data_accumulated.reshape(int(len(data_accumulated)/12), 12)
return data_accumulated_reshape


def gamma_transformation(calibration,calculation, temporal_scale):
# calculate the alphas and betas at each month based on calibration data
means = np.nanmean(calibration, axis=0)
a = np.log(means) - np.nanmean(np.log(calibration), axis=0)
alphas = (1 + np.sqrt(1 + 4 * a / 3)) / (4 * a)
betas = means / alphas
# calculate the probability of 0 values
n_zero = (calculation ==0).sum()
n_values = np.count_nonzero(~np.isnan(calculation)) + n_zero
p_zero = n_zero / len(calculation)

# set the 0 value as NAN
calculation[calculation == 0] = np.NaN

# build gamma distribution based on the non-zero values of calibration data
# calculate the probability and the values of calculation data under gamma distribution built by calibration data
p_gamma_non_zero = scipy.stats.gamma.cdf(calculation, a=alphas, scale=betas)
p_gamma = p_zero + ( 1 - p_zero ) * p_gamma_non_zero
calculation_transformed = scipy.stats.norm.ppf(p_gamma)
# change the shape to 1-d array
calculation_transformed = calculation_transformed.flatten()
return calculation_transformed


def spi(precipitation_calibration:np.ndarray,
precipitation_calculation:np.ndarray,
temporal_scale:int,
distribution='gamma',
upper_limit=3.09,
lower_limit=-3.09):

# precipitation_calibration: It should be the 1-d monthly precipitation array used to calibrate the distribution. It must start from January and can be the same as precipitation_calculate.
# precipitation_calculate: It should be the 1-d monthly precipitation array used to calculate the SPI_n. It must start from Januaray too.
# temporal_scale: It's the temporal scale of SPI_n which indicates the number of n months used to calculate accumulated SPI. For example, to calculate SPI3, it should be 3.
# distribution: The distribution of SPI and right now it only supports gamma distribution which is the most commonly used one.
# upper_limit, lower_limit: The maximum and minimum SPI values, according to https://onlinelibrary.wiley.com/doi/abs/10.1111/j.1752-1688.1999.tb03592.x , the default values are 3.09 and -3.09.

"""
# test if there is missing and negative value
if np.all(np.isnan(precipitation_calibration)):
print("calibration data cannot be all NAN!")
if np.all(np.isnan(precipitation_calculation)):
print("calculation data cannot be all NAN!")
"""

if precipitation_calibration[~np.isnan(precipitation_calibration)].min()<0:
print("Calibration data must be positive! All negative values will be assign to be 0")
precipitation_calibration.clip(min=0)

if precipitation_calculation[~np.isnan(precipitation_calculation)].min()<0:
print("Calculation data must be positive! All negative values will be assign to be 0")
precipitation_calculation.clip(min=0)

# calculate the length of input calculation data
length_input = len(precipitation_calibration)

# calculate the accumulated precipitation of calibration and calculation data, then reshape them to 2-d array.
precipitation_calibration_accumulated = accumulation(precipitation_calibration, temporal_scale)
precipitation_calculation_accumulated = accumulation(precipitation_calculation, temporal_scale)

# apply the gamma transformation to accumulated data
if distribution == 'gamma':
spi_n = gamma_transformation(precipitation_calibration_accumulated,
precipitation_calculation_accumulated,
temporal_scale)

# clip the spi values
spi_n = np.clip(spi_n,a_min=lower_limit , a_max=upper_limit)

# change spi_n to its original length
spi_n = spi_n[0:length_input]
return spi_n
Loading