From 210e4e156136ace8bfa402b98e51726113785342 Mon Sep 17 00:00:00 2001 From: Jeevesh8 Date: Thu, 21 May 2020 12:33:41 +0530 Subject: [PATCH] Baseline Smart Persistence model added --- README.md | 16 +++++++- smart_persistence.py | 87 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 smart_persistence.py diff --git a/README.md b/README.md index dfd20f5..8f6b796 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,18 @@ python shift_ghi.py --ghi val1 val2 val3 val4 ``` python shift_ghi.py --ghi_time_file --write_to ``` +## Using smart_persistence.py (Baseline Model) + +1.) Provides smart persistence [Pedro and Coimbra, 2012](https://www.sciencedirect.com/science/article/abs/pii/S0038092X12001429) predictions and accuracy metrics. + +``` +python smart_persistence.py --loss --tr_start_year + --tr_final_year --test_start_year + --test_final_year --root_dir + --steps --get_preds +``` + +Omitting the ```--get_preds``` flag will cause only the loss to be printed. ## Example Commands :- @@ -138,4 +150,6 @@ python Infer.py --mode predict_list --model trfrmr --ini_len 15 --final_len 12\ --test_year 14 --times_to_run 10 --gamma_list 0.95 0.9 0.5 0.05 0.1 0.5 ``` -**NOTE** :- Currently you can't predict GHI for more steps than you trained for as you'd need weather data of those steps and hence just using prediction for previous n steps to predict for n steps after those will not work. +**NOTE** :- Currently you can't predict GHI for more future steps (in parallel) than you trained for, as you'd need weather data for those steps; just using predictions for the previous n steps to predict the n steps after those will not work. + +All the years, i.e., ```--tr_start_year, --val_final_year``` etc. are integers from 0 to n-1 where n is the number of years (or files) in your root directory. Each file must correspond to a single year. 
diff --git a/smart_persistence.py b/smart_persistence.py new file mode 100644 index 0000000..991978b --- /dev/null +++ b/smart_persistence.py @@ -0,0 +1,87 @@ +import argparse +import numpy as np +from scipy.interpolate import griddata +import pandas as pd + +def date_to_nth_day(year, month, day): + date = pd.Timestamp(year=year,month=month,day=day) + new_year_day = pd.Timestamp(year=year, month=1, day=1) + return (date - new_year_day).days + 1 + +def get_df(csv_paths) : + df_lis = [] + for path in csv_paths : + df_lis.append(pd.read_csv(path)) + final_df = pd.concat(df_lis,ignore_index=True).drop(['Unnamed: 0'],axis=1) + return final_df + +def day_passed_ratio(hour, minute) : + return (hour*60+minute)/24*60 + +def caller(series) : + series['nthDay'] = int(date_to_nth_day(series['Year'], series['Month'], series['Day'])) + series['diff_hours'] = day_passed_ratio(series['Hour'], series['Minute']) + return series + +def lossfn(a, b, loss='mse') : + if loss == 'mse' : + return (a-b)*(a-b) + elif loss == 'mape' : + return np.abs(a-b)/np.abs(b) + elif loss == 'mae' : + return np.abs(a-b) + elif loss == 'mbe' : + return a-b + +if __name__ == '__main__' : + parser = argparse.ArgumentParser() + parser.add_argument('--loss', default='mse', help='Choose from mse, mbe, mae, mape') + parser.add_argument('--test_start_year', type=int, help='Starting test year. Use only when mode is avg_loss') + parser.add_argument('--test_final_year', type=int, help='Final test year. 
Use only when mode is avg_loss.') + parser.add_argument('--tr_start_year', type=int, help='Training Start year') + parser.add_argument('--tr_final_year', type=int, help='Training Final year') + parser.add_argument('--root_dir') + parser.add_argument('--steps', type=int, default=1, help='How many values do you want to skip b/w 2 consecutive predictions?') + parser.add_argument('--get_preds', action='store_true', help='Set this flag if you want to get predictions of Smart Persistence') + + csv_paths=[root_dir+'Data'+str(i)+'.csv' for i in range(tr_start_year, tr_end_year+1)] + final_df = get_df(csv_paths) + csv_paths=[root_dir+'Data'+str(i)+'.csv' for i in range(val_start_year, val_end_year+1)] + val_final_df = get_df(csv_paths) + + final_df['nthDay'] = np.nan + final_df['diff_hours'] = np.nan + final_df = final_df.apply(caller, axis=1) + + val_final_df['nthDay'] = np.nan + val_final_df['diff_hours'] = np.nan + val_final_df = final_df.apply(caller, axis=1) + + final_df = final_df[['GHI', 'nthDay', 'diff_hours']] + + values = final_df.groupby(['nthDay','diff_hours']).mean() + values = values.reset_index() + + points = values[['nthDay', 'diff_hours']].to_numpy() + ghi_values = values[['GHI']].to_numpy() + + points_to_interpolate_to = val_final_df[['nthDay', 'diff_hours']][::args.steps].to_numpy() + real_ghi_vals = val_final_df[['GHI']][::args.steps].to_numpy() + + clear_sky_preds = griddata(points, ghi_values, points_to_interpolate_to, method='linear') + + smart_persistence_preds = [] + loss = 0 + for i in range(len(real_ghi_vals)) : + if i==0 : + continue + if clear_sky_preds[i-1]==0 : + smart_persistence_preds.append(clear_sky_preds[i]) + else : + smart_persistence_preds.append( (clear_sky_preds[i]*real_ghi_vals[i-1])/clear_sky_preds[i-1]) + loss += lossfn(smart_persistence_preds[i-1], real_ghi_vals[i], args.loss) + + if arge.get_preds : + print(smart_persistence_preds) + + print("Loss=", loss) \ No newline at end of file