From 210e4e156136ace8bfa402b98e51726113785342 Mon Sep 17 00:00:00 2001 From: Jeevesh8 Date: Thu, 21 May 2020 12:33:41 +0530 Subject: [PATCH] Baseline Smart Persistence model added --- README.md | 16 +++++++- smart_persistence.py | 87 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 smart_persistence.py diff --git a/README.md b/README.md index dfd20f5..8f6b796 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,18 @@ python shift_ghi.py --ghi val1 val2 val3 val4 ``` python shift_ghi.py --ghi_time_file --write_to ``` +## Using smart_persistence.py (Baseline Model) + +1.) Provides smart persistence [Pedro and Coimbra, 2012](https://www.sciencedirect.com/science/article/abs/pii/S0038092X12001429) predictions and accuracy metrics. + +``` +python smart_persistence.py --loss --tr_start_year + --tr_final_year --test_start_year + --test_final_year --root_dir + --steps --get_preds +``` + +Omitting the ```--get_preds``` flag will cause only the loss to be printed. ## Example Commands :- @@ -138,4 +150,6 @@ python Infer.py --mode predict_list --model trfrmr --ini_len 15 --final_len 12\ --test_year 14 --times_to_run 10 --gamma_list 0.95 0.9 0.5 0.05 0.1 0.5 ``` -**NOTE** :- Currently you can't predict GHI for more steps than you trained for as you'd need weather data of those steps and hence just using prediction for previous n steps to predict for n steps after those will not work. +**NOTE** :- Currently you can't predict GHI for more future steps (in parallel) than you trained for, as you'd need weather data for those steps; just using predictions for the previous n steps to predict the n steps after those will not work. + +All the years, i.e., ```--tr_start_year, --val_final_year``` etc. are integers from 0 to n-1 where n is the number of years (or files) in your root directory. Each file must correspond to a single year. 
diff --git a/smart_persistence.py b/smart_persistence.py new file mode 100644 index 0000000..991978b --- /dev/null +++ b/smart_persistence.py @@ -0,0 +1,87 @@ +import argparse +import numpy as np +from scipy.interpolate import griddata +import pandas as pd + +def date_to_nth_day(year, month, day): + date = pd.Timestamp(year=year,month=month,day=day) + new_year_day = pd.Timestamp(year=year, month=1, day=1) + return (date - new_year_day).days + 1 + +def get_df(csv_paths) : + df_lis = [] + for path in csv_paths : + df_lis.append(pd.read_csv(path)) + final_df = pd.concat(df_lis,ignore_index=True).drop(['Unnamed: 0'],axis=1) + return final_df + +def day_passed_ratio(hour, minute) : + return (hour*60+minute)/24*60 + +def caller(series) : + series['nthDay'] = int(date_to_nth_day(series['Year'], series['Month'], series['Day'])) + series['diff_hours'] = day_passed_ratio(series['Hour'], series['Minute']) + return series + +def lossfn(a, b, loss='mse') : + if loss == 'mse' : + return (a-b)*(a-b) + elif loss == 'mape' : + return np.abs(a-b)/np.abs(b) + elif loss == 'mae' : + return np.abs(a-b) + elif loss == 'mbe' : + return a-b + +if __name__ == '__main__' : + parser = argparse.ArgumentParser() + parser.add_argument('--loss', default='mse', help='Choose from mse, mbe, mae, mape') + parser.add_argument('--test_start_year', type=int, help='Starting test year. Use only when mode is avg_loss') + parser.add_argument('--test_final_year', type=int, help='Final test year. 
Use only when mode is avg_loss.') + parser.add_argument('--tr_start_year', type=int, help='Training Start year') + parser.add_argument('--tr_final_year', type=int, help='Training Final year') + parser.add_argument('--root_dir') + parser.add_argument('--steps', type=int, default=1, help='How many values do you want to skip b/w 2 consecutive predictions?') + parser.add_argument('--get_preds', action='store_true', help='Set this flag if you want to get predictions of Smart Persistence') + + csv_paths=[root_dir+'Data'+str(i)+'.csv' for i in range(tr_start_year, tr_end_year+1)] + final_df = get_df(csv_paths) + csv_paths=[root_dir+'Data'+str(i)+'.csv' for i in range(val_start_year, val_end_year+1)] + val_final_df = get_df(csv_paths) + + final_df['nthDay'] = np.nan + final_df['diff_hours'] = np.nan + final_df = final_df.apply(caller, axis=1) + + val_final_df['nthDay'] = np.nan + val_final_df['diff_hours'] = np.nan + val_final_df = final_df.apply(caller, axis=1) + + final_df = final_df[['GHI', 'nthDay', 'diff_hours']] + + values = final_df.groupby(['nthDay','diff_hours']).mean() + values = values.reset_index() + + points = values[['nthDay', 'diff_hours']].to_numpy() + ghi_values = values[['GHI']].to_numpy() + + points_to_interpolate_to = val_final_df[['nthDay', 'diff_hours']][::args.steps].to_numpy() + real_ghi_vals = val_final_df[['GHI']][::args.steps].to_numpy() + + clear_sky_preds = griddata(points, ghi_values, points_to_interpolate_to, method='linear') + + smart_persistence_preds = [] + loss = 0 + for i in range(len(real_ghi_vals)) : + if i==0 : + continue + if clear_sky_preds[i-1]==0 : + smart_persistence_preds.append(clear_sky_preds[i]) + else : + smart_persistence_preds.append( (clear_sky_preds[i]*real_ghi_vals[i-1])/clear_sky_preds[i-1]) + loss += lossfn(smart_persistence_preds[i-1], real_ghi_vals[i], args.loss) + + if arge.get_preds : + print(smart_persistence_preds) + + print("Loss=", loss) \ No newline at end of file