-
Notifications
You must be signed in to change notification settings - Fork 1
/
Postprocess.py
138 lines (110 loc) · 4.48 KB
/
Postprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
"""Postprocess forecasts (statistics, plots, etc.)"""
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
filenames = ["forecasts_5min_BNI.h5", "forecasts_5min_GHI.h5", "forecasts_10min_BNI.h5", "forecasts_10min_GHI.h5"
, "forecasts_15min_BNI.h5", "forecasts_15min_GHI.h5", "forecasts_20min_BNI.h5", "forecasts_20min_GHI.h5"
, "forecasts_25min_BNI.h5", "forecasts_25min_GHI.h5", "forecasts_30min_BNI.h5", "forecasts_30min_GHI.h5"]
def summary_stats(target, filenames, baseline="sp"):
"""Compute summary statistics (MAE, MBE, etc.).
Parameters
----------
target : str {"ghi", "dni"}
Target variable.
filenames : list
List of filenames (relative or absolute) containing forecast time-series.
baseline : str
The baseline forecast to compare against.
Returns
-------
df : pandas.DataFrame
Summary statistics (MAE, MBE, etc.) by horizon, dataset, feature set,
and model.
"""
results = []
for filename in filenames:
df = pd.read_hdf(filename, "df")
# essential metadata
horizon = df["horizon"].values[0]
#print(df.describe())
# error metrics
for dataset, group in df.groupby("dataset"):
for model in [baseline, "ols", "ridge", "lasso"]:
# error metrics [W/m^2]
error = (group["Pdc_{}_actual".format(target)] - group["Pdc_{}_{}".format(target, model)]).values
mae = np.nanmean(np.abs(error))
mbe = np.nanmean(error)
rmse = np.sqrt(np.nanmean(error ** 2))
# forecast skill [-]:
#
# s = 1 - RMSE_f / RMSE_p
#
# where RMSE_f and RMSE_p are the RMSE of the forecast model
# and reference baseline model, respectively. (smart persistance model)
rmse_p = np.sqrt(
np.mean((group["Pdc_{}_actual".format(target)] - group["Pdc_{}_{}".format(target, baseline)]) ** 2)
)
skill = 1.0 - rmse / rmse_p
results.append(
{
"dataset": dataset, # Train/Test
"horizon": horizon, # 5min, 10min, etc.
"model": model,
"MAE": mae,
"MBE": mbe,
"RMSE": rmse,
"skill": skill,
"baseline": baseline, # the baseline forecast name
}
)
# return as a DataFrame
return pd.DataFrame(results)
def summarize(target):
"""Summarize the forecasts, per horizon range."""
# intra-hour: 5-30min ahead
df = summary_stats(
target,
glob.glob(
os.path.join(
"forecasts", "forecasts_*{}.h5".format(target)
)
),
baseline="sp",
)
df.to_hdf("results_{}_intra-hour.h5".format(target), "df", mode="w")
print("Intra-hour ({}): {}".format(target, df.shape))
""" # intra-day: 30min to 3h ahead
df = summary_stats(
target,
glob.glob(
os.path.join(
"forecasts", "forecasts_intra-day*{}.h5".format(target)
)
),
baseline="sp",
)
df.to_hdf("results_{}_intra-day.h5".format(target), "df", mode="w")
print("Intra-day ({}): {}".format(target, df.shape)"""
def summary_table(target):
"""Summary table for paper."""
# results
df = pd.read_hdf("results_{}_intra-hour.h5".format(target), "df")
df = df[df["dataset"] == "Test"]
df.to_csv("summary_table_{}.csv".format(target), index=False, sep=";", decimal=",")
# generate table
for model, group in df.groupby(["model"]):
meta_str = "{:<6}".format(model)
mae_str = "MAE: {:.3f} std: {:.2f}".format(group.mean()["MAE"], group.std()["MAE"])
mbe_str = "MBE: {:.3f} std: {:.2f}".format(group.mean()["MBE"], group.std()["MBE"])
rmse_str = "RMSE: {:.3f} std: {:.2f}".format(group.mean()["RMSE"], group.std()["RMSE"])
skill_str = "skill: {:.1f} std: {:.2f}".format(group.mean()["skill"] * 100, group.std()["skill"] * 100)
print("{:<30} && {:<16} & {:<16} & {:<20} & {:<18} \\\\".format(meta_str, mae_str, mbe_str, rmse_str, skill_str))
# computes and prints error metrics
target = "BNI"
summarize(target)
summary_table(target)
target = "GHI"
summarize(target)
summary_table(target)