Regression.py
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tables  # PyTables backend required by pandas.DataFrame.to_hdf
from sklearn import linear_model, ensemble, neural_network
from sklearn.preprocessing import StandardScaler

from DataManagement import get_features, get_target
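# DataManagement is the project-local data-loading module; the feature and
# target frames it returns are assumed to hold Pdc values normalized by ENI,
# since the predictions below are multiplied back by ENI before saving.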
Capacity = 20808.66
features = get_features()
out = get_target()
features = features.dropna(axis=0, how="any")
train_Pdc = pd.concat([features.t, features.Pdc_5min, features.Pdc_10min, features.Pdc_15min, features.Pdc_20min,
                       features.Pdc_25min, features.Pdc_30min, features.ENI, features.El], axis=1)
Pdc_train = features.Pdc_33  # only needed for max(Pdc)
features = features.drop(['Pdc_5min', 'Pdc_10min', 'Pdc_15min', 'Pdc_20min', 'Pdc_25min', 'Pdc_30min',
                          'ENI', 'El', 'Pdc_33'], axis=1)
out = out.dropna(axis=0, how="any")
tar = pd.concat([out.t, out.Pdc_5min, out.Pdc_10min, out.Pdc_15min, out.Pdc_20min,
                 out.Pdc_25min, out.Pdc_30min, out.ENI, out.El], axis=1)
Pdc_test = out.Pdc_33
test_features = out.drop(['Pdc_5min', 'Pdc_10min', 'Pdc_15min', 'Pdc_20min', 'Pdc_25min', 'Pdc_30min',
                          'ENI', 'El', 'Pdc_33'], axis=1)
#tar = tar.drop(tar.columns[tar.columns.str.startswith(("B_", "L_", "V_"))], axis=1)
def run_forecast(target, horizon):
    """Forecast Pdc at the given horizon from the features matching `target`
    (GHI or BNI) with OLS, ridge and lasso, add a smart-persistence benchmark,
    and write the results to an HDF5 file."""
    train = features.drop('t', axis=1)
    test = test_features.drop('t', axis=1)
    train_y = train_Pdc.drop('t', axis=1)
    test_y = tar.drop('t', axis=1)

    # include the target (Pdc) in the training data set
    cols = ["Pdc_{}".format(horizon)]
    train = train.join(train_y, how="inner")
    test = test.join(test_y, how="inner")
    feature_cols = features.filter(regex=target).columns.tolist()
    train = train[cols + feature_cols]
    test = test[cols + feature_cols]

    # include the lagged Pdc value (Pdc_35) in the training set
    Pdc_35_train = Pdc_train.shift(periods=7)
    Pdc_35_train = Pdc_35_train[14:]
    Pdc_35_test = Pdc_test.shift(periods=7)
    Pdc_35_test = Pdc_35_test[14:]
    train = train[0:(len(train) - 14)]
    test = test[0:(len(test) - 14)]
    train.insert(len(train.columns), column="Pdc_35", value=Pdc_35_train.values)
    test.insert(len(test.columns), column="Pdc_35", value=Pdc_35_test.values)

    train_X = train[feature_cols + ["Pdc_35"]].values
    test_X = test[feature_cols + ["Pdc_35"]].values
    train_y = train_y[0:(len(train_y) - 14)]
    test_y = test_y[0:(len(test_y) - 14)]
    train_Y = train_y['Pdc_{}'.format(horizon)].values
    test_Y = test_y['Pdc_{}'.format(horizon)].values

    # Ordinary least squares (OLS),
    # ridge regression (OLS + L2 regularizer) and
    # lasso (OLS + L1 regularizer)
    models = [["ols", linear_model.LinearRegression()],
              ["ridge", linear_model.RidgeCV(cv=10)],
              ["lasso", linear_model.LassoCV(cv=10, max_iter=10000)]]

    scaler = StandardScaler()
    scaler.fit(train_X)
    train_X = scaler.transform(train_X)
    test_X = scaler.transform(test_X)

    for name, model in models:
        model.fit(train_X, train_Y)
        train_pred = model.predict(train_X)
        test_pred = model.predict(test_X)
        # rescale the predictions by ENI
        train_pred = train_pred * train_y.ENI  # * Capacity
        test_pred = test_pred * test_y.ENI  # * Capacity
        train.insert(train.shape[1], "Pdc_{}_{}".format(target, name), train_pred)
        test.insert(test.shape[1], "Pdc_{}_{}".format(target, name), test_pred)

    train_Y = train_Y * train_y.ENI  # * Capacity
    test_Y = test_Y * test_y.ENI  # * Capacity

    # smart persistence forecast:
    # uses the shortest-horizon power output as the current power value
    tmp = np.squeeze(train_y["Pdc_5min"].values) * train_y.ENI  # * Capacity
    train.insert(train.shape[1], "Pdc_{}_sp".format(target), tmp)
    tmp = np.squeeze(test_y["Pdc_5min"].values) * test_y.ENI  # * Capacity
    test.insert(test.shape[1], "Pdc_{}_sp".format(target), tmp)

    # save the actual values to compare with the predictions in post-processing
    train.insert(train.shape[1], "Pdc_{}_actual".format(target), train_Y)
    test.insert(test.shape[1], "Pdc_{}_actual".format(target), test_Y)

    # save the forecasts, keeping only the essential forecast columns
    cols = train.columns[train.columns.str.startswith("Pdc_{}".format(target))]
    train = train[cols]
    test = test[cols]
    train.insert(train.shape[1], "dataset", "Train")
    test.insert(test.shape[1], "dataset", "Test")
    df = pd.concat([train, test], axis=0)
    df.insert(df.shape[1], "target", target)
    df.insert(df.shape[1], "horizon", horizon)
    df.to_hdf(os.path.join("forecasts", "forecasts_{}_{}.h5".format(horizon, target)),
              "df", mode="w")
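
# Driver loop: 2 targets x 6 horizons = 12 forecast files, one per
# (horizon, target) combination, written to the "forecasts" directory.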
target = ["GHI", "BNI"]
horizon = ["5min", "10min", "15min", "20min", "25min", "30min"]

for t in target:
    for h in horizon:
        print("{} Pdc forecast for {}".format(h, t))
        run_forecast(t, h)
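
# Note: DataFrame.to_hdf does not create missing directories, so a "forecasts"
# folder is expected to exist next to this script before the loop runs
# (e.g. created beforehand with os.makedirs("forecasts", exist_ok=True)).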