-
Notifications
You must be signed in to change notification settings - Fork 0
/
Trading_strategies_classification(digitized).py
109 lines (85 loc) · 3.2 KB
/
Trading_strategies_classification(digitized).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import numpy as np
import yfinance as yf
import pandas as pd
from pylab import mpl, plt
from sklearn import linear_model
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
# Global plotting look for the charts drawn further down in this file.
# The style sheet is applied first and the font family is overridden
# afterwards, so the serif setting is the one that remains in effect.
plt.style.use('seaborn-v0_8-whitegrid')
mpl.rcParams.update({'font.family': 'serif'})
def load_raw_data(tickers, start_date, end_date):
    """Download the 'Adj Close' price field for ``tickers`` via ``yf.download``.

    Args:
        tickers: Ticker symbol, or a list of symbols, handed to
            ``yf.download`` as its first argument.
        start_date: Forwarded to ``yf.download`` as its second positional
            argument.
        end_date: Forwarded to ``yf.download`` as its third positional
            argument.

    Returns:
        pandas.DataFrame: The 'Adj Close' selection of the download. When a
        list of tickers was requested and the selection is itself a
        DataFrame, its columns are arranged in the order of ``tickers``.
    """
    raw = yf.download(tickers, start_date, end_date)['Adj Close']
    # NOTE(review): assumes the download exposes an 'Adj Close' field;
    # confirm against the installed yfinance version.
    if isinstance(raw, pd.DataFrame) and not isinstance(tickers, str):
        # reindex() returns a new frame rather than modifying in place, so
        # the result has to be bound or the column ordering is silently lost.
        raw = raw.reindex(columns=list(tickers))
    # A single-symbol selection may come back as a Series; always hand the
    # caller a DataFrame.
    return pd.DataFrame(raw)
def create_bins(raw, symbol, lags):
    """Build lagged log-return features for ``symbol`` and digitize them.

    Args:
        raw: Price table indexable by ``symbol``.
        symbol: Column of ``raw`` to work on.
        lags: Number of lagged return columns (``lag_1`` .. ``lag_<lags>``).

    Returns:
        tuple: ``(data, cols_bin, bins)`` where ``data`` holds the price,
        ``returns``, ``direction``, the lag columns and their digitized
        ``lag_<k>bin`` counterparts; ``cols_bin`` lists the digitized column
        names; ``bins`` is ``[mean - std, mean, mean + std]`` of the returns
        that remain after rows with incomplete lags were dropped.
    """
    frame = pd.DataFrame(raw[symbol])
    frame['returns'] = np.log(frame / frame.shift(1))
    frame.dropna(inplace=True)
    # Sign of the return: 1 for up, -1 for down, 0 for an unchanged price.
    frame['direction'] = np.sign(frame['returns']).astype(int)

    lag_names = ['lag_{}'.format(step) for step in range(1, lags + 1)]
    for step, lag_name in enumerate(lag_names, start=1):
        frame[lag_name] = frame['returns'].shift(step)
    # The first `lags` rows have at least one missing lag.
    frame.dropna(inplace=True)

    centre = frame['returns'].mean()
    spread = frame['returns'].std()
    bins = [centre - spread, centre, centre + spread]

    # Three edges split every lagged return into one of four buckets (0..3).
    binned_names = []
    for lag_name in lag_names:
        binned_name = lag_name + 'bin'
        frame[binned_name] = np.digitize(frame[lag_name], bins=bins)
        binned_names.append(binned_name)
    return frame, binned_names, bins
def classification_models(data, cols_bin):
    """Fit three classifiers on the digitized lags and back-test them.

    Every model is fitted on ``data[cols_bin]`` against ``data['direction']``
    and then predicts on those same rows, so the printed and plotted figures
    are in-sample. ``data`` is modified in place: a ``pos_<model>`` column
    (predicted direction) and a ``strat_<model>`` column (position times
    return) are added for each model.

    Args:
        data: Frame containing ``returns``, ``direction`` and every column
            named in ``cols_bin``.
        cols_bin: Names of the feature columns.

    Returns:
        tuple: ``(models, data)`` - the dict of fitted estimators keyed by
        model name, and the augmented input frame.

    Side effects:
        Prints ``exp`` of the summed ``returns`` and strategy columns, and
        shows a plot of ``exp`` of their cumulative sums.
    """
    # The keys become part of the output column names, so they are kept
    # exactly as they were (including the 'guass_nb' spelling).
    models = {
        'log_reg': linear_model.LogisticRegression(C=1),
        'guass_nb': GaussianNB(),
        'svm': SVC(C=1),
    }

    features = data[cols_bin]
    for name, model in models.items():
        model.fit(features, data['direction'])
        data['pos_' + name] = model.predict(features)

    # Second pass so that all pos_* columns precede the strat_* columns.
    sel = ['returns']
    for name in models:
        col = 'strat_' + name
        data[col] = data['pos_' + name] * data['returns']
        sel.append(col)

    print(data[sel].sum().apply(np.exp))
    data[sel].cumsum().apply(np.exp).plot(figsize=(10, 6))
    plt.show()
    return models, data
def next_day_predict(models, symbol, pred_start, pred_end, lags, bins):
    """Run the fitted models on freshly downloaded prices for ``symbol``.

    Lag ``k`` is the return shifted by ``k - 1`` rows, so ``lag_1`` is the
    return of the row's own date. Each row therefore carries the features
    that would be known at that day's close.

    Args:
        models: Mapping of model name to fitted estimator with ``predict``.
        symbol: Ticker passed to ``yf.download``.
        pred_start: Second positional argument of ``yf.download``.
        pred_end: Third positional argument of ``yf.download``.
        lags: Number of lag features; presumably must match the value used
            when the models were fitted - verify against the caller.
        bins: Bin edges handed to ``np.digitize``.

    Returns:
        pandas.DataFrame: Prices, returns, lag columns, digitized lag
        columns and one ``pred_<model>`` column per entry of ``models``.

    Raises:
        ValueError: If no row has a complete set of lag features.
    """
    pred_data = pd.DataFrame(yf.download(symbol, pred_start, pred_end)['Adj Close'])
    pred_data['returns'] = np.log(pred_data / pred_data.shift(1))
    pred_data.dropna(inplace=True)

    pcols = []
    for plag in range(1, lags + 1):
        col = 'lag_{}'.format(plag)
        pred_data[col] = pred_data['returns'].shift(plag - 1)
        pcols.append(col)
    pred_data.dropna(inplace=True)

    if pred_data.empty:
        # Without this the failure only surfaces inside the first
        # estimator's predict() with a message about array shapes.
        raise ValueError(
            'Not enough price history for {} between {} and {} to build {} '
            'lagged returns'.format(symbol, pred_start, pred_end, lags)
        )

    cols_bin = []
    for col in pcols:
        col_bin = col + 'bin'
        pred_data[col_bin] = np.digitize(pred_data[col], bins=bins)
        cols_bin.append(col_bin)

    features = pred_data[cols_bin]
    for name, model in models.items():
        pred_data['pred_' + name] = model.predict(features)
    return pred_data
if __name__ == '__main__':
    # Universe to download and the symbol that is modelled.
    tickers = ['SPY', 'NVDA']
    symbol, lags = 'SPY', 5

    # Sample used to build the features and fit the classifiers.
    start_date, end_date = '2020-01-01', '2023-06-06'
    raw_ = load_raw_data(tickers=tickers, start_date=start_date, end_date=end_date)
    data_, cols_bin_, bins = create_bins(raw=raw_, symbol=symbol, lags=lags)
    models_, data_ = classification_models(data=data_, cols_bin=cols_bin_)

    # Separate window on which the fitted models are asked for predictions;
    # the bin edges from the fitting sample are reused.
    pred_start, pred_end = '2023-05-20', '2023-06-05'
    pred_data = next_day_predict(
        models=models_,
        symbol=symbol,
        pred_start=pred_start,
        pred_end=pred_end,
        lags=lags,
        bins=bins,
    )