-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict_mvp_voting.py
105 lines (75 loc) · 3.71 KB
/
predict_mvp_voting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import get_stats
import pandas
from sklearn import preprocessing, ensemble, metrics, linear_model,tree, svm
import numpy
def _label_encode_columns(frame):
    """Return a new DataFrame with every column of ``frame`` label-encoded.

    Each column is encoded independently with ``LabelEncoder.fit_transform``.
    A fresh frame is built (rather than assigning through ``.iloc`` into a
    column selection of another frame) so the caller's data is never mutated
    and pandas has no chained assignment to warn about.
    """
    encoder = preprocessing.LabelEncoder()
    return pandas.DataFrame(
        {column: encoder.fit_transform(frame[column]) for column in frame.columns},
        index=frame.index,
    )


def _fit_and_report(title, model, X_train, y_train, X_test, y_test, players):
    """Fit ``model``, predict on ``X_test`` and print a ranking plus error metrics.

    ``title`` is printed verbatim as the section header; ``players`` is the
    column of player names shown next to the predicted share.
    """
    model.fit(X_train, y_train)
    y_predicted = model.predict(X_test)

    print(title)
    test_results = pandas.DataFrame(players)
    test_results["Share"] = y_predicted
    # Only the ten highest predicted shares are shown.
    print(test_results.sort_values("Share", ascending=False).head(10))
    print("Mean Absolute Error: {}".format(metrics.mean_absolute_error(y_test, y_predicted)))
    print("Mean Squared Error: {}".format(metrics.mean_squared_error(y_test, y_predicted)))
    print("R2 Score: {}".format(metrics.r2_score(y_test, y_predicted)))


def run_regression(features=None):
    """Train three regressors on ``stats_data.csv`` and report them on the 2017 test set.

    A decision tree, a random forest and a linear regression are fitted to
    predict the ``"Share"`` column. For each model the ten players with the
    highest predicted share are printed, followed by MAE, MSE and R2.

    Args:
        features: Column names to use as model inputs. When ``None``, every
            column of the training data except ``"Share"`` and
            ``"Unnamed: 0"`` is used.

    Returns:
        None. All results are printed.
    """
    # Presumably (re)generates stats_data.csv before it is read - confirm
    # against the get_stats module.
    get_stats.get_stats()
    training_data = pandas.read_csv("stats_data.csv")
    training_data.fillna(0, inplace=True)
    if features is None:
        features = training_data.drop(["Share", "Unnamed: 0"], axis="columns").columns

    X_train = _label_encode_columns(training_data[features])
    y_train = training_data["Share"]

    # The test set is identical for all three models, so build and encode it
    # once instead of once per model.
    test_set = get_stats.create_test_set(2017)
    # NOTE(review): the encoder is refit on the test data, so the integer
    # codes are not guaranteed to match those seen during training. Kept
    # as-is because changing it alters every prediction.
    X_test = _label_encode_columns(test_set[features])

    if "Share" in test_set.columns:
        # Score against the quantity the models actually predict. Missing
        # shares are treated as 0, mirroring the fillna(0) on the training data.
        y_test = test_set["Share"].fillna(0)
    else:
        # Legacy fallback: the previous implementation always scored against
        # the encoded "All-Star" column. Only used when the test set carries
        # no "Share" column.
        y_test = X_test["All-Star"]

    models = (
        ("\nDecision Tree Regressor Results: ", tree.DecisionTreeRegressor()),
        ("\nRandom Forest Regressor Results: ", ensemble.RandomForestRegressor(random_state=3)),
        ("\nLinear Multivariate Regressor Results: ", linear_model.LinearRegression()),
    )
    for title, model in models:
        _fit_and_report(title, model, X_train, y_train, X_test, y_test, test_set["Player"])
def main():
    """Run the regression on every available feature, then on a fixed subset."""
    print("\nRegression With All Features: ")
    run_regression()

    # Hand-picked columns used for the second, reduced run.
    chosen_columns = ["PTS", "TRB", "AST", "All-Star", "FGA", "WS", "FG%"]
    print("\nRegression with Subset of Features {}".format(chosen_columns))
    run_regression(chosen_columns)


# Only run the experiments when executed as a script, not on import.
if __name__ == "__main__":
    main()