-
Notifications
You must be signed in to change notification settings - Fork 49
/
22_stepwise_forward_selection.py
44 lines (37 loc) · 1.49 KB
/
22_stepwise_forward_selection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import statsmodels.regression.linear_model as sm
import pandas as pd
def Stepwise_Forward_Selection(Data, Inputs, Output, *, initial_list=None,
                               threshold_in=0.05, verbose=True):
    """Select predictors by forward stepwise OLS regression.

    Starting from ``initial_list``, each round fits one OLS model (with an
    intercept) per not-yet-selected column, consisting of the already
    selected columns plus that candidate.  The candidate whose coefficient
    has the smallest p-value is added, provided that p-value is below
    ``threshold_in``; otherwise the search stops.

    Args:
        Data: DataFrame holding both predictor and response columns.
        Inputs: List of candidate predictor column labels in ``Data``.
        Output: Response column label(s) in ``Data``.
        initial_list: Column labels to treat as already selected.
            Defaults to an empty selection.
        threshold_in: A candidate is added only if its p-value is strictly
            below this value.
        verbose: When true, print one line per added feature.

    Returns:
        List of selected column labels, in the order they were added
        (preceded by the entries of ``initial_list``).
    """
    # Import the helper from the module that defines it instead of relying
    # on ``statsmodels.regression.linear_model`` re-exporting it.
    from statsmodels.tools.tools import add_constant

    X = Data[Inputs]
    y = Data[Output]
    included = [] if initial_list is None else list(initial_list)

    while True:
        # Keep the original column order so ties are broken deterministically
        # (a set difference would yield an arbitrary order).
        excluded = [column for column in X.columns if column not in included]
        if not excluded:
            break

        # Explicit float dtype: a dtype-less empty Series defaults to object.
        new_pval = pd.Series(index=excluded, dtype=float)
        for new_column in excluded:
            design = add_constant(pd.DataFrame(X[included + [new_column]]))
            model = sm.OLS(y, design).fit()
            new_pval.loc[new_column] = model.pvalues[new_column]

        best_pval = new_pval.min()
        # Written as ``not <`` so that a NaN minimum also ends the search.
        if not best_pval < threshold_in:
            break

        # idxmin() yields the column *label*; argmin() yields an integer
        # position on current pandas, which must not be stored in `included`.
        best_feature = new_pval.idxmin()
        included.append(best_feature)
        if verbose:
            print('Add {:30} with p-value {:.6}'.format(best_feature, best_pval))

    return included
def build_csv(data, columnlist, output_path="Stepwise_selected.csv"):
    """Write the chosen columns of ``data`` to a CSV file.

    Args:
        data: Source DataFrame.
        columnlist: Column labels to export, in the desired output order.
            A label listed more than once is written only once.
        output_path: Destination file; defaults to ``Stepwise_selected.csv``
            in the current working directory.

    The row index is not written to the file.
    """
    # Build the frame in one step instead of inserting columns one by one.
    selected = pd.DataFrame({name: data[name] for name in columnlist})
    selected.to_csv(output_path, index=False)
def use_package():
    """Run forward selection on the input CSV and export the chosen columns.

    Reads ``./14_input_data.csv``, offers every column except the last one
    as a candidate predictor for ``SalePrice``, and hands the selected
    columns to ``build_csv``.
    """
    frame = pd.read_csv("./14_input_data.csv")
    # NOTE(review): dropping the last column presumably removes the
    # SalePrice response from the candidates — confirm against the data file.
    candidate_columns = list(frame.columns)[:-1]
    chosen = Stepwise_Forward_Selection(frame, candidate_columns, ['SalePrice'])
    build_csv(frame, chosen)
# Run the pipeline immediately; there is no __main__ guard, so this also
# executes when the module is imported.
use_package()