-
Notifications
You must be signed in to change notification settings - Fork 12
/
kscurve.py
88 lines (85 loc) · 3.9 KB
/
kscurve.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import xlrd
import matplotlib.pyplot as plt
# Read the Excel data.
# NOTE(review): xlrd 2.0+ only reads legacy .xls workbooks; opening an .xlsx
# file like this one needs xlrd < 2.0 -- confirm the installed version.
fname = "XX.xlsx"
sheet_name = "Sheet2"
bk = xlrd.open_workbook(fname)
shxrange = range(bk.nsheets)
try:
    sh = bk.sheet_by_name(sheet_name)
except xlrd.XLRDError:
    # Nothing below can run without the sheet, so report it and stop here
    # rather than failing later with a NameError on `sh`.
    print("no sheet in %s named %s" % (fname, sheet_name))
    raise
nrows = sh.nrows
ncols = sh.ncols
# Row 0 is skipped (presumably the header row -- verify against the workbook);
# the column names are assigned explicitly below instead.
row_list = [sh.row_values(i) for i in range(1, nrows)]
data = pd.DataFrame(row_list)
data.columns = ['cus_num', 'name', 'id', 'cell', 'scorebankv2', 'scoreconsoffv2', 'scorecreditbt', 'scorelargecashv1', 'scorelargecashv2', 'scorepettycashv1', "y"]
# KS computation
def _ks_curves(actuals, predictionScore, num):
    """Compute the three curves that ``ks`` plots.

    Scores are coerced to numbers and rows whose score is missing or
    non-numeric are dropped.  The remaining rows are sorted by score and
    ``num + 1`` cut-off scores are taken at evenly spaced ranks (the last
    one being the highest score).  For every cut-off the share of rows
    scoring strictly below it is computed separately for label 0 and
    label 1; rows with any other label are ignored.

    Returns:
        ``(TPR, FPR, KS)`` -- three lists of ``num + 1`` floats where
        ``TPR`` is the label-0 share, ``FPR`` the label-1 share and
        ``KS`` is ``FPR - TPR``.

    Raises:
        ValueError: if ``num`` is smaller than 1, or if no label-0 or no
            label-1 row is left after dropping invalid scores.
    """
    if num < 1:
        raise ValueError("interval_num must be a positive integer")

    ksdata = pd.DataFrame({"actuals": actuals})
    ksdata["predictionScore"] = pd.to_numeric(predictionScore, errors="coerce")
    ksdata = ksdata.dropna(subset=["predictionScore"])
    ksdata = ksdata.sort_values("predictionScore")

    scores = ksdata["predictionScore"].to_numpy()
    is_zero = (ksdata["actuals"] == 0).to_numpy()
    is_one = (ksdata["actuals"] == 1).to_numpy()
    total_zero = int(is_zero.sum())
    total_one = int(is_one.sum())
    if total_zero == 0 or total_one == 0:
        # Either rate would otherwise be a division by zero.
        raise ValueError("actuals must contain both 0 and 1 labels "
                         "among rows with a numeric predictionScore")

    # Evenly spaced ranks; the last one is pulled back onto the final row.
    n = len(scores)
    cuts = [i * n // num for i in range(num + 1)]
    cuts[-1] -= 1

    # Scores are sorted, so the left insertion point of a cut-off score is
    # the number of rows strictly below it; the prefix sums then give the
    # per-label counts among those rows without rescanning the data.
    below = np.searchsorted(scores, scores[cuts], side="left")
    zero_below = np.concatenate(([0], np.cumsum(is_zero)))
    one_below = np.concatenate(([0], np.cumsum(is_one)))
    tpr = zero_below[below] / total_zero
    fpr = one_below[below] / total_one
    return tpr.tolist(), fpr.tolist(), (fpr - tpr).tolist()


def ks(actuals, predictionScore, name=None, interval_num=None):
    """Plot the KS curve of a score against binary labels and return the KS value.

    Args:
        actuals: labels, compared against 0 and 1.
        predictionScore: scores paired with ``actuals``; values that cannot
            be converted to a number are discarded together with their label.
        name: optional text appended to the plot title.
        interval_num: number of rank intervals (``interval_num + 1`` points
            are plotted); defaults to 10.

    Returns:
        The largest value of ``FPR - TPR`` over the cut-offs, or ``None``
        (after printing a message) when the two inputs differ in length.
        The difference is signed, not absolute, so a meaningful result
        relies on label-1 rows tending to score lower than label-0 rows.

    Raises:
        ValueError: if ``interval_num`` is smaller than 1 or one of the two
            labels is absent from the usable rows.
    """
    num = 10 if interval_num is None else interval_num
    if len(actuals) != len(predictionScore):
        print("invalid length between actuals and predictionScore")
        return None

    TPR, FPR, KS = _ks_curves(actuals, predictionScore, num)
    best = max(KS)
    t = int(np.argmax(KS))
    ranks = np.arange(num + 1)

    plt.figure(figsize=(10, 6))
    plt.plot(ranks, TPR, c='green', linewidth=2.3, linestyle="-", label="TPR")
    plt.plot(ranks, FPR, c='blue', linewidth=2.3, linestyle="-", label="FPR")
    plt.plot(ranks, KS, c="m", linewidth=2.5, linestyle="-", label="KS")
    plt.xlabel('Rank', fontsize=16)
    plt.ylabel('Percentage Responders Captured', fontsize=16)
    if name is None:
        plt.title('KS Plot', fontsize=16)
    else:
        plt.title('KS Plot of %s ' % name, fontsize=16)
    plt.annotate('ks=%f' % best, fontsize=16, xy=(t - 0.1, .45), xytext=(.7, .6),
                 arrowprops=dict(facecolor='k', shrink=0.05, connectionstyle="arc3,rad=.1"))
    plt.grid(True)
    plt.legend(loc='upper left')
    # Dashed vertical segment marking the rank where the gap is widest.
    plt.plot([t, t], [TPR[t], FPR[t]], color='red', linewidth=2.5, linestyle="--")
    plt.show()
    print('ks=%f' % best)
    return best
# Draw one KS plot per score column; None keeps the default interval count.
score_intervals = (
    ("scorebankv2", 20),
    ("scoreconsoffv2", None),
    ("scorecreditbt", 30),
    ("scorelargecashv1", None),
    ("scorelargecashv2", None),
    ("scorepettycashv1", None),
)
for score_column, intervals in score_intervals:
    ks(actuals=data["y"], predictionScore=data[score_column],
       name=score_column, interval_num=intervals)