python实现KS曲线,相关使用方法请参考上篇博客-R语言实现KS曲线
代码如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
####################### PlotKS ##########################
def _ks_cum_rates(frame, pred_ascending, bad_ascending):
    """Return the cumulative good and bad rates of *frame* for one sort order."""
    ordered = frame.sort_values(
        by=['pred', 'bad'],
        ascending=[pred_ascending, bad_ascending],
    ).reset_index(drop=True)
    cum_good = ordered['good'].cumsum() / float(ordered['good'].sum())
    cum_bad = ordered['bad'].cumsum() / float(ordered['bad'].sum())
    return cum_good, cum_bad


def PlotKS(preds, labels, n, asc):
    """Compute a KS table, print the KS value and draw the KS chart.

    The module is expected to provide ``np`` (numpy), ``DataFrame`` and
    ``Series`` (pandas) and ``plt`` (matplotlib.pyplot) at module level.

    Args:
        preds: Predicted values, one per observation.
        labels: Observed outcomes; 1 means "bad", 0 means "good".
        n: Number of quantile points sampled along the population axis.
            Must be positive.
        asc: 1 when ``preds`` is a score (sorted ascending), 0 when
            ``preds`` is a probability (sorted descending).

    Returns:
        A DataFrame with columns ``tile``, ``cumsum_good``, ``cumsum_bad``
        and ``ks``. The first row is all zeros (the origin of the curves);
        the following rows are the sampled quantile points.

    Raises:
        ValueError: If ``asc`` is not 0 or 1, ``n`` is not positive, or the
            input is empty.
    """
    if asc not in (0, 1):
        raise ValueError('asc must be 1 (score) or 0 (probability)')
    if n <= 0:
        raise ValueError('n must be a positive number')

    frame = DataFrame({'bad': labels, 'pred': preds})
    if frame.empty:
        raise ValueError('preds and labels must not be empty')
    frame['good'] = 1 - frame['bad']

    # Ties in ``pred`` are ordered both ways (bad last, then bad first) and
    # the two cumulative curves are averaged, so the result does not depend
    # on an arbitrary order among tied predictions.
    pred_ascending = bool(asc == 1)
    good1, bad1 = _ks_cum_rates(frame, pred_ascending, True)
    good2, bad2 = _ks_cum_rates(frame, pred_ascending, False)

    curve = DataFrame({
        'cumsum_good': (good1 + good2) / 2,
        'cumsum_bad': (bad1 + bad2) / 2,
    })
    curve['ks'] = curve['cumsum_bad'] - curve['cumsum_good']
    curve['tile'] = np.arange(1, len(curve) + 1) / float(len(curve))

    # Quantile levels 1/n, 2/n, ..., 1 (the leading 0 is dropped).
    # NOTE(review): np.arange with a float step can be affected by rounding
    # for some n -- confirm whether np.linspace would be preferable here.
    levels = list(np.arange(0, 1, 1.0 / n))
    levels.append(1)
    levels = levels[1:]

    # Translate the quantile levels into row positions of the curve.
    positions = Series(curve.index).quantile(q=levels)
    positions = list(np.ceil(positions).astype(int))

    columns = ['tile', 'cumsum_good', 'cumsum_bad', 'ks']
    sampled = curve.loc[positions, columns]
    origin = np.array([[0, 0, 0, 0]])
    ksds = DataFrame(
        np.concatenate([origin, sampled.values], axis=0),
        columns=columns,
    )

    peak = ksds['ks'].idxmax()
    ks_value = ksds['ks'].max()
    ks_pop = ksds['tile'][peak]
    print('ks_value is ' + str(np.round(ks_value, 4))
          + ' at pop = ' + str(np.round(ks_pop, 4)))

    # chart
    plt.plot(ksds.tile, ksds.cumsum_good, label='cum_good',
             color='blue', linestyle='-', linewidth=2)
    plt.plot(ksds.tile, ksds.cumsum_bad, label='cum_bad',
             color='red', linestyle='-', linewidth=2)
    plt.plot(ksds.tile, ksds.ks, label='ks',
             color='green', linestyle='-', linewidth=2)

    # Dashed guide lines through the point of maximum KS.
    plt.axvline(ks_pop, color='gray', linestyle='--')
    plt.axhline(ks_value, color='green', linestyle='--')
    plt.axhline(ksds.loc[peak, 'cumsum_good'], color='blue', linestyle='--')
    plt.axhline(ksds.loc[peak, 'cumsum_bad'], color='red', linestyle='--')
    plt.title('KS=%s ' % np.round(ks_value, 4)
              + 'at Pop=%s' % np.round(ks_pop, 4), fontsize=15)

    return ksds
####################### over ##########################
作图效果如下:
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:https://www.cnblogs.com/bigdatafengkong/p/9079093.html