# Hand-rolled logistic-regression scorecard experiment.
# Step 1: fix the good/bad samples and the random seed.
# First, look at how every variable is binned.
import pycard as pc

data_sd = X1
num_cols = X1.columns

# One binner instance is reused for every column; each fit() refreshes
# clf.woe_df_, so the table is copied before the next fit can touch it.
clf = pc.NumBin(max_bins_num=7, min_bin_samples=400)
woe_frames = []
for feature in num_cols:
    # The label lives in the same frame (it is read as data_sd['target']
    # below); binning it against itself carries no information.
    if feature == 'target':
        continue
    # Skip columns that contain no observed value at all.
    if not data_sd[feature].notna().any():
        continue
    clf.fit(data_sd[feature], data_sd['target'])
    woe_frames.append(clf.woe_df_.copy())

# DataFrame.append was removed in pandas 2.0; concatenate once instead.
# An empty result keeps the original "empty DataFrame" outcome.
num_iv_woedf = pd.concat(woe_frames) if woe_frames else pd.DataFrame()
import statsmodels.api as sm  # alternative logistic-regression implementation

# Fixed seed so that the good/bad sample split is reproducible between runs
# (the notes at the top of this script ask for a fixed random seed).
RANDOM_SEED = 42

# NOTE(review): train_test_split is not imported in the visible part of the
# file; presumably sklearn.model_selection.train_test_split - confirm.
# The feature matrix is "every column but the last", i.e. this assumes
# 'target' is the last column of final_data - TODO confirm.
x_train, x_test, y_train, y_test = train_test_split(
    final_data.iloc[:, :-1],
    final_data['target'],
    test_size=0.3,
    stratify=final_data['target'],
    random_state=RANDOM_SEED,
)

# Add the intercept column. X1 is rebound here: it no longer refers to the
# frame that was used for the binning overview.
X1 = sm.add_constant(x_train)
X2 = sm.add_constant(x_test)

logit = sm.Logit(y_train.astype(float), X1.astype(float))
result = logit.fit()

# Printed explicitly so the output also shows up outside a notebook.
print(result.summary())
# Coefficients are expected to be all positive or all negative.
print(result.params)
import math

# toad is only imported further down in the visible part of the file;
# importing it here keeps this section runnable on its own.
import toad

# Score scaling: score = q - p * ln(prob / (1 - prob)).
# p = 50 / ln(2) means the score moves 50 points each time the odds double
# (the old inline comment mentioned 20, but the code uses 50).
p = 50 / math.log(2)
# Offset chosen so that odds of 0.1 map to a score of 600.
q = 600 + 50 * math.log(0.1) / math.log(2)

# Columns of the KS bucket table that are shown.
_KS_COLUMNS = ['min', 'max', 'bads', 'goods', 'total',
               'bad_rate', 'good_rate', 'ks', 'lift']

# Probabilities are clipped into [eps, 1 - eps] so that a prediction of
# exactly 0 or 1 does not raise in the log-odds transform.
_PROB_EPS = 1e-12


def _prob_to_score(prob, factor, offset):
    """Map one predicted probability to a score: offset - factor * ln(odds)."""
    prob = min(max(prob, _PROB_EPS), 1 - _PROB_EPS)
    return offset - factor * math.log(prob / (1 - prob))


def _score_frame(probabilities, factor, offset):
    """Return a one-column frame ('分数') holding the score of each probability."""
    frame = pd.DataFrame({'分数': probabilities})
    frame['分数'] = frame['分数'].apply(
        lambda prob: _prob_to_score(prob, factor, offset)
    )
    return frame


# Scores for the training (X1) and test (X2) design matrices.
p1 = _score_frame(result.predict(X1), p, q)
p2 = _score_frame(result.predict(X2), p, q)

# NOTE(review): Y1 / Y2 are not defined in the visible part of the file;
# presumably the train / test labels (y_train / y_test) - confirm.
xlj = toad.metrics.KS_bucket(p1['分数'], Y1, bucket=8, method='quantile')
print(xlj[_KS_COLUMNS])

csj = toad.metrics.KS_bucket(p2['分数'], Y2, bucket=8, method='quantile')
print(csj[_KS_COLUMNS])

# Stability of the score distribution between the two samples.
print(toad.metrics.PSI(p1['分数'], p2['分数']))
# Second approach: build the scorecard by hand from the coefficients.
import math

# coe[0] is used as the intercept when computing baseScore below; the
# remaining entries are applied one-to-one to the model variables.
# NOTE(review): presumably copied from the fitted model's parameters -
# confirm they match the current fit.
coe = [
    -2.0111,
    0.1151,
    0.3572,
    0.3207,
    0.1028,
    1.2132,
    0.5397,
    0.1861,
    0.4674,
    0.5649,
    0.7729,
    0.6253,
    0.8328,
]

# Base score is 600 and PDO is 20 (every 20 points the good/bad odds double).
# NOTE(review): the original note states a good/bad ratio of 20, but the
# offset below is computed with log(0.1) - confirm which one is intended.
p = 20 / math.log(2)  # factor = 20 / ln(2)
q = 600 + 20 * math.log(0.1) / math.log(2)  # offset
# Intercept contribution folded into the base score (rounded, kept as float).
baseScore = round(q - p * coe[0], 0)
print(p, q, baseScore)
# Variables that enter the model, in the same order as coe[1:].
model_col = [
    '华道_申请机构数',
    '华道_互金类金融机构平均授信额度',
    '中智诚_D90_手机号码__总申请机构数',
    '中智诚_D540_身份证号码_银行类_申请数',
    '中智诚_D720_身份证号码_银行类_申请机构数',
    '中智诚_D720_手机号码_总申请机构数',
    '度小满_早逾_小贷近360天查询机构数',
    '度小满_早逾_小贷近1080天查询机构数',
    '新颜_申请雷达_查询网络贷款类机构数',
    'UPPA010',
    'UPPB014',
    'UPPC224',
]


# (The variable list above repeats what was already defined earlier.)
def get_score(coe, woe, factor):
    """Return the rounded partial score of every value in ``woe``.

    Each score is ``round(coe * w * factor, 0)``.

    Args:
        coe: Coefficient of one variable.
        woe: Iterable of that variable's values, one per row.
        factor: Scaling factor of the scorecard.

    Returns:
        A list with one rounded score per input value, in input order.
    """
    # The multiplication signs had been lost ("coewfactor"), which made the
    # function raise NameError on every call.
    return [round(coe * w * factor, 0) for w in woe]
# Partial score of every model variable on X1.
# coe[0] is the intercept, so coe[1:] must line up with model_col.
if len(coe) != len(model_col) + 1:
    raise ValueError('coe must hold one intercept plus one coefficient per model variable')
for feature, coefficient in zip(model_col, coe[1:]):
    X1[feature + '_scores'] = get_score(coefficient, X1[feature], p)

# Sum exactly the columns written above, so an unrelated column that happens
# to end in '_scores' cannot leak into the total.
scores_col = [feature + '_scores' for feature in model_col]
# Final score: base score minus the sum of the partial scores.
X1['fenshu'] = baseScore - X1[scores_col].sum(axis=1)
# Same variable list and helper as before, declared again ahead of the X2
# scoring so that this section can be run on its own.
model_col = [
    '华道_申请机构数',
    '华道_互金类金融机构平均授信额度',
    '中智诚_D90_手机号码__总申请机构数',
    '中智诚_D540_身份证号码_银行类_申请数',
    '中智诚_D720_身份证号码_银行类_申请机构数',
    '中智诚_D720_手机号码_总申请机构数',
    '度小满_早逾_小贷近360天查询机构数',
    '度小满_早逾_小贷近1080天查询机构数',
    '新颜_申请雷达_查询网络贷款类机构数',
    'UPPA010',
    'UPPB014',
    'UPPC224',
]


def get_score(coe, woe, factor):
    """Return the rounded partial score of every value in ``woe``.

    Each score is ``round(coe * w * factor, 0)``.

    Args:
        coe: Coefficient of one variable.
        woe: Iterable of that variable's values, one per row.
        factor: Scaling factor of the scorecard.

    Returns:
        A list with one rounded score per input value, in input order.
    """
    # The multiplication signs had been lost ("coewfactor"), which made the
    # function raise NameError on every call.
    return [round(coe * w * factor, 0) for w in woe]
# Partial score of every model variable on X2.
# coe[0] is the intercept, so coe[1:] must line up with model_col.
if len(coe) != len(model_col) + 1:
    raise ValueError('coe must hold one intercept plus one coefficient per model variable')
for feature, coefficient in zip(model_col, coe[1:]):
    X2[feature + '_scores'] = get_score(coefficient, X2[feature], p)

# Sum exactly the columns written above, so an unrelated column that happens
# to end in '_scores' cannot leak into the total.
scores_col = [feature + '_scores' for feature in model_col]
# Final score: base score minus the sum of the partial scores.
X2['fenshu'] = baseScore - X2[scores_col].sum(axis=1)
import toad

# Pair every X2 score with its label: both sides get their row index turned
# into an 'index' column, then the labels are left-joined onto the scores.
# NOTE(review): Y2 is not defined in the visible part of the file; it is
# expected to provide a 'target' column after pd.DataFrame(Y2) - confirm.
gg = (
    pd.DataFrame(X2['fenshu'])
    .reset_index()
    .merge(pd.DataFrame(Y2).reset_index(), on='index', how='left')
)
# KS table of the hand-built score over 10 quantile buckets.
toad.metrics.KS_bucket(gg['fenshu'], gg['target'], bucket=10, method='quantile')
# Scorecard table: for every variable, one row per distinct value of that
# variable together with the score stored in its '<variable>_scores' column.
# NOTE(review): `col` and `ZT` are defined outside the visible part of the file.
score_tables = []
for i in col:
    # Work on a copy so that renaming / adding columns never writes into ZT.
    dd = ZT.drop_duplicates(subset=[i])[[i, i + '_scores']].copy()
    dd.columns = ['区间', '分数']
    dd['变量'] = i
    score_tables.append(dd)

# DataFrame.append was removed in pandas 2.0; concatenate once instead.
# An empty `col` keeps the original "empty DataFrame" outcome.
dpd1 = pd.concat(score_tables) if score_tables else pd.DataFrame()