# cut -- binning on explicit bin edges (pd.cut)
# Bucket the loan term into labelled bands.  With right=False every bin is
# left-closed / right-open: [3, 6), [6, 9), [9, 12), [12, 15), [15, inf).
# Values below 3 (and missing values) fall outside all bins and become NaN.
sx['kh_loanterm'] = pd.cut(
    sx['kh_loanterm'],
    bins=[3, 6, 9, 12, 15, np.inf],
    labels=['A_3', 'B_6', 'C_9', 'D_12', 'E_15'],
    right=False,
)
def 朴道_海纳综合指数V2_申请命中网络贷款类机构数_trans(x):
    """Map a raw value of a continuous feature to its bin label.

    Binning transform for the continuous feature
    ``朴道_海纳综合指数V2_申请命中网络贷款类机构数``.  The bins are the
    left-open / right-closed intervals (-inf, 0.5], (0.5, 1.5], (1.5, 2.5],
    (2.5, 13.5] and (13.5, inf].

    Args:
        x: Numeric feature value; NaN or None when the value is missing.

    Returns:
        A label of the form ``"i(start, end]"`` where ``i`` is the 1-based
        bin number, or ``'0_nan'`` when ``x`` falls in no bin (NaN, None
        or -inf).
    """
    if x is None:
        # None cannot be order-compared with floats; treat it as missing
        # instead of raising TypeError.
        return '0_nan'
    inf = np.inf
    bins = [-inf, 0.5, 1.5, 2.5, 13.5, inf]
    # Walk consecutive (start, end] edge pairs, numbering bins from 1.
    for i, (start, end) in enumerate(zip(bins, bins[1:]), start=1):
        if start < x <= end:
            # Bin label format: i(start, end]
            return "{0}({1}, {2}]".format(i, start, end)
    # NaN fails every comparison above, so missing values end up here.
    return '0_nan'
# Extend the categorical column's category list with '空值' ("empty value").
# NOTE(review): presumably done so that missing entries can later be filled
# with that label -- a categorical column rejects values that are not among
# its categories.  `i` and `fksx1` come from surrounding context not shown
# in this snippet; confirm against the caller.
fksx1[i]=fksx1[i].cat.add_categories(['空值'])
# qcut -- quantile-based binning (pd.qcut)
# Quantile-bin every column listed in `coll`, keeping the values that were
# excluded from binning (negative or missing) as they appear in `sx2`.
for i in coll:
    # Only non-negative values take part in the 5-quantile binning;
    # duplicates='drop' merges quantile edges that coincide.  Assigning the
    # result back aligns on the index, so excluded rows become NaN.
    sx[i] = pd.qcut(sx.loc[sx[i] >= 0, i], q=5, duplicates='drop')
    # Turn the interval categories into plain strings; astype('str') renders
    # NaN as the literal 'nan', which is mapped back to a real NaN and then
    # filled from the matching row of sx2.
    # The result is assigned back explicitly: calling replace/fillna with
    # inplace=True on `sx[i]` is chained assignment and does not update `sx`
    # when pandas Copy-on-Write is active.
    sx[i] = sx[i].astype('str').replace('nan', np.nan).fillna(sx2[i])