# Import packages
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the data and split it into training and test sets.
# x_train: feature rows of the training set
# x_test:  feature rows of the test set
# y_train: labels of the training set
# y_test:  labels of the test set
df = pd.read_excel('股票客户流失.xlsx', engine='openpyxl')
print(df.head())
# Every column except the '是否流失' label column is used as a feature.
x = df.drop(columns='是否流失')
y = df['是否流失']
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=1
)
# Build the model (logistic regression with default hyper-parameters).
lr = LogisticRegression()
# Train the model on the training split.
lr.fit(x_train, y_train)
# Predict labels for the held-out test split.
y_pred = lr.predict(x_test)
# Print the actual labels next to the predicted labels for comparison.
# list(...) keeps only the values, so both columns line up by position on a
# fresh 0..n-1 index instead of the index y_test carries over from df.
df1 = pd.DataFrame({'实际值': list(y_test), '预测值': list(y_pred)})
print(df1.head(10))
# Accuracy evaluation.
# The accuracy is computed twice -- once with accuracy_score on the
# predictions and once with the estimator's own score() -- and both values
# are printed.
score = accuracy_score(y_test, y_pred)
print('精确度:', score)
score1 = lr.score(x_test, y_test)
print('精确度:', score1)
# Print the predicted class probabilities.
y_pred_proba = lr.predict_proba(x_test)
print(y_pred_proba[:5])
# NOTE(review): the column names assume predict_proba returns the
# "not churned" class first and the "churned" class second (i.e. the order
# of lr.classes_) -- confirm against the label encoding in the spreadsheet.
df2 = pd.DataFrame(y_pred_proba, columns=['不流失概率', '流失概率'])
# Rows with the highest churn probability come first.
df2 = df2.sort_values('流失概率', ascending=False)
print(df2)
# [9] Print the model evaluation metrics.
from sklearn.metrics import classification_report
res = classification_report(y_test, y_pred)
print(res)
# [10] Compute the ROC points and plot hit rate, false-alarm rate and KS
# against the decision threshold.
from sklearn.metrics import roc_curve
# Column 1 of predict_proba is used as the score for the ROC computation.
fpr, tpr, thres = roc_curve(y_test, y_pred_proba[:, 1])
df3 = pd.DataFrame({'假报警率': fpr, '命中率': tpr, '阈值': thres})
# KS at each threshold = hit rate (TPR) - false-alarm rate (FPR).
df3['ks'] = df3['命中率'] - df3['假报警率']
print(df3.head())
import matplotlib.pyplot as plt
# NOTE(review): SimHei is presumably selected so the Chinese axis/legend
# text renders -- the font must be installed on the machine running this.
plt.rcParams['font.sans-serif'] = ['simhei']
# The first point returned by roc_curve is left out of all three curves.
# NOTE(review): presumably because its threshold is a sentinel above every
# score -- see the roc_curve documentation.
plt.plot(thres[1:], tpr[1:])
plt.plot(thres[1:], fpr[1:])
plt.plot(thres[1:], tpr[1:] - fpr[1:])
plt.xlabel("阈值")
plt.legend(["命中率", '假报警率', 'ks曲线'])
# Show thresholds from high to low along the x axis.
plt.gca().invert_xaxis()
plt.show()
# Print the row(s) at which the KS statistic reaches its maximum.
m1 = df3['ks'].max()
print(df3[df3['ks'] == m1])
# Tags: plt, pred, model, df3, KS, print, train, test, evaluation
# Source: https://www.cnblogs.com/JK8395/p/16803494.html