import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression,Ridge
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier,BaggingClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.metrics import roc_curve
from sklearn.metrics import auc, precision_recall_curve
from sklearn.model_selection import GridSearchCV
# Load the loan dataset; the path is relative to the current working directory.
path = 'data/data_loan.csv'
data = pd.read_csv(path)

# Features are every column except `user_id` and `y`; `y` is the target.
data_x = data.drop(columns=['user_id', 'y'])
data_y = data['y']

# 70 % of the rows are used for fitting, the rest is the hold-out set.
# A fixed seed keeps the split (and therefore every metric computed on it)
# reproducible from one run to the next.
x, test_x, y, test_y = train_test_split(
    data_x, data_y, train_size=0.7, random_state=42
)
# Candidate estimators, each fitted with its default hyper-parameters and
# compared by ROC AUC on the hold-out split.  Only estimator classes belong
# in this list (metric functions such as roc_auc_score are not models).
models = [
    LogisticRegression,
    Ridge,
    SVC,
    DecisionTreeClassifier,
    KNeighborsClassifier,
    RandomForestClassifier,
    BaggingClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
]
for model_cls in models:
    # Instantiate the class taken from the list so that every candidate is
    # actually trained, not just one fixed model.
    cls = model_cls()
    cls.fit(x, y)
    # Not every estimator exposes predict_proba (Ridge never does, SVC only
    # when built with probability=True), so fall back to another continuous
    # score; ROC AUC depends only on how the samples are ranked.
    if hasattr(cls, 'predict_proba'):
        pre_y = cls.predict_proba(test_x)[:, 1]
    elif hasattr(cls, 'decision_function'):
        pre_y = cls.decision_function(test_x)
    else:
        # NOTE(review): using a regressor's raw output as a score assumes `y`
        # is numeric with the larger value as the positive class -- confirm.
        pre_y = cls.predict(test_x)
    print('%s: AUC=%.4f' % (model_cls.__name__, roc_auc_score(test_y, pre_y)))
# Hyper-parameter search for a random forest: a 4 x 3 grid evaluated with
# 5-fold cross-validation, candidates ranked by ROC AUC, 4 parallel jobs.
grid_param = [
    {
        'n_estimators': [50, 100, 200, 300],
        'max_depth': [3, 5, 7],
    }
]
cls = RandomForestClassifier()
searcher = GridSearchCV(
    estimator=cls,
    param_grid=grid_param,
    scoring='roc_auc',
    cv=5,
    n_jobs=4,
)
searcher.fit(x, y)

# Keep the best configuration found by the search and score the hold-out
# rows with it; column 1 of predict_proba is the second entry of classes_.
best_cls = searcher.best_estimator_
pre_y = best_cls.predict_proba(test_x)[:, 1]
# Confusion matrix of the tuned model on the hold-out split.
# Pair the true labels with the labels predicted by `best_cls`; `.values`
# drops the shuffled index of `test_y` so both columns align by position.
df = pd.DataFrame({
    '真实类别': test_y.values,
    '预测类别': best_cls.predict(test_x),
})
confusion_matrix = pd.crosstab(df['真实类别'], df['预测类别'], rownames=['真实类别'], colnames=['预测类别'])
# Print the confusion matrix
print(confusion_matrix)
# Visualise the confusion matrix as an annotated heat map (integer counts).
# NOTE(review): the CJK labels below need a matplotlib font that contains
# those glyphs, otherwise they may render as empty boxes -- confirm locally.
sns.heatmap(confusion_matrix, annot=True, fmt='d')
plt.title('混淆矩阵')
plt.xlabel('预测类别')
plt.ylabel('真实类别')
plt.show()
# Precision/recall trade-off of the tuned model on the hold-out split:
# `test_y` holds the true labels, `pre_y` the positive-class scores.
precision, recall, thresholds = precision_recall_curve(test_y, pre_y)
# Plot the PR curve
plt.plot(recall, precision, label='PR Curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('PR Curve')
# Without a legend the label passed to plt.plot is never displayed.
plt.legend()
plt.show()
# ROC curve of the tuned model on the hold-out split:
# `test_y` holds the true labels, `pre_y` the positive-class scores.
fpr, tpr, _ = roc_curve(test_y, pre_y)
# Store the area under a separate name so the imported `auc` function is not
# overwritten by its own result.
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, label='ROC curve (area=%.2f)' % roc_auc)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# The legend is what makes the AUC label visible on the figure.
plt.legend()
plt.show()