import xlrd
import numpy as np
from sklearn.model_selection import train_test_split  # split into training and test sets
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score  # cross-validation
from sklearn import metrics
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay  # confusion matrix

# Specificity for binary 0/1 labels: TN / (TN + FP)
def specificity_loss_func(ground_truth, predictions):
    tp, tn, fn, fp = 0.0, 0.0, 0.0, 0.0
    for l, m in enumerate(ground_truth):
        if m == predictions[l] and m == 1:
            tp += 1
        if m == predictions[l] and m == 0:
            tn += 1
        if m != predictions[l] and m == 1:
            fn += 1
        if m != predictions[l] and m == 0:
            fp += 1
    return tn / (tn + fp)

# Read the spreadsheet
input_list = []
input_sheet = xlrd.open_workbook("./data.xls")
input_sheet = input_sheet.sheet_by_index(0)
nrows = input_sheet.nrows
for i in range(nrows):
    input_list.append(np.array(input_sheet.row_values(i)))
input_data = np.array(input_list)
# Drop the header row
input_data = input_data[1:, :]
# Drop the first column
input_data = input_data[:, 1:]
input_data = np.array(input_data, dtype=float)

svc_list = []
tree_list = []
adaboost_list = []
train_x, test_x, train_y, test_y = train_test_split(
    input_data[:, :-1], input_data[:, -1], test_size=0.2, random_state=10)

# 1. Decision tree model
# 1-1. Evaluation metrics
from sklearn import tree
model = tree.DecisionTreeClassifier(criterion='entropy')
model.fit(train_x, train_y)
result = model.predict(test_x)
prob = model.predict_proba(test_x)  # class-membership probabilities
acc = np.mean(result == test_y)
scores = cross_val_score(model, input_data[:, :-1], input_data[:, -1], cv=10)
precision = metrics.precision_score(test_y, result, average='micro')
recall = metrics.recall_score(test_y, result, average='micro')
f1 = metrics.f1_score(test_y, result, average='micro')
spe = specificity_loss_func(test_y, result)
# tree_list.append(acc)
print("Decision tree model")
print("Accuracy:", acc)
print("Precision:", precision)
print("Recall:", recall)
print("Specificity:", spe)
print("F1:", f1)
print("Cross-validation accuracy:", scores.mean())

# 1-2. Confusion matrix
confusion_mat = confusion_matrix(np.array(test_y), np.array(result))
disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mat)
disp.plot(
    include_values=True,
    cmap=plt.cm.Blues,
    ax=None,
    xticks_rotation="horizontal",
    values_format=".2f"
)
plt.title("Tree confusion matrix")
plt.show()

# 1-3. ROC curve: score each sample by the predicted probability of the positive class
roc = metrics.roc_auc_score(test_y, prob[:, 1])
print("AUC:", roc)
fpr, tpr, thresholds = metrics.roc_curve(test_y, prob[:, 1])
plt.plot(fpr, tpr, label="ROC curve (area={0})".format(round(roc, 2)))
plt.plot([0, 1], [0, 1], linestyle='dashed')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Tree-ROC")
plt.legend(loc='lower right')
plt.show()
print("\n\n")
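# --- Added sketch (not in the original post): sanity-check the hand-written
# specificity function against sklearn's confusion_matrix. For binary 0/1 labels,
# confusion_matrix(y_true, y_pred).ravel() returns (tn, fp, fn, tp), so this value
# should agree with `spe` computed for the decision-tree predictions above.
tn_chk, fp_chk, fn_chk, tp_chk = confusion_matrix(test_y, result).ravel()
print("Specificity (confusion_matrix check):", tn_chk / (tn_chk + fp_chk))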
print("逻辑回归模型") print("准确率:",acc) print("精确率:",precision) print("召回率:",recall) print("特异度:",spe) print("F1:",f1) print("交叉验证准确率:",scores.mean()) # 2-2.混淆矩阵 confusion_mat = confusion_matrix(np.array(test_y), np.array(LogisticRegressionModel_result)) disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mat) disp.plot( include_values=True, #每个单元格上显示具体数值 cmap=plt.cm.Blues, #热力图颜色设为蓝色 ax=None, #默认,绘制图的坐标轴,否则使用当前活动的坐标轴 xticks_rotation="horizontal",#默认,类似旋转日期刻度 values_format=".2f", #显示数值的格式,两位小数 ) plt.title("LR-ROC") #图标题 plt.show() # 2-3.ROC曲线 roc_list = [] for i in range(len(test_y)): #得到测试样本的个数,次数 roc_list.append(prob[i][int(test_y[i])]) #print(roc_list) # 15个测试集取0|1的概率 roc = metrics.roc_auc_score(test_y,np.array(roc_list)) #用真值,概率值计算AUC值 print("AUC值:",roc) fpr,tpr,thresholds=metrics.roc_curve(test_y,np.array(roc_list)) #真实值,概率值放入ROC曲线 # print('FPR:',fpr) # print('TPR:',tpr) # print('thresholds:',thresholds) plt.plot(fpr,tpr, label="ROC curve (area={0})".format(round(roc,2))) #画ROC曲线,保留两位小数 plt.plot([0,1],[0,1],linestyle='dashed') #dashed,线条为虚线 plt.xlabel("False Positive Rate") plt.ylabel("True Positive Rate") plt.title("LR-ROC") plt.legend(loc='lower right') #将图例显示在右下角 plt.show() print("\n\n") # 3.随机森林 # 3-1.评估指标 from sklearn.ensemble import RandomForestClassifier SLmodel = RandomForestClassifier() SLmodel.fit(train_x,train_y) SLmodel_result = SLmodel.predict(test_x) prob = SLmodel.predict_proba(test_x) acc = np.mean(SLmodel_result == test_y) scores = cross_val_score(SLmodel, input_data[:,:-1], input_data[:,-1],cv=10) precision = metrics.precision_score(np.argmax(prob, axis=-1), test_y, average='macro') recall = metrics.recall_score(np.argmax(prob, axis=-1), test_y, average="macro") f1 = metrics.f1_score(np.argmax(prob, axis=-1), test_y, average="macro") spe = specificity_loss_func(test_y, np.argmax(prob, axis=-1)) #tree_list.append(acc) print("随机森林模型") print("准确率:",acc) print("精确率:",precision) print("召回率:",recall) print("特异度:",spe) print("F1:",f1) print("交叉验证准确率:",scores.mean()) # 3-1.混淆矩阵 confusion_mat = confusion_matrix(np.array(test_y), np.array(SLmodel_result)) disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mat) disp.plot( include_values=True, cmap=plt.cm.Blues, ax=None, xticks_rotation="horizontal", values_format=".2f" ) plt.title("RF confusion matrix") plt.show() # 3-2.ROC曲线 roc_list = [] for i in range(len(test_y)): roc_list.append(prob[i][int(test_y[i])]) roc = metrics.roc_auc_score(test_y,np.array(SLmodel_result)) print("AUC值:",roc) fpr,tpr,thresholds=metrics.roc_curve(test_y,np.array(SLmodel_result)) plt.plot(fpr,tpr, label="ROC curve (area={0})".format(round(roc,2))) plt.plot([0,1],[0,1],linestyle='dashed') plt.xlabel("False Positive Rate") plt.ylabel("True Positive Rate") plt.title("RF-ROC") plt.legend(loc='lower right') # plt.show() print("\n\n") # 4.SVM模型 # 4-1.评估指标 from sklearn.svm import SVC SVCModel = SVC(probability=True) SVCModel.fit(train_x,train_y) SVCModel_result = SVCModel.predict(test_x) prob = SVCModel.predict_proba(test_x) # print(prob) acc = np.mean(SVCModel_result == test_y) scores = cross_val_score(SVCModel, input_data[:,:-1], input_data[:,-1],cv=10) precision = metrics.precision_score(np.argmax(prob, axis=-1), test_y, average='macro') recall = metrics.recall_score(np.argmax(prob, axis=-1), test_y, average="macro") f1 = metrics.f1_score(np.argmax(prob, axis=-1), test_y, average="macro") spe = specificity_loss_func(test_y, np.argmax(prob, axis=-1)) #svc_list.append(acc) print("SVM模型") print("准确率:",acc) print("精确率:",precision) print("召回率:",recall) 
print("特异度:",spe) print("F1:",f1) print("交叉验证准确率:",scores.mean()) # 4-2.混淆矩阵 confusion_mat = confusion_matrix(np.array(test_y), np.array(SVCModel_result)) disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mat) disp.plot( include_values=True, cmap=plt.cm.Blues, ax=None, xticks_rotation="horizontal", values_format=".2f", ) plt.title("SVM-confusion matrix") plt.show() # 4-3.ROC曲线 roc_list = [] for i in range(len(test_y)): roc_list.append(prob[i][int(test_y[i])]) roc = metrics.roc_auc_score(test_y,np.array(roc_list)) print("AUC值:",roc) fpr,tpr,thresholds=metrics.roc_curve(test_y,np.array(roc_list)) plt.plot(fpr,tpr, label="ROC curve (area={0})".format(round(roc,2))) plt.plot([0,1],[0,1],linestyle='dashed') plt.xlabel("False Positive Rate") plt.ylabel("True Positive Rate") plt.title("SVM-ROC") plt.legend(loc='lower right') plt.show() print("\n\n") # 5.AdaBoost模型 # 5-1.评估指标 from sklearn.ensemble import AdaBoostClassifier model = AdaBoostClassifier(n_estimators=100) model.fit(train_x,train_y) result = model.predict(test_x) prob = model.predict_proba(test_x) # print(prob) acc = np.mean(result == test_y) scores = cross_val_score(model, input_data[:,:-1], input_data[:,-1],cv=10) precision = metrics.precision_score(np.argmax(prob, axis=-1), test_y, average='macro') recall = metrics.recall_score(np.argmax(prob, axis=-1), test_y, average="macro") f1 = metrics.f1_score(np.argmax(prob, axis=-1), test_y, average="macro") spe = specificity_loss_func(test_y, np.argmax(prob, axis=-1)) #adaboost_list.append(acc) print("AdaBoost模型") print("准确率:",acc) print("精确率:",precision) print("召回率:",recall) print("特异度:",spe) print("F1:",f1) print("交叉验证准确率:",scores.mean()) print("特征重要性:",model.feature_importances_) # 5-2.混淆矩阵 confusion_mat = confusion_matrix(np.array(test_y), np.array(result)) disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mat) disp.plot( include_values=True, cmap=plt.cm.Blues, ax=None, xticks_rotation="horizontal", values_format=".2f" ) plt.title("Adboost confusion matrix") plt.show() #3.画ROC曲线 roc_list = [] for i in range(len(test_y)): roc_list.append(prob[i][int(test_y[i])]) roc = metrics.roc_auc_score(test_y,np.array(roc_list)) print("AUC值:",roc) fpr,tpr,thresholds=metrics.roc_curve(test_y,np.array(roc_list)) plt.plot(fpr,tpr, label="ROC curve (area={0})".format(round(roc,2))) plt.plot([0,1],[0,1],linestyle='dashed') plt.xlabel("False Positive Rate") plt.ylabel("True Positive Rate") plt.title("Adboost-ROC") plt.legend(loc='lower right') plt.show() # print(test_x) print("十次交叉验证准确率:",scores)