14、对二分类模型采用十折交叉验证评估
#导入必要的包
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset,DataLoader
from torch.nn import init
import torch.optim as optim
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
# Build a synthetic two-class dataset: `num_example` samples per class,
# each with `num_inputs` normally distributed features.
num_inputs, num_example = 200, 10000
# Class with label 1: features drawn from a normal with mean 2 and std 2.
x1 = torch.normal(mean=2.0, std=2.0, size=(num_example, num_inputs))
y1 = torch.ones(num_example, 1)
# Class with label 0: features drawn from a normal with mean -2 and std 2.
x2 = torch.normal(mean=-2.0, std=2.0, size=(num_example, num_inputs))
y2 = torch.zeros(num_example, 1)
# Stack both classes row-wise; the rows stay ordered by class
# (all label-1 rows first, then all label-0 rows).
x_data = torch.cat([x1, x2])
y_data = torch.cat([y1, y2])
# NOTE: the hold-out split below is commented out, so this section does not
# define train_x / test_x / train_y / test_y.
#train_x,test_x,train_y,test_y = train_test_split(x_data,y_data,shuffle=True,test_size=0.3,stratify=y_data)
# Build the data iterators for a stratified 70/30 hold-out split.
batch_size = 256
# The iterators need train_x / test_x / train_y / test_y, which nothing else
# in the script defines (the only split statement is commented out), so the
# split is performed here. Row indices are split instead of the tensors so
# that scikit-learn only ever handles NumPy arrays.
train_idx, test_idx = train_test_split(
    np.arange(x_data.shape[0]),
    shuffle=True,
    test_size=0.3,
    stratify=y_data.numpy().ravel(),
)
train_idx = torch.from_numpy(train_idx).long()
test_idx = torch.from_numpy(test_idx).long()
train_x, train_y = x_data[train_idx], y_data[train_idx]
test_x, test_y = x_data[test_idx], y_data[test_idx]

train_dataset = TensorDataset(train_x, train_y)
train_iter = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    num_workers=0,
    batch_size=batch_size,
)
test_dataset = TensorDataset(test_x, test_y)
test_iter = DataLoader(
    dataset=test_dataset,
    shuffle=True,
    num_workers=0,
    batch_size=batch_size,
)
# Model definition: layer sizes (input features, hidden units, output units).
num_input, num_hidden, num_output = 200, 256, 1


class net(nn.Module):
    """Two-layer fully connected network returning un-activated outputs.

    Neither layer has a bias and there is no activation between the layers,
    so the network as a whole computes a linear map of its input.

    Args:
        num_input: Number of input features.
        num_hidden: Number of units in the hidden layer.
        num_output: Number of output units.
    """

    def __init__(self, num_input, num_hidden, num_output):
        super().__init__()
        self.linear1 = nn.Linear(num_input, num_hidden, bias=False)
        self.linear2 = nn.Linear(num_hidden, num_output, bias=False)

    def forward(self, input):
        """Apply linear1 followed by linear2 to `input` and return the result."""
        return self.linear2(self.linear1(input))


model = net(num_input, num_hidden, num_output)
print(model)
# Output of print(model):
# net(
#   (linear1): Linear(in_features=200, out_features=256, bias=False)
#   (linear2): Linear(in_features=256, out_features=1, bias=False)
# )
# Initialise every model parameter from a normal distribution
# with mean 0 and standard deviation 0.001.
for param in model.parameters():
    init.normal_(param, mean=0, std=0.001)
# Training setup: learning rate, loss function and optimizer.
lr = 0.001
# BCEWithLogitsLoss applies the sigmoid itself, so the model outputs raw logits.
loss = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=lr)
def train(net, train_iter, test_iter, loss, num_epochs, batch_size, opt=None):
    """Train `net` and evaluate it on `test_iter` after every epoch.

    The outputs of `net` are treated as raw logits, so a sample is predicted
    as class 1 when its logit is positive (sigmoid(logit) > 0.5).

    Args:
        net: Model to train.
        train_iter: Iterable yielding (features, labels) training batches.
        test_iter: Iterable yielding (features, labels) evaluation batches.
        loss: Callable ``loss(logits, labels)``; assumed to return the mean
            loss of the batch (the default reduction of BCEWithLogitsLoss).
        num_epochs: Number of passes over `train_iter`; must be at least 1.
        batch_size: Unused; kept so existing callers keep working.
        opt: Optimizer that updates the parameters of `net`. When omitted,
            the module-level `optimizer` is used, which must have been built
            over the parameters of `net`.

    Returns:
        Tuple ``(train_loss, test_loss, train_acc, test_acc)`` of the last
        epoch. Losses are per-sample averages, accuracies are fractions.

    Raises:
        ValueError: If `num_epochs` is smaller than 1.
    """
    if num_epochs < 1:
        raise ValueError("num_epochs must be at least 1")
    if opt is None:
        opt = optimizer  # fall back to the module-level optimizer

    was_training = net.training
    train_ls, test_ls, train_acc, test_acc = [], [], [], []
    for epoch in range(num_epochs):
        net.train()
        train_ls_sum, train_acc_sum, n = 0.0, 0, 0
        for x, y in train_iter:
            y_pred = net(x)
            batch_loss = loss(y_pred, y)
            opt.zero_grad()
            batch_loss.backward()
            opt.step()
            # Weight the batch mean by the batch size so that the epoch value
            # is a per-sample average, independent of how batches are cut.
            train_ls_sum += batch_loss.item() * y.shape[0]
            # Logit > 0 is the decision boundary for probability 0.5.
            train_acc_sum += ((y_pred > 0) == (y > 0.5)).sum().item()
            n += y.shape[0]
        train_ls.append(train_ls_sum / n)
        train_acc.append(train_acc_sum / n)

        net.eval()
        test_ls_sum, test_acc_sum, n = 0.0, 0, 0
        # Evaluation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            for x, y in test_iter:
                y_pred = net(x)
                batch_loss = loss(y_pred, y)
                test_ls_sum += batch_loss.item() * y.shape[0]
                test_acc_sum += ((y_pred > 0) == (y > 0.5)).sum().item()
                n += y.shape[0]
        test_ls.append(test_ls_sum / n)
        test_acc.append(test_acc_sum / n)
        print('epoch %d, train_loss %.6f,test_loss %f, train_acc %.6f,test_acc %f'
              % (epoch + 1, train_ls[epoch], test_ls[epoch], train_acc[epoch], test_acc[epoch]))

    # Leave the model in the mode the caller had it in.
    net.train(was_training)
    return train_ls[-1], test_ls[-1], train_acc[-1], test_acc[-1]
# Helper that returns the training and validation data of one fold.
def get_kfold_data(k, i, X, y):
    """Split `X` and `y` into the training and validation parts of fold `i`.

    The rows are cut into `k` contiguous folds of ``X.shape[0] // k`` rows
    each; the last fold additionally takes the remaining rows. Rows are not
    shuffled here.

    Args:
        k: Number of folds.
        i: Zero-based index of the fold used for validation.
        X: Feature tensor, indexed by sample along dimension 0.
        y: Label tensor with the same number of rows as `X`.

    Returns:
        Tuple ``(X_train, y_train, X_valid, y_valid)``.

    Raises:
        ValueError: If `k` is not in ``1..X.shape[0]``, `i` is not in
            ``0..k-1``, or `X` and `y` have a different number of rows.
    """
    n_samples = X.shape[0]
    if y.shape[0] != n_samples:
        raise ValueError("X and y must have the same number of rows")
    if k < 1 or k > n_samples:
        raise ValueError("k must be between 1 and the number of samples")
    if not 0 <= i < k:
        raise ValueError("i must satisfy 0 <= i < k")

    fold_size = n_samples // k
    val_start = i * fold_size
    # The last fold runs to the end so the remainder rows are not dropped.
    val_end = n_samples if i == k - 1 else val_start + fold_size

    X_valid, y_valid = X[val_start:val_end], y[val_start:val_end]
    X_train = torch.cat((X[:val_start], X[val_end:]), dim=0)
    y_train = torch.cat((y[:val_start], y[val_end:]), dim=0)
    return X_train, y_train, X_valid, y_valid
# k-fold cross-validation driver.
def k_fold(k, X, y, shuffle=True):
    """Run k-fold cross-validation and collect the last-epoch metrics per fold.

    Uses the module-level `model`, `optimizer`, `loss`, `num_epochs` and
    `batch_size`, and the functions `get_kfold_data` and `train`.

    Args:
        k: Number of folds; must be at least 2 so every fold has training data.
        X: Feature tensor, indexed by sample along dimension 0.
        y: Label tensor with the same number of rows as `X`.
        shuffle: Permute the samples once before cutting folds. Needed when
            the rows are ordered by class, otherwise each validation fold
            would contain a single class.

    Returns:
        List ``[train_losses, valid_losses, train_accs, valid_accs]``; each
        entry is a list with one value per fold.

    Raises:
        ValueError: If `k` is smaller than 2.
    """
    import copy  # local import: only needed to snapshot the initial state

    if k < 2:
        raise ValueError("k must be at least 2")

    if shuffle:
        perm = torch.randperm(X.shape[0], device=X.device)
        X, y = X[perm], y[perm]

    # Snapshot the untrained state so every fold starts from the same model;
    # otherwise later folds would be evaluated on data already trained on.
    initial_model_state = copy.deepcopy(model.state_dict())
    initial_optimizer_state = copy.deepcopy(optimizer.state_dict())

    train_loss_to_data = []
    valid_loss_to_data = []
    train_acc_to_data = []
    valid_acc_to_data = []
    for i in range(k):
        print('第', i + 1,'折验证结果')
        model.load_state_dict(initial_model_state)
        optimizer.load_state_dict(initial_optimizer_state)

        X_train, y_train, X_valid, y_valid = get_kfold_data(k, i, X, y)
        # Training batches for this fold, reshuffled every epoch.
        train_iter = DataLoader(
            dataset=TensorDataset(X_train, y_train),
            batch_size=batch_size,
            shuffle=True,
            num_workers=0,  # load in the main process
        )
        # Validation batches for this fold.
        test_iter = DataLoader(
            dataset=TensorDataset(X_valid, y_valid),
            batch_size=batch_size,
            shuffle=True,
            num_workers=0,  # load in the main process
        )
        train_loss, val_loss, train_acc, val_acc = train(
            model, train_iter, test_iter, loss, num_epochs, batch_size)
        train_loss_to_data.append(train_loss)
        valid_loss_to_data.append(val_loss)
        train_acc_to_data.append(train_acc)
        valid_acc_to_data.append(val_acc)

    print('\n','最终k折交叉验证结果:')
    # str.format does not treat '%%' as an escape, so a single '%' is used.
    print('average train loss:{:.4f}, average train accuracy:{:.3f}%'.format(
        sum(train_loss_to_data) / k, sum(train_acc_to_data) / k * 100))
    print('average valid loss:{:.4f}, average valid accuracy:{:.3f}%'.format(
        sum(valid_loss_to_data) / k, sum(valid_acc_to_data) / k * 100))
    return [train_loss_to_data, valid_loss_to_data, train_acc_to_data, valid_acc_to_data]
# Cross-validation settings: number of folds and epochs trained per fold.
k = 10
num_epochs = 10
# Run the ten-fold cross-validation; `data` receives the per-fold metric lists.
data = k_fold(k, x_data, y_data)
#导入绘制表格需要的包
import pandas as pd
import numpy as np
import os
# Assemble the per-fold metrics into a table with one row per fold.
name = ["第" + str(i + 1) + "折" for i in range(k)]
dataframe = {
    "name": name,
    "train_loss": data[0],
    "valid_loss": data[1],
    "train_acc": data[2],
    # data[3] holds the per-fold validation accuracy, hence "valid_acc".
    "valid_acc": data[3],
}
frame = pd.DataFrame(dataframe)
frame.to_csv("./前馈神经网络十折交叉验证模型_二分类.csv", index=False)
# Read the table back; the result is only shown where the value of a bare
# expression is displayed (e.g. a notebook cell).
pd.read_csv("./前馈神经网络十折交叉验证模型_二分类.csv")
标签:acc,loss,num,交叉,验证,train,test,十折,data
From: https://www.cnblogs.com/cyberbase/p/16821153.html