# 13、对回归模型采用十折交叉验证评估
#导入必要的包
import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader,TensorDataset
from sklearn.model_selection import train_test_split
from collections import OrderedDict
from torch.nn import init
import torch.utils.data as Data
#定义获取每折的训练集测试集数据的函数
def get_kfold_data(k, i, X, y):
    """Return the training and validation split for fold ``i`` of ``k``.

    The samples are cut along dimension 0 into ``k`` contiguous folds of
    ``X.shape[0] // k`` rows each; the last fold additionally receives the
    remainder rows. Fold ``i`` becomes the validation set and every other
    row, in its original order, becomes the training set. No shuffling is
    done here.

    Args:
        k: Total number of folds; must be at least 2 so that the training
            set is never empty.
        i: Zero-based index of the validation fold, ``0 <= i < k``.
        X: Feature tensor with samples along dimension 0.
        y: Target tensor with the same number of rows as ``X``.

    Returns:
        Tuple ``(X_train, y_train, X_valid, y_valid)``.

    Raises:
        ValueError: If ``k < 2``, ``i`` is out of range, ``X`` and ``y``
            disagree on the number of samples, or there are fewer samples
            than folds (which would produce empty validation folds).
    """
    n_samples = X.shape[0]
    if k < 2:
        raise ValueError(f"k must be at least 2, got {k}")
    if not 0 <= i < k:
        raise ValueError(f"fold index i must satisfy 0 <= i < {k}, got {i}")
    if y.shape[0] != n_samples:
        raise ValueError(
            f"X and y must have the same number of samples, "
            f"got {n_samples} and {y.shape[0]}"
        )
    if n_samples < k:
        raise ValueError(
            f"cannot split {n_samples} samples into {k} non-empty folds"
        )

    fold_size = n_samples // k
    val_start = i * fold_size
    # The last fold absorbs the rows left over by the integer division.
    val_end = n_samples if i == k - 1 else val_start + fold_size

    X_valid, y_valid = X[val_start:val_end], y[val_start:val_end]
    X_train = torch.cat((X[:val_start], X[val_end:]), dim=0)
    y_train = torch.cat((y[:val_start], y[val_end:]), dim=0)
    return X_train, y_train, X_valid, y_valid
#定义多折交叉验证函数
def k_fold(k, X, y):
    """Run k-fold cross-validation and collect the per-fold final metrics.

    For each fold the data is split with ``get_kfold_data``, wrapped in
    ``DataLoader`` objects and passed to ``train``. The metrics returned by
    ``train`` for every fold are stored, and their averages over the folds
    are printed at the end.

    This function relies on module-level names defined elsewhere in the
    file: ``model``, ``loss``, ``num_epochs``, ``batch_size``, ``lr``,
    ``train`` and ``get_kfold_data``.

    Args:
        k: Number of folds.
        X: Feature tensor with samples along dimension 0.
        y: Target tensor with the same number of rows as ``X``.

    Returns:
        A list of four lists, each with one entry per fold, in this order:
        train loss, validation loss, train accuracy, validation accuracy.
        The accuracy entries are numpy values obtained via
        ``.detach().numpy()``.
    """
    import copy  # local import: only needed for the weight snapshot below

    # Snapshot of the weights the model has on entry. Every fold restarts
    # from this state; otherwise the model trained on earlier folds has
    # already seen the current validation rows and the estimate is biased.
    initial_state = copy.deepcopy(model.state_dict())

    train_loss_sum, valid_loss_sum = 0, 0
    train_acc_sum, valid_acc_sum = 0, 0
    train_loss_to_data = []
    valid_loss_to_data = []
    train_acc_to_data = []
    valid_acc_to_data = []

    for i in range(k):
        print('第', i + 1,'折验证结果')

        # load_state_dict copies into the existing parameter tensors, so an
        # optimizer that already references them remains valid.
        model.load_state_dict(initial_state)

        X_train, y_train, X_valid, y_valid = get_kfold_data(k, i, X, y)

        train_iter = Data.DataLoader(
            dataset=Data.TensorDataset(X_train, y_train),
            batch_size=batch_size,
            shuffle=True,  # shuffle the training samples every epoch
            num_workers=0,  # must stay 0 on Windows
        )
        test_iter = Data.DataLoader(
            dataset=Data.TensorDataset(X_valid, y_valid),
            batch_size=batch_size,
            shuffle=False,  # evaluation does not need shuffling
            num_workers=0,
        )

        train_loss, val_loss, train_acc, val_acc = train(
            model, train_iter, test_iter, loss, num_epochs, batch_size, lr
        )

        train_loss_to_data.append(train_loss)
        valid_loss_to_data.append(val_loss)
        train_acc_to_data.append(train_acc.detach().numpy())
        valid_acc_to_data.append(val_acc.detach().numpy())

        train_loss_sum += train_loss
        valid_loss_sum += val_loss
        train_acc_sum += train_acc
        valid_acc_sum += val_acc

    print('\n','最终k折交叉验证结果:')
    # str.format does not treat '%' specially, so a single '%' is the
    # correct way to print a percent sign here.
    print('average train loss:{:.4f}, average train accuracy:{:.3f}%'.format(
        train_loss_sum / k, float(train_acc_sum) / k * 100))
    print('average valid loss:{:.4f}, average valid accuracy:{:.3f}%'.format(
        valid_loss_sum / k, float(valid_acc_sum) / k * 100))

    return [
        train_loss_to_data,
        valid_loss_to_data,
        train_acc_to_data,
        valid_acc_to_data,
    ]
#定义训练函数
def _relative_accuracy(y_pred, y):
    """Return the batch mean of ``1 - |y_pred - y| / |y|``, detached from autograd."""
    # Dividing by |y| keeps the metric meaningful for negative targets;
    # detaching avoids keeping every batch's graph alive in the accumulator.
    return (1 - (y_pred - y).abs() / y.abs()).mean().detach()


def train(model, train_iter, test_iter, loss, num_epochs, batch_size, lr,
          optimizer=None):
    """Train ``model`` and evaluate it on ``test_iter`` after every epoch.

    Per epoch, the model is optimised on every batch of ``train_iter`` and
    then evaluated (without gradient tracking) on every batch of
    ``test_iter``. Loss and accuracy are reported as the mean over batches,
    so training and evaluation figures are directly comparable. One
    progress line is printed per epoch.

    Args:
        model: Callable network whose output is compared against the targets.
        train_iter: Iterable of ``(x, y)`` training batches.
        test_iter: Iterable of ``(x, y)`` evaluation batches.
        loss: Loss callable taking ``(y_pred, y)`` and returning a scalar
            tensor.
        num_epochs: Number of passes over ``train_iter``; must be >= 1.
        batch_size: Unused here; kept so existing callers keep working.
        lr: Learning rate, used only when an SGD optimizer has to be
            created by this function.
        optimizer: Optimizer to step. If omitted, a module-level object
            named ``optimizer`` is used when one exists (the previous
            behaviour); otherwise ``torch.optim.SGD(model.parameters(), lr)``
            is created.

    Returns:
        Tuple ``(train_loss, test_loss, train_acc, test_acc)`` for the last
        epoch. The losses are Python floats; the accuracies are detached
        0-dim tensors.

    Raises:
        ValueError: If ``num_epochs < 1`` or either iterator yields no
            batches.
    """
    if num_epochs < 1:
        raise ValueError(f"num_epochs must be at least 1, got {num_epochs}")

    if optimizer is None:
        optimizer = globals().get("optimizer")
    if optimizer is None:
        optimizer = torch.optim.SGD(model.parameters(), lr)

    train_ls, test_ls = [], []
    train_ac, test_ac = [], []

    for epoch in range(num_epochs):
        train_ls_sum, train_ac_sum, n_train = 0.0, 0.0, 0
        for x, y in train_iter:
            y_pred = model(x)
            batch_loss = loss(y_pred, y)
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
            train_ls_sum += batch_loss.item()
            train_ac_sum += _relative_accuracy(y_pred, y)
            n_train += 1

        test_ls_sum, test_ac_sum, n_test = 0.0, 0.0, 0
        with torch.no_grad():  # evaluation needs no autograd graph
            for x, y in test_iter:
                y_pred = model(x)
                test_ls_sum += loss(y_pred, y).item()
                test_ac_sum += _relative_accuracy(y_pred, y)
                n_test += 1

        if n_train == 0 or n_test == 0:
            raise ValueError("train_iter and test_iter must each yield at least one batch")

        train_ls.append(train_ls_sum / n_train)
        test_ls.append(test_ls_sum / n_test)
        train_ac.append(train_ac_sum / n_train)
        test_ac.append(test_ac_sum / n_test)

        print('epoch %d,train_loss %.6f, train_acc %.6f %%, test_loss %f, test_acc %f %%'
              % (epoch + 1, train_ls[-1], train_ac[-1] * 100, test_ls[-1], test_ac[-1] * 100))

    return train_ls[-1], test_ls[-1], train_ac[-1], test_ac[-1]
# Hyperparameters
k = 10  # number of folds
lr = 0.001  # learning rate
batch_size = 50  # mini-batch size
num_epochs = 10  # epochs per fold

# Synthetic data set: y = x . true_w^T + true_b + Gaussian noise
num_input, num_example = 500, 10000
true_w = 0.0056 * torch.ones(1, num_input)
true_b = 0.028
x_data = torch.tensor(
    np.random.normal(0, 0.001, size=(num_example, num_input)),
    dtype=torch.float32,
)
y = x_data @ true_w.t() + true_b
y.add_(torch.normal(0, 0.001, y.shape))
# train_x,test_x,train_y,test_y = train_test_split(x_data,y,shuffle= True,test_size=0.3)

# Model: three stacked linear layers (no activation in between)
model = nn.Sequential()
model.add_module('linear1', nn.Linear(num_input, 256))
model.add_module('linear2', nn.Linear(256, 128))
model.add_module('linear3', nn.Linear(128, 1))

# Re-initialise every parameter (weights and biases) from N(0, 0.001^2)
for param in model.parameters():
    init.normal_(param, mean=0, std=0.001)

loss = nn.MSELoss()  # loss function
optimizer = torch.optim.SGD(model.parameters(), lr)  # optimizer

# Run training and cross-validation
data = k_fold(k, x_data, y)
#导入表格需要的包
import pandas as pd
import numpy as np
import os
# Collect the per-fold metrics returned by k_fold into a table.
name = ["第" + str(i + 1) + "折" for i in range(k)]
dataframe = {
    "name": name,
    "train_loss": data[0],
    "valid_loss": data[1],
    # The accuracies arrive as 0-d numpy values; store plain floats so the
    # columns are numeric.
    "train_acc": [float(v) for v in data[2]],
    # data[3] holds the validation accuracy, so the column is named to match.
    "valid_acc": [float(v) for v in data[3]],
}
frame = pd.DataFrame(dataframe)
frame.to_csv("./前馈神经网络十折交叉验证模型_回归.csv", index=False)
# Read the file back and show it; print() makes this visible when the code
# runs as a script, not only as the last expression of a notebook cell.
print(pd.read_csv("./前馈神经网络十折交叉验证模型_回归.csv"))
# 标签:loss,交叉,sum,验证,train,test,十折,data,valid
# From: https://www.cnblogs.com/cyberbase/p/16821152.html