首页 > 其他分享 >对回归模型采用十折交叉验证评估

对回归模型采用十折交叉验证评估

时间:2022-10-24 13:22:20浏览次数:68  
标签:loss 交叉 sum 验证 train test 十折 data valid

13、对回归模型采用十折交叉验证评估

#导入必要的包
import torch 
from torch import nn
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader,TensorDataset
from sklearn.model_selection import train_test_split
from collections import OrderedDict
from torch.nn import init
import torch.utils.data as Data  
#定义获取每折的训练集测试集数据的函数
def get_kfold_data(k, i, X, y):
    fold_size = X.shape[0] // k
    
    val_start = i * fold_size
    if i != k - 1:
        val_end = (i + 1) * fold_size
        X_valid, y_valid = X[val_start:val_end], y[val_start:val_end]
        X_train = torch.cat((X[0:val_start], X[val_end:]), dim = 0)
        y_train = torch.cat((y[0:val_start], y[val_end:]), dim = 0)
    else:
        X_valid, y_valid = X[val_start:], y[val_start:]
        X_train = X[0:val_start]
        y_train = y[0:val_start]
        
    return X_train, y_train, X_valid, y_valid
#定义多折交叉验证函数
def k_fold(k, X, y):
    
    train_loss_sum, valid_loss_sum = 0, 0
    train_acc_sum, valid_acc_sum = 0, 0
    
    data = []
    train_loss_to_data = []
    valid_loss_to_data = []
    train_acc_to_data = []
    valid_acc_to_data = []
    
    
    for i in range(k):
        print('第', i + 1,'折验证结果')
        X_train, y_train, X_valid, y_valid = get_kfold_data(k, i, X, y)
        dataset = Data.TensorDataset(X_train, y_train)  
        train_iter = Data.DataLoader(  
            dataset=dataset, # torch TensorDataset format  
            batch_size=batch_size, # mini batch size  
            shuffle=True, # 是否打乱数据 (训练集一般需要进行打乱)  
            num_workers=0, # 多线程来读数据, 注意在Windows下需要设置为0  
        )  
        # 将测试数据的特征和标签组合  
        dataset = Data.TensorDataset(X_valid, y_valid)  
        # 把 dataset 放入 DataLoader  
        test_iter = Data.DataLoader(  
            dataset=dataset, # torch TensorDataset format  
            batch_size=batch_size, # mini batch size  
            shuffle=True, # 是否打乱数据 
            num_workers=0, # 多线程来读数据, 注意在Windows下需要设置为0  
        )
        train_loss, val_loss, train_acc, val_acc = train(model,train_iter,test_iter,loss,num_epochs,batch_size,lr)
        
        train_loss_to_data.append(train_loss)
        valid_loss_to_data.append(val_loss)
        train_acc_to_data.append(train_acc.detach().numpy())
        valid_acc_to_data.append(val_acc.detach().numpy())
        
        train_loss_sum += train_loss
        valid_loss_sum += val_loss
        train_acc_sum += train_acc
        valid_acc_sum += val_acc
    
    print('\n','最终k折交叉验证结果:')
    
    print('average train loss:{:.4f}, average train accuracy:{:.3f}%%'.format(train_loss_sum/k, train_acc_sum/k*100))
    print('average valid loss:{:.4f}, average valid accuracy:{:.3f}%%'.format(valid_loss_sum/k, valid_acc_sum/k*100))
    
    data.append(train_loss_to_data)
    data.append(valid_loss_to_data)
    data.append(train_acc_to_data)
    data.append(valid_acc_to_data)
    
    return data
#定义训练函数
def train(model,train_iter,test_iter,loss,num_epochs,batch_size,lr):
    train_ls,test_ls = [],[]
    train_ac, test_ac = [],[]
    for epoch in range(num_epochs):
        train_ls_sum ,test_ls_sum = 0,0
        train_ac_sum ,test_ac_sum = 0,0
        n_train, n_test = 0,0
        for x,y in train_iter:
            y_pred = model(x)
            l = loss(y_pred,y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_ls_sum += l.item()
            ac = (1-abs(y_pred - y)/y).mean()
            train_ac_sum += ac
            n_train+=1
        for x ,y in test_iter:
            y_pred = model(x)
            l = loss(y_pred,y)
            test_ls_sum +=l.item()
            ac = (1-abs(y_pred - y)/y).mean()
            test_ac_sum += ac
            n_test+=1
        train_ls.append(train_ls_sum)
        test_ls.append(test_ls_sum)
        train_ac.append(train_ac_sum/n_train)
        test_ac.append(test_ac_sum/n_test)
        print('epoch %d,train_loss %.6f, train_acc %.6f %%, test_loss %f, test_acc %f %%'
              %(epoch+1, train_ls[epoch], train_ac[epoch]*100, test_ls[epoch], train_ac[epoch]*100))
    return train_ls[epoch], test_ls[epoch], train_ac[epoch], test_ac[epoch]
#定义数据集
num_input ,num_example = 500,10000
true_w = torch.ones(1,num_input)*0.0056
true_b = 0.028
x_data = torch.tensor(np.random.normal(0,0.001,size  = (num_example,num_input)),dtype = torch.float32)
y = torch.mm(x_data,true_w.t()) +true_b
y += torch.normal(0,0.001,y.shape)
#train_x,test_x,train_y,test_y = train_test_split(x_data,y,shuffle= True,test_size=0.3)
#定义模型
model= nn.Sequential(OrderedDict([
    ('linear1',nn.Linear(num_input,256)),
    ('linear2',nn.Linear(256,128)),
    ('linear3',nn.Linear(128,1)),
])
)
#初始化参数
for param in model.parameters():
    init.normal_(param,mean = 0 ,std = 0.001)
k = 10 #折数
lr = 0.001 #学习率
batch_size = 50 #批量大小
num_epochs = 10 #训练次数
loss = nn.MSELoss() #损失函数
optimizer = torch.optim.SGD(model.parameters(),lr) #优化器
#开始训练和验证
data = k_fold(k, x_data, y)
#导入表格需要的包
import pandas as pd
import numpy as np
import os
#定义数据框架
name = []
for i in range(k):
    name.append("第"+str(i+1)+"折")
dataframe = {"name": name,
        "train_loss": data[0],
        "valid_loss": data[1],
        "train_acc": data[2],
        "loss_acc": data[3],}
frame = pd.DataFrame(dataframe)
frame.to_csv("./前馈神经网络十折交叉验证模型_回归.csv", index=False)

#显示表格
pd.read_csv("./前馈神经网络十折交叉验证模型_回归.csv")

标签:loss,交叉,sum,验证,train,test,十折,data,valid
From: https://www.cnblogs.com/cyberbase/p/16821152.html

相关文章