import pandas as pd
import torch
import torchvision
import torch.nn as nn
import numpy as np
import torch.utils.data as Data
from sklearn import preprocessing
import matplotlib.pyplot as plt
# Training hyperparameters.
epochs, batch_size, lr = 20, 64, 0.001

# The files from the official site were converted to headerless CSV beforehand.
train_data = pd.read_csv('./data/train_10_percent.csv', header=None)
test_data = pd.read_csv('./data/test.csv', header=None)

# Classification task: keep only the test rows whose label (column 41) also
# occurs in the training set, i.e. drop the extra categories that appear only
# in the test file (17 of them according to the original author).
known_labels = set(train_data[41])
test_data = test_data[test_data[41].isin(known_labels)]

# Stack both splits, training rows first, under a fresh 0..n-1 index so they
# can be preprocessed together.
data = pd.concat([train_data, test_data], ignore_index=True)
# Encode the categorical features and the label, then drop column 19.
le = preprocessing.LabelEncoder()
# Columns 1, 2 and 3 are integer-encoded; the encoder is refitted for each
# column, so `le` ends up fitted on column 3.
for categorical_column in (1, 2, 3):
    data[categorical_column] = le.fit_transform(data[categorical_column])

# Binary target: 1 for 'normal.' rows, 0 for every other label.
data[41] = (data[41] == 'normal.').astype('int64')

# Column 19 is all zeros (per the original author) and carries no
# information, so it is removed and the remaining columns are renumbered
# 0..40: features are 0..39, the label is column 40.
del data[19]
data.columns = list(range(41))

# Min-max scale every feature column into [0, 1].
# NOTE(review): the minimum and maximum are taken over the concatenated
# train + test rows, so test-set statistics influence the scaling.
for feature_column in range(40):
    values = data[feature_column]
    # Series.min()/max() are vectorised; the builtin min()/max() would walk
    # every element in Python.
    lowest = values.min()
    span = values.max() - lowest
    shifted = values - lowest
    # A constant column has zero span; dividing by it would turn the whole
    # column into NaN, so such a column is left as all zeros instead.
    if span != 0:
        shifted = shifted / span
    # Replace the whole column rather than writing through .loc, so the
    # column takes the float32 dtype instead of relying on an in-place
    # upcast of an integer column.
    data[feature_column] = shifted.astype('float32')
# Build the tensors, datasets and loaders consumed by PyTorch.
# `train_data` is still the raw training DataFrame at this point, and its rows
# were placed first in `data`, so its length is the train/test split point.
num_train_rows = len(train_data)

# Columns 0..39 are the features and column 40 is the label. `copy=True`
# guarantees writable arrays that torch.from_numpy can wrap.
feature_matrix = data.iloc[:, :40].to_numpy(dtype='float32', copy=True)
label_vector = data.iloc[:, 40].to_numpy(dtype='int64', copy=True)

# Positional half-open slices keep the two splits disjoint. A label-based
# `.loc[:n]` slice includes row n itself, which would put the first test row
# into the training set as well.
train_data = torch.from_numpy(feature_matrix[:num_train_rows])
train_label = torch.from_numpy(label_vector[:num_train_rows])
test_data = torch.from_numpy(feature_matrix[num_train_rows:])
test_label = torch.from_numpy(label_vector[num_train_rows:])

train_dataset = Data.TensorDataset(train_data, train_label)
test_dataset = Data.TensorDataset(test_data, test_label)

# Iterable mini-batch loaders; only the training batches are shuffled.
train_loader = Data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = Data.DataLoader(test_dataset, batch_size=128)
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model: a fully connected classifier with two hidden ReLU layers.
# The label column is binarised earlier in this script (1 = 'normal.',
# 0 = anything else), so two output logits cover every possible target;
# additional logits could never be a correct class.
num_inputs, num_hiddens, num_outputs = 40, 128, 2
net = nn.Sequential(
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, 2 * num_hiddens),
    nn.ReLU(),
    nn.Linear(2 * num_hiddens, num_outputs),
)
net.to(device)

# Loss function: cross-entropy over the raw logits.
loss = torch.nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters with the configured learning rate.
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
# Training
def train():
    """Run one optimisation pass over ``train_loader``.

    Puts ``net`` into training mode and, for every mini-batch, computes the
    loss, back-propagates it and applies one ``optimizer`` step.

    Returns:
        tuple: ``(accuracy, mean_loss)`` where ``accuracy`` is the fraction of
        samples classified correctly during the pass and ``mean_loss`` is the
        loss summed over batches divided by the number of batches.
    """
    net.train()
    loss_sum, num_correct, num_seen = 0.0, 0.0, 0.0
    for features, targets in train_loader:
        features = features.to(device)
        targets = targets.to(device)

        net.zero_grad()
        logits = net(features)
        step_loss = loss(logits, targets)
        step_loss.backward()
        optimizer.step()

        # The predicted class is the index of the largest logit in each row.
        predictions = logits.argmax(dim=1)
        num_correct += (predictions == targets).sum().cpu().item()
        num_seen += len(targets)
        loss_sum += step_loss.cpu().item()

    accuracy = num_correct / num_seen
    mean_loss = loss_sum / len(train_loader)
    return accuracy, mean_loss
# Plotting
def pltfigure(x, y, title, id, data, show=None):
    """Draw one curve into a cell of a 2x2 subplot grid on the current figure.

    Args:
        x: Label for the x axis.
        y: Label for the y axis.
        title: Title of the subplot.
        id: 1-based index of the cell in the 2x2 grid. The name shadows the
            builtin but is kept because callers pass it by keyword.
        data: Sequence of values to plot against their position 0..len-1.
        show: Whether to display the figure after drawing. ``None`` (the
            default) displays it only once the last cell of the grid has
            been drawn; pass ``True`` or ``False`` to force either behaviour.
    """
    grid_rows, grid_cols = 2, 2
    plt.subplot(grid_rows, grid_cols, id)
    plt.plot(range(len(data)), data)
    plt.xlabel(x)
    plt.ylabel(y)
    plt.title(title)

    # Calling plt.show() after every subplot displays (and, with a blocking
    # backend, waits on) a figure holding a single panel, so the grid would
    # never appear as one figure. By default, wait until the grid is complete.
    if show is None:
        show = id == grid_rows * grid_cols
    if show:
        # Keep titles and axis labels of neighbouring panels from overlapping.
        plt.tight_layout()
        plt.show()
# Evaluation
def test():
    """Evaluate ``net`` on ``test_loader`` without changing its weights.

    Returns:
        tuple: ``(accuracy, mean_loss)`` where ``accuracy`` is the fraction of
        test samples classified correctly and ``mean_loss`` is the loss summed
        over batches divided by the number of batches.
    """
    net.eval()
    batch_loss, correct, total = 0.0, 0.0, 0.0
    # No backward pass happens here, so autograd is switched off; otherwise a
    # computation graph is recorded for every batch, costing memory and time.
    with torch.no_grad():
        for data, label in test_loader:
            data, label = data.to(device), label.to(device)
            output = net(data)
            batch_loss += loss(output, label).cpu().item()
            # The predicted class is the index of the largest logit per row.
            predict_label = torch.argmax(output, dim=1)
            correct += torch.sum(predict_label == label).cpu().item()
            total += len(label)
    return correct / total, batch_loss / len(test_loader)
# Entry point
def main():
    """Train and evaluate for ``epochs`` epochs, then plot the four curves.

    Prints the run configuration, runs ``train()`` followed by ``test()`` once
    per epoch while logging accuracy and loss, and finally hands the collected
    per-epoch histories to ``pltfigure``.
    """
    settings = (
        ('training on: ', device),
        ('batch_size:', batch_size),
        ('epochs:', epochs),
        ('learning_rate:', lr),
    )
    for caption, value in settings:
        print(caption, value)

    plt.figure()
    history = {'train_acc': [], 'train_loss': [], 'test_acc': [], 'test_loss': []}
    for epoch in range(epochs):
        train_acc, train_loss = train()
        test_acc, test_loss = test()
        report = (epoch, 100 * train_acc, train_loss, 100 * test_acc, test_loss)
        print('epoch %d: train acc: %.2f%% train loss:%.4f, test acc: %.2f%%, test loss:%.4f'
              % report)
        history['train_acc'].append(train_acc)
        history['train_loss'].append(train_loss)
        history['test_acc'].append(test_acc)
        history['test_loss'].append(test_loss)

    # Plot the four histories once training has finished, one per grid cell.
    panels = (
        ('acc', 'epoch-train_acc', 'train_acc'),
        ('loss', 'epoch-train_loss', 'train_loss'),
        ('acc', 'epoch-test_acc', 'test_acc'),
        ('loss', 'epoch-test_loss', 'test_loss'),
    )
    for cell, (y_label, panel_title, key) in enumerate(panels, start=1):
        pltfigure(x='epoch', y=y_label, title=panel_title, id=cell, data=history[key])


main()
# Tags: acc, loss, KDDCup, 学习 (learning), train, 深度 (deep), test, label, data
# From: https://www.cnblogs.com/lisyr44/p/17407252.html