PyTorch搭建AlexNet实现猫狗分类
一篇简单的学习笔记
学习视频:https://www.bilibili.com/video/BV18L4y167jr/?spm_id_from=333.1007.top_right_bar_window_custom_collection.content.click
数据集以及代码,训练后的权重,这里放的是炮哥的(点我),提取码:03xd
一、环境准备
可以去看上一篇博客,里面写得很详细了,并且推荐了一篇炮哥的环境搭建教程
二、模型搭建、训练
1.整体框图
AlexNet整体框图。各层的padding、stride需要根据论文中的特征图尺寸计算;当stride=1且卷积前后特征图大小不变时,padding=(kernel_size-1)/2,例如5×5卷积取padding=2,3×3卷积取padding=1
2.net.py
网络整体结构代码
import torch
from torch import nn
import torch.nn.functional as F


class MyAlexNet(nn.Module):
    """AlexNet-style CNN that classifies an RGB image into two classes.

    The spatial sizes in the comments below are for a 3x224x224 input, which
    is what the first fully connected layer (4608 inputs) is sized for.
    Attribute names are part of the checkpoint format (state_dict keys), so
    they must not be renamed.
    """

    def __init__(self):
        super(MyAlexNet, self).__init__()
        # 224 -> (224 + 2*2 - 11) // 4 + 1 = 55
        self.c1 = nn.Conv2d(in_channels=3, out_channels=48, kernel_size=11, stride=4, padding=2)
        self.ReLu = nn.ReLU()
        # 55 -> 55 (5x5 kernel with padding 2 keeps the size)
        self.c2 = nn.Conv2d(in_channels=48, out_channels=128, kernel_size=5, stride=1, padding=2)
        # 55 -> 27
        self.s2 = nn.MaxPool2d(2)
        # 27 -> 29 (3x3 kernel with padding 2 grows the map by 2)
        self.c3 = nn.Conv2d(in_channels=128, out_channels=192, kernel_size=3, stride=1, padding=2)
        # 29 -> 14
        self.s3 = nn.MaxPool2d(2)
        # 14 -> 14
        self.c4 = nn.Conv2d(in_channels=192, out_channels=192, kernel_size=3, stride=1, padding=1)
        # 14 -> 14
        self.c5 = nn.Conv2d(in_channels=192, out_channels=128, kernel_size=3, stride=1, padding=1)
        # 14 -> (14 - 3) // 2 + 1 = 6
        self.s5 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.flatten = nn.Flatten()
        # Flattened feature size after the last pooling: 6 * 6 * 128 = 4608
        self.f6 = nn.Linear(4608, 2048)
        self.f7 = nn.Linear(2048, 2048)
        self.f8 = nn.Linear(2048, 1000)
        # Two output logits: binary classification
        self.f9 = nn.Linear(1000, 2)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: float tensor of shape (batch, 3, 224, 224).

        Returns:
            Tensor of shape (batch, 2) with unnormalised class scores.
        """
        x = self.ReLu(self.c1(x))
        x = self.ReLu(self.c2(x))
        x = self.s2(x)
        x = self.ReLu(self.c3(x))
        x = self.s3(x)
        x = self.ReLu(self.c4(x))
        x = self.ReLu(self.c5(x))
        x = self.s5(x)
        x = self.flatten(x)
        # F.dropout defaults to training=True; pass the module's own flag so
        # that model.eval() really disables dropout at inference time.
        x = self.f6(x)
        x = F.dropout(x, 0.5, training=self.training)
        x = self.f7(x)
        x = F.dropout(x, 0.5, training=self.training)
        x = self.f8(x)
        x = F.dropout(x, 0.5, training=self.training)
        x = self.f9(x)

        return x


if __name__ == "__main__":
    # Smoke test: a single random image must pass through without shape errors.
    x = torch.rand([1, 3, 224, 224])
    model = MyAlexNet()
    y = model(x)
写完后保存,运行可以检查是否报错
3.数据划分
拿到数据后,每个类别应该都有1w多张
运行下面代码将数据按一定比例,划分为训练集和验证集
import os
from shutil import copy
import random


def mkfile(file):
    """Create directory *file* (and any missing parents) if it is absent."""
    os.makedirs(file, exist_ok=True)


def split_dataset(file_path='data_name1', out_path='data', split_rate=0.2):
    """Copy a class-per-folder image dataset into train/ and val/ subsets.

    Every sub-directory of *file_path* is treated as one class. For each
    class, ``int(num_images * split_rate)`` randomly chosen files are copied
    to ``<out_path>/val/<class>`` and the rest to ``<out_path>/train/<class>``.
    The source files are left untouched.

    Args:
        file_path: root folder of the dataset to split.
        out_path: folder in which ``train`` and ``val`` are created.
        split_rate: fraction of each class that goes to the validation set.
    """
    # Only directories are classes; stray files in the root are ignored
    # instead of crashing the later os.listdir() call.
    class_names = [
        cla for cla in sorted(os.listdir(file_path))
        if os.path.isdir(os.path.join(file_path, cla))
    ]

    # Create <out_path>/train/<class> and <out_path>/val/<class>.
    for subset in ('train', 'val'):
        mkfile(os.path.join(out_path, subset))
        for cla in class_names:
            mkfile(os.path.join(out_path, subset, cla))

    for cla in class_names:
        cla_path = os.path.join(file_path, cla)
        images = [
            name for name in os.listdir(cla_path)
            if os.path.isfile(os.path.join(cla_path, name))
        ]
        num = len(images)
        # A set makes the per-image membership test O(1) instead of O(n).
        eval_index = set(random.sample(images, k=int(num * split_rate)))
        for index, image in enumerate(images):
            subset = 'val' if image in eval_index else 'train'
            copy(os.path.join(cla_path, image), os.path.join(out_path, subset, cla))
            print("\r[{}] processing [{}/{}]".format(cla, index + 1, num), end="")  # processing bar
        print()

    print("processing done!")


if __name__ == "__main__":
    split_dataset()
划分好后应该是这样的格式
这样后就可以进行下一步训练了。
4.train.py
训练的代码,训练结束后画出训练集和验证集的loss,准确度
import torch
from torch import nn
from NET import MyAlexNet
import numpy as np

from torch.optim import lr_scheduler
import os

from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt

# Make matplotlib render the Chinese plot titles correctly
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

ROOT_TRAIN = 'data/train'
ROOT_TEST = 'data/val'

# Subtract 0.5 from each RGB channel and divide by 0.5, mapping pixel values
# from [0, 1] to [-1, 1]: image = (image - mean) / std
normalize = transforms.Normalize(std=[0.5, 0.5, 0.5], mean=[0.5, 0.5, 0.5])
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # resize (not crop) to 224x224
    transforms.RandomVerticalFlip(),  # random vertical flip
    transforms.ToTensor(),  # 0-255 pixels -> float tensor in [0, 1]
    normalize,
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # resize (not crop) to 224x224
    transforms.ToTensor(),  # 0-255 pixels -> float tensor in [0, 1]
    normalize,
])

# Train on the GPU when one is available
device = 'cuda' if torch.cuda.is_available() else 'cpu'


def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch and return the mean loss and accuracy.

    Args:
        dataloader: iterable yielding (images, labels) batches.
        model: network to optimise; it is switched to training mode here.
        loss_fn: criterion returning the mean loss of a batch.
        optimizer: optimizer that updates the model's parameters.

    Returns:
        (train_loss, train_acc) as Python floats, averaged over all samples.
    """
    # val() leaves the model in eval mode, so re-enable training mode
    # (dropout etc.) at the start of every epoch.
    model.train()
    loss_sum, correct, n = 0.0, 0, 0
    for x, y in dataloader:
        # Forward pass
        image, y = x.to(device), y.to(device)
        output = model(image)
        cur_loss = loss_fn(output, y)
        pred = output.argmax(dim=-1)
        # Backward pass
        optimizer.zero_grad()
        cur_loss.backward()
        optimizer.step()
        # .item() detaches from the graph and moves the value to the host,
        # so the history can be plotted even when training on CUDA.
        # Weighting by batch size keeps a smaller last batch from skewing
        # the epoch average.
        batch_size = y.shape[0]
        loss_sum += cur_loss.item() * batch_size
        correct += (pred == y).sum().item()
        n += batch_size
    train_loss = loss_sum / n
    train_acc = correct / n
    print(f'train_loss:{train_loss}')
    print(f'train_acc:{train_acc}')
    return train_loss, train_acc


def val(dataloader, model, loss_fn):
    """Evaluate the model on a dataloader without updating its weights.

    Args:
        dataloader: iterable yielding (images, labels) batches.
        model: network to evaluate; it is switched to eval mode here.
        loss_fn: criterion returning the mean loss of a batch.

    Returns:
        (val_loss, val_acc) as Python floats, averaged over all samples.
    """
    model.eval()
    loss_sum, correct, n = 0.0, 0, 0
    with torch.no_grad():
        for x, y in dataloader:
            image, y = x.to(device), y.to(device)
            output = model(image)
            cur_loss = loss_fn(output, y)
            pred = output.argmax(dim=-1)
            batch_size = y.shape[0]
            loss_sum += cur_loss.item() * batch_size
            correct += (pred == y).sum().item()
            n += batch_size

    val_loss = loss_sum / n
    val_acc = correct / n
    print(f'val_loss:{val_loss}')
    print(f'val_acc:{val_acc}')
    return val_loss, val_acc


def matplot_loss(train_loss, val_loss):
    """Plot the per-epoch training and validation loss curves."""
    plt.plot(train_loss, label='train_loss')
    plt.plot(val_loss, label='val_loss')
    plt.legend(loc='best')
    plt.ylabel('loss', fontsize=12)
    plt.xlabel('epoch', fontsize=12)
    plt.title("训练集和验证集loss对比图")
    plt.show()


def matplot_acc(train_acc, val_acc):
    """Plot the per-epoch training and validation accuracy curves."""
    plt.plot(train_acc, label='train_acc')
    plt.plot(val_acc, label='val_acc')
    plt.legend(loc='best')
    plt.ylabel('acc', fontsize=12)
    plt.xlabel('epoch', fontsize=12)
    plt.title("训练集和验证集acc对比图")
    plt.show()


def main():
    """Train MyAlexNet, save the best and last weights, then plot the curves."""
    # ImageFolder labels each image by the name of the folder it is in;
    # print(val_dataset.imgs) shows entries such as ('data/val\\cat\\110.jpg', 0)
    train_dataset = ImageFolder(ROOT_TRAIN, transform=train_transform)
    val_dataset = ImageFolder(ROOT_TEST, transform=val_transform)

    train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    # Order does not affect the validation metrics, so no shuffling is needed.
    val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Build the network defined in the net module and move it to the device
    model = MyAlexNet().to(device)

    loss_fn = nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    # Halve the learning rate every 10 epochs. The instance gets its own name
    # so that it does not shadow the imported lr_scheduler module.
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    train_loss_list = []
    val_loss_list = []
    train_acc_list = []
    val_acc_list = []

    epoch = 50

    # Create the output folder once so both the best and last save succeed.
    folder = 'save_model'
    os.makedirs(folder, exist_ok=True)

    max_acc = 0
    for t in range(epoch):
        print((f"epoch{t+1}\n----------------------"))
        train_loss, train_acc = train(train_dataloader, model, loss_fn, optimizer)
        val_loss, val_acc = val(val_dataloader, model, loss_fn)
        # The scheduler must be stepped after the optimizer steps of the epoch.
        scheduler.step()

        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)
        val_loss_list.append(val_loss)
        val_acc_list.append(val_acc)
        # Keep the weights with the best validation accuracy so far
        if val_acc > max_acc:
            max_acc = val_acc
            print(f'save best model,第{t+1}轮')
            torch.save(model.state_dict(), 'save_model/best_model.pth')
        # Always keep the weights of the final epoch as well
        if t == epoch - 1:
            torch.save(model.state_dict(), 'save_model/last_model.pth')
    print("done")

    matplot_loss(train_loss_list, val_loss_list)
    matplot_acc(train_acc_list, val_acc_list)


if __name__ == "__main__":
    main()
训练结束后可以看到自己的训练结果。由于我每个类别只用了2000张图片,也就是训练集+验证集的猫狗图片总共只有4000张,50轮结束后验证集只有70%左右的准确度,而炮哥有80%多,感觉是训练数据太少的原因,我是图方便,走个流程
如果想提高准确度,需要自己去调模型参数,慢慢调优了
三、模型测试
测试代码,这里用的测试集其实是之前训练时用的验证集,本来是要另外创建一个测试集的
import torch
from NET import MyAlexNet
import random
from torch.autograd import Variable
from torchvision import datasets, transforms
from torchvision.transforms import ToPILImage
from torchvision.datasets import ImageFolder

ROOT_TEST = 'data/val'

# Subtract 0.5 from each RGB channel and divide by 0.5, mapping pixel values
# from [0, 1] to [-1, 1]: image = (image - mean) / std
normalize = transforms.Normalize(std=[0.5, 0.5, 0.5], mean=[0.5, 0.5, 0.5])

# Inference must use the same preprocessing as the validation pipeline used
# during training, including the normalisation step.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # resize (not crop) to 224x224
    transforms.ToTensor(),  # 0-255 pixels -> float tensor in [0, 1]
    normalize,
])

# ImageFolder labels each image by the name of the folder it is in;
# print(val_dataset.imgs) shows entries such as ('data/val\\cat\\110.jpg', 0)
val_dataset = ImageFolder(ROOT_TEST, transform=val_transform)

# Run on the GPU when one is available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Build the network defined in the net module and move it to the device
model = MyAlexNet().to(device)

# Load the trained weights; map_location lets weights saved on a GPU load on
# a CPU-only machine as well.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
classes = [
    'cat',
    'dog',
]

# Converts a tensor back to a PIL image for display
show = ToPILImage()
# Switch to evaluation mode
model.eval()

# Run inference on the first few images. ImageFolder orders samples class by
# class, so add an offset to i to reach the second class.
for i in range(min(20, len(val_dataset))):
    x, y = val_dataset[i]
    # Undo the normalisation ([-1, 1] -> [0, 1]) before displaying the image.
    show(x * 0.5 + 0.5).show()
    x = torch.unsqueeze(x, dim=0).float().to(device)
    with torch.no_grad():
        pred = model(x)
    predicted, actual = classes[torch.argmax(pred[0]).item()], classes[y]
    print(f'Predicted:{predicted},Actual:{actual}')
运行代码后,就可以看到模型预测的结果和真实值比对
总结
流程还是很顺利的,就是最后模型没有达到理想结果,估计是没训练好,也可能是这个网络对于现在的精度要求来说已经不够用了
自己敲一下代码,会学到很多不懂的东西
比如ImageFolder()这个函数,是按照文件夹名字,来给文件夹里的数据打上标签
可以利用print(val_dataset.imgs)对象查看,返回列表形式('data/val\\cat\\110.jpg', 0)
最后,多看,多学,多试,总有一天你会成为大佬!
标签:acc,loss,val,self,0.5,PyTorch,train,AlexNet,搭建 From: https://www.cnblogs.com/zhangjie123/p/16759707.html