These study notes are based on videos 30-41 of the Bilibili series 《迪哥128集强力打造:深度学习PyTorch从入门到实战》.
The key things to learn from this pretrain-and-finetune code:
1. Load a classic network architecture and its pretrained weights from torchvision, replace the final fully connected layer with one that fits your own task, train only that new layer, and save the best model.
2. Load the saved best model and then train the entire network on the dataset to push performance further.
Note: transfer learning usually follows this two-stage strategy: first fine-tune only selected layers of the pretrained model (here, the new final layer), then train the whole model, which tends to improve performance on the new task. A minimal sketch of the recipe follows.
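Here is that recipe in miniature (a sketch only; model, num_classes, and the optimizers are placeholders, not the notes' actual code, which is spelled out in full below):
# Stage 1: freeze the pretrained backbone and train only the new classification head
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(model.fc.in_features, num_classes)  # new head; its parameters are trainable by default
optimizer = optim.Adam(model.fc.parameters(), lr=1e-2)
# ... train and keep the best checkpoint ...
# Stage 2: unfreeze everything and fine-tune the whole network with a smaller learning rate
for param in model.parameters():
    param.requires_grad = True
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# ... resume from the stage-1 checkpoint and train again ...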
The complete code follows (best read after copying it into PyCharm):
#################### Imports
import os
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
import torch.optim as optim
import torchvision
from torchvision import transforms, models, datasets
import imageio
import time
import warnings
import random
import sys
import copy
import json
from PIL import Image
#################### Data loading and preprocessing
data_dir = './flower_data'
train_dir = data_dir + '/train'
valid_dir = data_dir + '/valid'
#################### Build the data source (transforms)
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(45),  # random rotation, angle drawn from [-45, 45] degrees
        transforms.CenterCrop(224),  # crop from the center
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        # random horizontal and vertical flips; p is the probability, so p=0.5 means a 50% chance to flip and a 50% chance to leave the image unchanged
        transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.1, hue=0.1),
        # the four arguments are brightness, contrast, saturation, and hue
        transforms.RandomGrayscale(p=0.025),
        # convert to grayscale with probability 2.5%; the result still has 3 channels, with R=G=B
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # normalize with the ImageNet mean and standard deviation
    ]),
    'valid': transforms.Compose([
        transforms.Resize(256),  # validation images arrive in unknown sizes, so resize to 256 first
        transforms.CenterCrop(224),  # then center-crop to 224
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
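# Quick sanity check (an addition to the original notes): apply the training pipeline
# to a synthetic PIL image and confirm it yields the 3 x 224 x 224 tensor the network expects.
_dummy = Image.new('RGB', (500, 400))
assert data_transforms['train'](_dummy).shape == (3, 224, 224)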
#################### Build the datasets
batch_size = 8
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'valid']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True) for x in ['train', 'valid']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid']}
class_names = image_datasets['train'].classes
#################### Read the human-readable class name for each label
with open('cat_to_name.json', 'r') as f:
    cat_to_name = json.load(f)
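# cat_to_name maps class-ID strings to readable flower names, e.g. '21' -> 'fire lily'
# in the standard flower-dataset mapping; a quick peek (this print is an addition):
print(len(cat_to_name), list(cat_to_name.items())[:2])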
#################### Display the data (the images are tensors now, so convert back to numpy and undo the normalization)
def im_convert(tensor):
    image = tensor.to("cpu").clone().detach()
    image = image.numpy().squeeze()
    image = image.transpose(1, 2, 0)
    image = image * np.array((0.229, 0.224, 0.225)) + np.array((0.485, 0.456, 0.406))
    image = image.clip(0, 1)
    return image
# plot a grid of sample images
fig = plt.figure(figsize=(20, 12))
columns = 4
rows = 2
dataiter = iter(dataloaders['valid'])
inputs, classes = next(dataiter)  # iterators no longer have a .next() method; use next()
for idx in range(columns * rows):
    ax = fig.add_subplot(rows, columns, idx + 1, xticks=[], yticks=[])
    ax.set_title(cat_to_name[str(int(class_names[classes[idx]]))])
    plt.imshow(im_convert(inputs[idx]))
plt.show()
#################### Load a model from torchvision.models, initialized with its pretrained weights
model_name = 'resnet'  # plenty to choose from: ['resnet', 'alexnet', 'vgg', 'squeezenet', 'densenet', 'inception']
feature_extract = True  # whether to reuse the pretrained features (True means the backbone stays frozen)
#################### Train on GPU if available
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
    print('CUDA is not available. Training on CPU ...')
else:
    print('CUDA is available! Training on GPU ...')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#################### Choose which layers to freeze
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():  # iterate over all parameters of the model
            param.requires_grad = False
            # setting requires_grad to False means these parameters accumulate no gradients
            # during training and are never updated, i.e. the layers are frozen
#################### Load the network
model_ft = models.resnet152()  # instantiate ResNet-152 just to inspect its architecture
print(model_ft)
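# The printout ends with the original head, (fc): Linear(in_features=2048, out_features=1000),
# which is exactly the layer that gets replaced below.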
#################### Follows the example on the official PyTorch website
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    # pick the model; initialization differs slightly between architectures
    model_ft = None
    input_size = 0
    # only resnet is exercised in these notes; the other branches are kept for reference
    if model_name == "resnet":
        model_ft = models.resnet152(pretrained=use_pretrained)
        # instantiate ResNet-152 from torchvision.models;
        # pretrained=use_pretrained decides whether the ImageNet-pretrained weights are loaded
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        # number of input features of the fully connected layer fc (2048 for ResNet-152)
        model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, num_classes),
                                    nn.LogSoftmax(dim=1))
        # replace fc with a new head built from two parts in an nn.Sequential container:
        # nn.Linear(num_ftrs, num_classes): maps num_ftrs input features to num_classes outputs (102 flower classes here)
        # nn.LogSoftmax(dim=1): activation for classification, outputs the log-probability of each class
        input_size = 224
    elif model_name == "alexnet":
        """ Alexnet
        """
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224
    elif model_name == "vgg":
        """ VGG16
        """
        model_ft = models.vgg16(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224
    elif model_name == "squeezenet":
        """ Squeezenet
        """
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
        model_ft.num_classes = num_classes
        input_size = 224
    elif model_name == "densenet":
        """ Densenet
        """
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224
    elif model_name == "inception":
        """ Inception v3
        Be careful, expects (299,299) sized images and has auxiliary output
        """
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxiliary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 299
    else:
        print("Invalid model name, exiting...")
        exit()
    return model_ft, input_size
#################### Choose which layers get trained
model_ft, input_size = initialize_model(model_name, 102, feature_extract, use_pretrained=True)
model_ft = model_ft.to(device)  # move the model onto the GPU
filename = 'checkpoint.pth'  # name under which the best model is saved
# decide whether all layers are trained
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
    params_to_update = []
    for name, param in model_ft.named_parameters():  # iterate over every parameter via model_ft.named_parameters()
        if param.requires_grad == True:  # requires_grad decides whether a parameter accumulates gradients during training
            params_to_update.append(param)  # collect every parameter that needs updating (only the new fc head here)
            print("\t", name)
else:
    for name, param in model_ft.named_parameters():
        if param.requires_grad == True:
            print("\t", name)
print(model_ft)
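# Sketch (added for illustration): count trainable vs. total parameters to confirm that in
# feature-extract mode only the new fc head (roughly 2048*102 weights plus 102 biases) is trainable.
n_trainable = sum(p.numel() for p in model_ft.parameters() if p.requires_grad)
n_total = sum(p.numel() for p in model_ft.parameters())
print('trainable / total parameters: {} / {}'.format(n_trainable, n_total))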
#################### Optimizer setup
optimizer_ft = optim.Adam(params_to_update, lr=1e-2)  # set up the optimizer (Adam and SGD both work)
scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
# optim.lr_scheduler.StepLR: learning-rate decay; with step_size=7, gamma=0.1 the LR is multiplied by 0.1 every 7 epochs, so 1e-2 becomes 1e-3
criterion = nn.NLLLoss()
# the model already ends in LogSoftmax(), so nn.CrossEntropyLoss() cannot be applied again;
# nn.CrossEntropyLoss() is equivalent to LogSoftmax() followed by nn.NLLLoss()
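# Sanity check with synthetic tensors (an addition, not part of the original notes):
# LogSoftmax followed by NLLLoss on raw logits equals CrossEntropyLoss, which is why
# NLLLoss is the right criterion for a model that already ends in LogSoftmax.
_logits = torch.randn(4, 102)
_targets = torch.randint(0, 102, (4,))
assert torch.allclose(nn.CrossEntropyLoss()(_logits, _targets),
                      nn.NLLLoss()(torch.log_softmax(_logits, dim=1), _targets))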
#################### Training loop
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False, filename=filename):
    # is_inception handles Inception v3's auxiliary output; it stays False for resnet
    since = time.time()
    best_acc = 0  # best validation accuracy seen so far
    model.to(device)  # run on the GPU if available, otherwise the CPU
    val_acc_history = []
    train_acc_history = []
    train_losses = []
    valid_losses = []
    LRs = [optimizer.param_groups[0]['lr']]
    best_model_wts = copy.deepcopy(model.state_dict())  # keep a copy of the best weights
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # training mode
            else:
                model.eval()  # evaluation mode
            running_loss = 0.0
            running_corrects = 0
            # iterate over the whole dataset
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)  # move the current batch to the device
                labels = labels.to(device)
                optimizer.zero_grad()  # zero the gradients
                with torch.set_grad_enabled(phase == 'train'):  # compute and track gradients only during training
                    if is_inception and phase == 'train':  # this branch only matters for Inception v3
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:  # resnet takes this branch
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)
                    # update the weights only in the training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # accumulate the loss and the number of correct predictions
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            time_elapsed = time.time() - since
            print('Time elapsed {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            # keep the best model seen so far
            if phase == 'valid' and epoch_acc > best_acc:
                # validation phase, and the accuracy beats the best so far
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                # checkpoint the current best weights
                state = {
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                }
                torch.save(state, filename)
            if phase == 'valid':
                val_acc_history.append(epoch_acc)
                valid_losses.append(epoch_loss)
                scheduler.step()  # StepLR decays on a fixed epoch schedule and takes no metric; note this uses the module-level scheduler
            if phase == 'train':
                train_acc_history.append(epoch_acc)
                train_losses.append(epoch_loss)
        print('Optimizer learning rate : {:.7f}'.format(optimizer.param_groups[0]['lr']))
        LRs.append(optimizer.param_groups[0]['lr'])
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))
    # after training, use the best epoch's weights as the final model
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs
#################### Start training (only the final fully connected layer is trained here)
model_ft, val_acc_history, train_acc_history, valid_losses, train_losses, LRs = train_model(model_ft, dataloaders, criterion, optimizer_ft, num_epochs=2, is_inception=(model_name=="inception"))
#################### Next, train all layers (the whole network is trained in this stage)
for param in model_ft.parameters():
    param.requires_grad = True  # note this is now True; it was False while only the final layer was being trained
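# Quick check (added): every parameter should now require gradients before the full fine-tune.
assert all(p.requires_grad for p in model_ft.parameters())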
#################### Continue training, now on all parameters, with a smaller learning rate
optimizer = optim.Adam(model_ft.parameters(), lr=1e-4)
# all parameters require gradients now, so optimize model_ft.parameters() rather than the old params_to_update list, which only held the fc head
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)  # schedule the new optimizer, not the old optimizer_ft
criterion = nn.NLLLoss()
checkpoint = torch.load(filename)
# load the checkpoint.pth saved above; the earlier training is not thrown away, the model keeps improving from that starting point
best_acc = checkpoint['best_acc']
model_ft.load_state_dict(checkpoint['state_dict'])
# the saved optimizer state only covers the fc head, so it cannot be loaded into the new
# all-parameter optimizer; Adam simply starts fresh here
model_ft, val_acc_history, train_acc_history, valid_losses, train_losses, LRs = train_model(model_ft, dataloaders, criterion, optimizer, num_epochs=10, is_inception=(model_name=="inception"), filename='seriouscheckpoint.pth')
# the fully fine-tuned model is saved under a new name so it does not overwrite the head-only checkpoint and matches the load below
#################### Load the trained model (to test it on an image)
model_ft, input_size = initialize_model(model_name, 102, feature_extract, use_pretrained=True)
model_ft = model_ft.to(device)
filename = 'seriouscheckpoint.pth'
# load the checkpoint
checkpoint = torch.load(filename)
best_acc = checkpoint['best_acc']
model_ft.load_state_dict(checkpoint['state_dict'])
#################### Test-time preprocessing
# test images must be preprocessed exactly the same way as during training
def process_image(image_path):
    img = Image.open(image_path)
    # Resize; thumbnail can only shrink an image, hence the aspect-ratio check
    if img.size[0] > img.size[1]:
        img.thumbnail((10000, 256))
    else:
        img.thumbnail((256, 10000))
    # center-crop to 224 x 224
    left_margin = (img.width - 224) / 2
    bottom_margin = (img.height - 224) / 2
    right_margin = left_margin + 224
    top_margin = bottom_margin + 224
    img = img.crop((left_margin, bottom_margin, right_margin, top_margin))
    # same normalization as in training
    img = np.array(img) / 255
    mean = np.array([0.485, 0.456, 0.406])  # ImageNet mean
    std = np.array([0.229, 0.224, 0.225])  # ImageNet std
    img = (img - mean) / std
    # the color channel must come first
    img = img.transpose((2, 0, 1))
    return img
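# Note (added): process_image mirrors data_transforms['valid'] above -- resize so the short
# side is 256, center-crop to 224, scale to [0, 1], normalize with the ImageNet statistics --
# but returns a numpy array in channel-first (C, H, W) order instead of a torch tensor.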
def imshow(image, ax=None, title=None):
    """Display a preprocessed image."""
    if ax is None:
        fig, ax = plt.subplots()
    # put the color channel back last
    image = np.array(image).transpose((1, 2, 0))
    # undo the normalization
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image = std * image + mean
    image = np.clip(image, 0, 1)
    ax.imshow(image)
    ax.set_title(title)
    return ax
image_path = 'image_06621.jpg'
img = process_image(image_path)
imshow(img)
# grab one batch of test data
dataiter = iter(dataloaders['valid'])
images, labels = next(dataiter)  # iterators no longer have a .next() method; use next()
model_ft.eval()
if train_on_gpu:
    output = model_ft(images.cuda())
else:
    output = model_ft(images)
# take the most probable class
_, preds_tensor = torch.max(output, 1)
preds = np.squeeze(preds_tensor.numpy()) if not train_on_gpu else np.squeeze(preds_tensor.cpu().numpy())
#################### Display the predictions
fig = plt.figure(figsize=(20, 20))
columns = 4
rows = 2
for idx in range(columns * rows):
    ax = fig.add_subplot(rows, columns, idx + 1, xticks=[], yticks=[])
    plt.imshow(im_convert(images[idx]))
    # title format: predicted (actual); ImageFolder sorts class folders lexicographically,
    # so map each index back through class_names before looking up the flower name
    ax.set_title("{} ({})".format(cat_to_name[str(int(class_names[preds[idx]]))],
                                  cat_to_name[str(int(class_names[labels[idx].item()]))]),
                 color=("green" if preds[idx] == labels[idx].item() else "red"))
plt.show()
From: https://blog.csdn.net/zly19980718/article/details/143748677