目录
本文利用手搓的resnet18对树叶图片进行分类
课程是李沐大神的动手学深度学习
一、认识数据
将数据下载下来,解压后一共有四个文件,images文件夹里是所有的树叶图片,包括训练集和测试集
train.csv里有两列,一列是叶子图片的路径(例如images/0.jpg),第二列是标签(例如maclura_pomifera),test.csv 里只有第一列。
二、数据预处理
首先先将数据加载进来,用dataloader转成可迭代的数据集。
# Training dataset: reads (image path, label) rows from a CSV and encodes
# the string species labels as integer class ids.
class CustomDataset(Dataset):
    """Leaf-classification training set.

    The CSV is expected to have an 'image' column (relative image path)
    and a 'label' column (species name as a string).
    """

    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        # Encode string labels as integers; keep the fitted encoder so
        # predictions can be mapped back to species names later.
        self.label_encoder = LabelEncoder()
        self.data['label'] = self.label_encoder.fit_transform(self.data['label'])
        # Save the label mapping (class id -> class name).
        self.classes_ = self.label_encoder.classes_

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # NOTE: change this prefix to wherever you extracted the dataset.
        img_path = 'F:/data/classify-leaves/' + self.data.iloc[idx]['image']
        label = self.data.iloc[idx]['label']
        # FIX: force 3-channel RGB so grayscale/palette images do not break
        # the Normalize(mean/std of length 3) step of the transform pipeline.
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label
# Test-time dataset: same CSV layout as CustomDataset but without labels.
class PredictionDataset(Dataset):
    """Unlabeled leaf images for inference; yields transformed images only."""

    def __init__(self, csv_file, transform=None):
        self.data = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # NOTE: change this prefix to wherever you extracted the dataset.
        img_path = 'F:/data/classify-leaves/' + self.data.iloc[idx]['image']
        # FIX: force 3-channel RGB so grayscale/palette images do not break
        # the Normalize(mean/std of length 3) step of the transform pipeline.
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image
对于图片数据,通常对其做一些图像增强,随机剪裁、旋转、调整亮度、饱和度等,提高模型的泛化性。
# Training-time data augmentation and tensor-conversion pipeline.
transform = transforms.Compose([
# Randomly crop a region covering between 8% and 100% of the original area,
# with aspect ratio between 3/4 and 4/3.
# Then resize the crop to a new 224 x 224 image.
transforms.RandomResizedCrop(224, scale=(0.08, 1.0), ratio=(3.0 / 4.0, 4.0 / 3.0)),
transforms.RandomHorizontalFlip(), # random horizontal flip
transforms.RandomRotation(10), # random rotation of up to 10 degrees
transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2), # random brightness/contrast/saturation/hue jitter
transforms.ToTensor(), # convert PIL image to a Tensor
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # ImageNet normalization
])
数据迭代器
# Usage example: build the training dataset.
train_csv_file = 'F:/data/classify-leaves/train.csv' # replace with your CSV file path
dataset = CustomDataset(train_csv_file, transform=transform)
# Build the dataset used for prediction.
# NOTE(review): this reuses the *training* augmentation transform at test
# time — see the deterministic eval pipeline suggested further below.
test_csv_file = 'F:/data/classify-leaves/test.csv' # replace with your CSV file path
test_dataset = PredictionDataset(test_csv_file, transform=transform)
三、ResNet模型
ResNet通过残差连接缓解了深层网络的退化和梯度消失问题,使模型能够做到很深且仍然易于训练。
完整的ResNet代码
# Basic two-conv residual block (ResNet-18/34 style).
class ResidualBlock(nn.Module):
    """y = ReLU(F(x) + shortcut(x)), where F = conv-bn-relu-conv-bn."""

    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # A 1x1 projection shortcut is needed only when the spatial size or
        # the channel count changes; otherwise the identity path is used.
        needs_projection = stride != 1 or in_channels != out_channels
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        residual = self.shortcut(x)
        y = F.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        return F.relu(y + residual)
# ResNet-18-style network: conv stem + 4 residual stages + linear classifier.
class ResNet(nn.Module):
    """Generic ResNet assembled from `block`, with `num_blocks` blocks per stage."""

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 64
        # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max-pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # Four stages; every stage after the first halves the spatial size.
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride):
        # Only the first block of a stage may downsample; the rest use stride 1.
        stage = [block(self.in_channels, out_channels, stride)]
        self.in_channels = out_channels
        stage.extend(block(out_channels, out_channels) for _ in range(blocks - 1))
        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.maxpool(F.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        x = torch.flatten(self.avgpool(x), 1)
        return self.fc(x)
四、训练和预测
# One training epoch: forward, loss, backward, optimizer step.
def train_model(model, train_loader, criterion, optimizer, device):
    """Run a single epoch over `train_loader`; return (mean batch loss, accuracy)."""
    model.train()
    loss_sum = 0.0
    n_correct = 0.0
    n_seen = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        batch_loss = criterion(outputs, labels)
        batch_loss.backward()
        optimizer.step()
        # Accumulate the per-batch mean loss and accuracy statistics.
        loss_sum += batch_loss.item()
        predicted = torch.max(outputs.data, 1)[1]
        n_seen += labels.size(0)
        n_correct += (predicted == labels).sum().item()
    return loss_sum / len(train_loader), n_correct / n_seen
# Evaluate on the validation set; returns (per-sample average loss, accuracy).
def evaluate_model(model, val_loader, criterion, device):
    """Compute average per-sample loss and accuracy over `val_loader`, no gradients."""
    model.eval()
    running_loss = 0.0
    correct = 0.0
    total = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            # criterion returns the batch *mean*, so weight by batch size to
            # accumulate the total loss over all samples (robust to a smaller
            # final batch).
            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # BUG FIX: the per-sample loss sum must be divided by the number of
    # samples, not the number of batches — the old `/ len(val_loader)`
    # inflated the reported validation loss by roughly the batch size.
    epoch_loss = running_loss / total
    epoch_acc = correct / total
    return epoch_loss, epoch_acc
# End-to-end driver: split the data, build the model and loaders, train for
# num_epochs, then run inference on the held-out prediction dataset.
def train_and_evaluate(dataset, num_epochs, batch_size, prediction_dataset, device):
    """Train a ResNet-18-style model on `dataset` and predict on `prediction_dataset`.

    Returns (history dict of per-epoch metric lists, list of predicted class ids).
    """
    num_classes = dataset.data['label'].nunique()
    # Per-epoch metric curves for later plotting.
    history = {key: [] for key in ("train_loss", "train_acc", "val_loss", "val_acc")}
    # 70/30 random train/validation split.
    n_train = int(0.7 * len(dataset))
    train_dataset, val_dataset = torch.utils.data.random_split(
        dataset, [n_train, len(dataset) - n_train])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    model = ResNet(ResidualBlock, [2, 2, 2, 2], num_classes=num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
    for epoch in range(num_epochs):
        train_loss, train_acc = train_model(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = evaluate_model(model, val_loader, criterion, device)
        for key, value in (("train_loss", train_loss), ("train_acc", train_acc),
                           ("val_loss", val_loss), ("val_acc", val_acc)):
            history[key].append(value)
        # Log progress every 10th epoch.
        if epoch % 10 == 0:
            print(f"Epoch {epoch + 1}/{num_epochs} completed", end=" ")
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}", end=" ")
            print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
    final_predictions = predict(model, prediction_dataset, device)
    return history, final_predictions
# Inference helper: run the model over each sample and collect argmax class ids.
def predict(model, dataset, device):
    """Return a list with the predicted class id for every item of `dataset`."""
    model.eval()
    predictions = []
    loader = DataLoader(dataset, batch_size=1, shuffle=False)
    with torch.no_grad():
        for img in loader:
            logits = model(img.to(device))
            pred = torch.max(logits, 1)[1]
            predictions.append(pred.item())
    return predictions
# Plot the loss and accuracy curves side by side.
def plot_metrics(history):
    """Draw train/validation loss (left panel) and accuracy (right panel) per epoch."""
    epochs = range(1, len(history['train_loss']) + 1)
    # (y-label, train key, val key, train legend, val legend, title) per panel.
    panels = (
        ('Loss', 'train_loss', 'val_loss',
         'Train Loss', 'Validation Loss', 'Train and Validation Loss'),
        ('Accuracy', 'train_acc', 'val_acc',
         'Train Accuracy', 'Validation Accuracy', 'Train and Validation Accuracy'),
    )
    plt.figure(figsize=(12, 5))
    for idx, (ylab, tr_key, va_key, tr_lbl, va_lbl, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, idx)
        plt.plot(epochs, history[tr_key], label=tr_lbl)
        plt.plot(epochs, history[va_key], label=va_lbl)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(ylab)
        plt.legend()
    plt.show()
# Usage example: build the datasets, train the model, plot the curves, and
# write the Kaggle submission file.
train_csv_file = 'F:/data/classify-leaves/train.csv'  # replace with your CSV file path
dataset = CustomDataset(train_csv_file, transform=transform)

# FIX: use a deterministic preprocessing pipeline at test time. Reusing the
# training `transform` applied random crops/flips/jitter to the test images,
# which makes predictions noisy and non-reproducible.
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
test_csv_file = 'F:/data/classify-leaves/test.csv'  # replace with your CSV file path
test_dataset = PredictionDataset(test_csv_file, transform=eval_transform)

num_epochs = 200
batch_size = 32
# NOTE(review): `device` is not defined anywhere in this listing — it is
# presumably torch.device('cuda'/'cpu') set earlier; confirm before running.
history, final_predictions = train_and_evaluate(dataset, num_epochs, batch_size, test_dataset, device)
plot_metrics(history)

# Map the integer predictions back to species-name strings.
predicted_labels = dataset.label_encoder.inverse_transform(final_predictions)
# Assemble the submission table: image path + predicted label.
predicted_labels_df = pd.DataFrame({
    'image': test_dataset.data['image'],  # the corresponding image paths
    'label': predicted_labels
})
# Save the prediction results.
predicted_labels_df.to_csv('F:/data/classify-leaves/submission.csv', index=False)
五、结果提交
结果不是特别好
改进:用更深且预训练(pretrained)的网络,如ResNet50、ResNet101、ResNeXt等,或者训练多个模型,对结果进行集成。
我没有用k折交叉,也可以加上。
将手写的Resnet代码改为下列就可使用训练好的Resnet34。
# Optionally freeze the backbone parameters (feature extraction: only the
# newly added classification head will be trained).
def set_parameter_requires_grad(model, feature_extracting):
    """Disable gradients for every parameter of `model` when `feature_extracting` is True."""
    if feature_extracting:
        # (Removed a dead `model = model` no-op present in the original.)
        for param in model.parameters():
            param.requires_grad = False
# Build a torchvision ResNet-34, optionally pretrained, with a fresh
# classification head sized for `num_classes`.
def res_model(num_classes, feature_extracting=False, use_pretrained=True):
    """Return a ResNet-34 whose final fc layer outputs `num_classes` logits."""
    net = models.resnet34(pretrained=use_pretrained)
    set_parameter_requires_grad(net, feature_extracting)
    # Replace the head; a freshly created layer is always trainable.
    in_features = net.fc.in_features
    net.fc = nn.Linear(in_features, num_classes)
    return net
结果反而更差了,我也不知道为啥。。。。。
标签:loss,竞赛,树叶,self,kaggle,train,model,csv,data From: https://blog.csdn.net/chaomoon1/article/details/143351498