Code Exercise
1. Download the Indian Pines dataset
! wget http://www.ehu.eus/ccwintco/uploads/6/67/Indian_pines_corrected.mat
! wget http://www.ehu.eus/ccwintco/uploads/c/c4/Indian_pines_gt.mat
Indian Pines is a standard hyperspectral dataset that is widely used in classification research.
2. Import the required libraries and modules
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as sio
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, cohen_kappa_score
import spectral
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
Network Architecture and Training
3. Define the HybridSN network
- 3D convolutions: extract joint spectral and spatial information.
- 2D convolutions: extract higher-level spatial features.
- Fully connected layers: perform the classification.
# Network definition
class HybridSN(nn.Module):
    def __init__(self, class_num=16):
        super(HybridSN, self).__init__()
        # 3D convolutional layers (spectral-spatial feature extraction)
        self.conv3d_1 = nn.Conv3d(in_channels=1, out_channels=8, kernel_size=(7, 3, 3), stride=1, padding=0)
        self.conv3d_2 = nn.Conv3d(in_channels=8, out_channels=16, kernel_size=(5, 3, 3), stride=1, padding=0)
        self.conv3d_3 = nn.Conv3d(in_channels=16, out_channels=32, kernel_size=(3, 3, 3), stride=1, padding=0)
        # 2D convolutional layer (spatial feature extraction)
        self.conv2d = nn.Conv2d(in_channels=32 * 18, out_channels=64, kernel_size=(3, 3), stride=1, padding=0)
        # Fully connected layers
        self.fc1 = nn.Linear(64 * 17 * 17, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, class_num)
        # Dropout layer
        self.dropout = nn.Dropout(p=0.4)

    def forward(self, x):
        # Input: x is (batch_size, 1, spectral_bands, height, width) = (N, 1, 30, 25, 25)
        # 3D convolutions
        x = F.relu(self.conv3d_1(x))  # Output: (batch_size, 8, 24, 23, 23)
        x = F.relu(self.conv3d_2(x))  # Output: (batch_size, 16, 20, 21, 21)
        x = F.relu(self.conv3d_3(x))  # Output: (batch_size, 32, 18, 19, 19)
        # Reshape for 2D convolution: (batch_size, 32*18, 19, 19)
        x = x.view(x.size(0), 32 * 18, 19, 19)
        # 2D convolution
        x = F.relu(self.conv2d(x))  # Output: (batch_size, 64, 17, 17)
        # Flatten the feature map for the fully connected layers
        x = x.view(x.size(0), -1)  # Output: (batch_size, 64 * 17 * 17 = 18496)
        # Fully connected layers with dropout
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        # Output layer (no activation: these are the logits for CrossEntropyLoss)
        x = self.fc3(x)
        return x
After running the cell above, you can test the network with:
x = torch.randn(1, 1, 30, 25, 25)  # simulated input
net = HybridSN(class_num=16)
y = net(x)
print(y.shape)  # should print torch.Size([1, 16])
[Figure: printed output of y.shape]
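You can also count the trainable parameters; most of them sit in fc1, which takes the flattened 64 × 17 × 17 feature map:
n_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
print(f'HybridSN trainable parameters: {n_params:,}')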
Data Processing and Train/Test Split
4. PCA dimensionality reduction and data preprocessing
# Apply a PCA transform to the hyperspectral data X
def applyPCA(X, numComponents):
    newX = np.reshape(X, (-1, X.shape[2]))
    pca = PCA(n_components=numComponents, whiten=True)
    newX = pca.fit_transform(newX)
    newX = np.reshape(newX, (X.shape[0], X.shape[1], numComponents))
    return newX

# Border pixels cannot have a full patch extracted around them,
# so pad the data with zeros first
def padWithZeros(X, margin=2):
    newX = np.zeros((X.shape[0] + 2 * margin, X.shape[1] + 2 * margin, X.shape[2]))
    x_offset = margin
    y_offset = margin
    newX[x_offset:X.shape[0] + x_offset, y_offset:X.shape[1] + y_offset, :] = X
    return newX

# Extract a patch around each pixel and arrange the patches in a Keras-style format
def createImageCubes(X, y, windowSize=5, removeZeroLabels=True):
    # Pad X
    margin = int((windowSize - 1) / 2)
    zeroPaddedX = padWithZeros(X, margin=margin)
    # Split into patches
    patchesData = np.zeros((X.shape[0] * X.shape[1], windowSize, windowSize, X.shape[2]))
    patchesLabels = np.zeros((X.shape[0] * X.shape[1]))
    patchIndex = 0
    for r in range(margin, zeroPaddedX.shape[0] - margin):
        for c in range(margin, zeroPaddedX.shape[1] - margin):
            patch = zeroPaddedX[r - margin:r + margin + 1, c - margin:c + margin + 1]
            patchesData[patchIndex, :, :, :] = patch
            patchesLabels[patchIndex] = y[r - margin, c - margin]
            patchIndex = patchIndex + 1
    if removeZeroLabels:
        patchesData = patchesData[patchesLabels > 0, :, :, :]
        patchesLabels = patchesLabels[patchesLabels > 0]
        patchesLabels -= 1
    return patchesData, patchesLabels

def splitTrainTestSet(X, y, testRatio, randomState=345):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=testRatio, random_state=randomState, stratify=y)
    return X_train, X_test, y_train, y_test
- PCA dimensionality reduction: reduces the data's dimensionality and improves training efficiency.
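As a quick sanity check of these helpers, you can run them on a tiny synthetic cube (the array sizes here are arbitrary):
Xd = np.random.randn(10, 10, 8)          # fake hyperspectral cube (H, W, bands)
yd = np.random.randint(0, 3, (10, 10))   # fake labels in {0, 1, 2}; 0 means unlabeled
Xp = applyPCA(Xd, numComponents=4)
print(Xp.shape)                           # (10, 10, 4)
patches, labels = createImageCubes(Xp, yd, windowSize=5)
print(patches.shape, labels.shape)        # (N, 5, 5, 4) and (N,), N = number of nonzero-label pixels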
Training and Testing
5. Define the training and testing dataset classes
# Number of land-cover classes
class_num = 16
X = sio.loadmat('Indian_pines_corrected.mat')['indian_pines_corrected']
y = sio.loadmat('Indian_pines_gt.mat')['indian_pines_gt']
# Fraction of samples used for testing
test_ratio = 0.90
# Size of the patch extracted around each pixel
patch_size = 25
# Number of principal components kept by PCA
pca_components = 30
print('Hyperspectral data shape: ', X.shape)
print('Label shape: ', y.shape)
print('\n... ... PCA transformation ... ...')
X_pca = applyPCA(X, numComponents=pca_components)
print('Data shape after PCA: ', X_pca.shape)
print('\n... ... create data cubes ... ...')
X_pca, y = createImageCubes(X_pca, y, windowSize=patch_size)
print('Data cube X shape: ', X_pca.shape)
print('Data cube y shape: ', y.shape)
print('\n... ... create train & test data ... ...')
Xtrain, Xtest, ytrain, ytest = splitTrainTestSet(X_pca, y, test_ratio)
print('Xtrain shape: ', Xtrain.shape)
print('Xtest shape: ', Xtest.shape)
# Reshape Xtrain and Xtest to add a channel dimension (the layout used by the original Keras implementation)
Xtrain = Xtrain.reshape(-1, patch_size, patch_size, pca_components, 1)
Xtest = Xtest.reshape(-1, patch_size, patch_size, pca_components, 1)
print('before transpose: Xtrain shape: ', Xtrain.shape)
print('before transpose: Xtest shape: ', Xtest.shape)
# Transpose the data into PyTorch's (N, C, D, H, W) layout
Xtrain = Xtrain.transpose(0, 4, 3, 1, 2)
Xtest = Xtest.transpose(0, 4, 3, 1, 2)
print('after transpose: Xtrain shape: ', Xtrain.shape)
print('after transpose: Xtest shape: ', Xtest.shape)
""" Training dataset"""
class TrainDS(torch.utils.data.Dataset):
def __init__(self):
self.len = Xtrain.shape[0]
self.x_data = torch.FloatTensor(Xtrain)
self.y_data = torch.LongTensor(ytrain)
def __getitem__(self, index):
# 根据索引返回数据和对应的标签
return self.x_data[index], self.y_data[index]
def __len__(self):
# 返回文件数据的数目
return self.len
""" Testing dataset"""
class TestDS(torch.utils.data.Dataset):
def __init__(self):
self.len = Xtest.shape[0]
self.x_data = torch.FloatTensor(Xtest)
self.y_data = torch.LongTensor(ytest)
def __getitem__(self, index):
# 根据索引返回数据和对应的标签
return self.x_data[index], self.y_data[index]
def __len__(self):
# 返回文件数据的数目
return self.len
# Create the trainloader and testloader
trainset = TrainDS()
testset = TestDS()
train_loader = torch.utils.data.DataLoader(dataset=trainset, batch_size=128, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(dataset=testset, batch_size=128, shuffle=False, num_workers=2)
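Before training, it is worth pulling one batch to confirm the loaders produce the expected tensor shapes:
xb, yb = next(iter(train_loader))
print(xb.shape, yb.shape)  # expected: torch.Size([128, 1, 30, 25, 25]) torch.Size([128])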
6. Start training
# To train on a GPU in Colab, set it under "Runtime" -> "Change runtime type"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Move the network onto the GPU
net = HybridSN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
# Start training
total_loss = 0
for epoch in range(100):
    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        # Zero the parameter gradients
        optimizer.zero_grad()
        # Forward pass + backward pass + optimizer step
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print('[Epoch: %d] [loss avg: %.4f] [current loss: %.4f]' % (epoch + 1, total_loss / (epoch + 1), loss.item()))
print('Finished Training')
[Figure: sample of the training log]
7. Model testing
count = 0
# Model testing: switch to eval mode so Dropout is disabled at inference
net.eval()
for inputs, _ in test_loader:
    inputs = inputs.to(device)
    outputs = net(inputs)
    outputs = np.argmax(outputs.detach().cpu().numpy(), axis=1)
    if count == 0:
        y_pred_test = outputs
        count = 1
    else:
        y_pred_test = np.concatenate((y_pred_test, outputs))
# Generate the classification report
classification = classification_report(ytest, y_pred_test, digits=4)
print(classification)
[Figure: experimental results]
Utility Functions
8. Generate the full report
from operator import truediv

def AA_andEachClassAccuracy(confusion_matrix):
    counter = confusion_matrix.shape[0]
    list_diag = np.diag(confusion_matrix)
    list_raw_sum = np.sum(confusion_matrix, axis=1)
    each_acc = np.nan_to_num(truediv(list_diag, list_raw_sum))
    average_acc = np.mean(each_acc)
    return each_acc, average_acc

def reports(test_loader, y_test, name):
    count = 0
    # Model testing (eval mode, as in step 7)
    net.eval()
    for inputs, _ in test_loader:
        inputs = inputs.to(device)
        outputs = net(inputs)
        outputs = np.argmax(outputs.detach().cpu().numpy(), axis=1)
        if count == 0:
            y_pred = outputs
            count = 1
        else:
            y_pred = np.concatenate((y_pred, outputs))
    if name == 'IP':
        target_names = ['Alfalfa', 'Corn-notill', 'Corn-mintill', 'Corn',
                        'Grass-pasture', 'Grass-trees', 'Grass-pasture-mowed',
                        'Hay-windrowed', 'Oats', 'Soybean-notill', 'Soybean-mintill',
                        'Soybean-clean', 'Wheat', 'Woods', 'Buildings-Grass-Trees-Drives',
                        'Stone-Steel-Towers']
    elif name == 'SA':
        target_names = ['Brocoli_green_weeds_1', 'Brocoli_green_weeds_2', 'Fallow', 'Fallow_rough_plow', 'Fallow_smooth',
                        'Stubble', 'Celery', 'Grapes_untrained', 'Soil_vinyard_develop', 'Corn_senesced_green_weeds',
                        'Lettuce_romaine_4wk', 'Lettuce_romaine_5wk', 'Lettuce_romaine_6wk', 'Lettuce_romaine_7wk',
                        'Vinyard_untrained', 'Vinyard_vertical_trellis']
    elif name == 'PU':
        target_names = ['Asphalt', 'Meadows', 'Gravel', 'Trees', 'Painted metal sheets', 'Bare Soil', 'Bitumen',
                        'Self-Blocking Bricks', 'Shadows']
    classification = classification_report(y_test, y_pred, target_names=target_names)
    oa = accuracy_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)
    each_acc, aa = AA_andEachClassAccuracy(confusion)
    kappa = cohen_kappa_score(y_test, y_pred)
    return classification, confusion, oa * 100, each_acc * 100, aa * 100, kappa * 100

classification, confusion, oa, each_acc, aa, kappa = reports(test_loader, ytest, 'IP')
classification = str(classification)
confusion = str(confusion)
file_name = "classification_report.txt"
with open(file_name, 'w') as x_file:
    x_file.write('\n')
    x_file.write('{} Kappa accuracy (%)'.format(kappa))
    x_file.write('\n')
    x_file.write('{} Overall accuracy (%)'.format(oa))
    x_file.write('\n')
    x_file.write('{} Average accuracy (%)'.format(aa))
    x_file.write('\n')
    x_file.write('\n')
    x_file.write('{}'.format(classification))
    x_file.write('\n')
    x_file.write('{}'.format(confusion))
# Load the original image
X = sio.loadmat('Indian_pines_corrected.mat')['indian_pines_corrected']
y = sio.loadmat('Indian_pines_gt.mat')['indian_pines_gt']
height = y.shape[0]
width = y.shape[1]
X = applyPCA(X, numComponents=pca_components)
X = padWithZeros(X, patch_size // 2)
# Predict the class of every labeled pixel
outputs = np.zeros((height, width))
for i in range(height):
    for j in range(width):
        if int(y[i, j]) == 0:
            continue
        else:
            image_patch = X[i:i + patch_size, j:j + patch_size, :]
            image_patch = image_patch.reshape(1, image_patch.shape[0], image_patch.shape[1], image_patch.shape[2], 1)
            X_test_image = torch.FloatTensor(image_patch.transpose(0, 4, 3, 1, 2)).to(device)
            prediction = net(X_test_image)
            prediction = np.argmax(prediction.detach().cpu().numpy(), axis=1)
            outputs[i][j] = prediction[0] + 1
    if i % 20 == 0:
        print('... ... row ', i, ' handling ... ...')
Visualizing the Prediction Results
9. Display the prediction results
predict_image = spectral.imshow(classes=outputs.astype(int), figsize=(5, 5))
[Figure: the final prediction map]
Summary of Questions
1. Why are the results different on each run?
Reason: the randomness of each run comes from:
- Random initialization: the model weights are initialized randomly.
- Data shuffling: the training set is reshuffled at every epoch.
- Dropout: some neurons are randomly dropped during training (and if net.eval() is not called before testing, Dropout stays active at inference too, so even repeated tests of the same trained model can differ).
To make runs reproducible, fix all the random seeds before building the model. A minimal sketch of such a helper (the set_seed name is our own; it is not defined elsewhere in this post):
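import random

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Make cuDNN deterministic (may slow training slightly)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)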
2. How can the classification performance on hyperspectral images be improved?
Improvement directions and formulas:
- Data augmentation: add Gaussian noise or flip images (see the sketch at the end of this question).
- Model optimization: add more layers or use residual structures (e.g., ResNet).
- Attention mechanisms: incorporate SENet- or Transformer-style modules.
- Regularization: use L2 regularization or Dropout to avoid overfitting.
Formula (loss function + L2 regularization):
$L(\theta) = L_0 + \lambda \sum_{i} \|\theta_i\|^2$
where $L_0$ is the original loss and $\lambda$ is the regularization coefficient.
Code example:
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=1e-5)  # L2 regularization via weight decay
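As a minimal sketch of the data-augmentation direction above (flips plus Gaussian noise on the (N, 1, bands, H, W) patch tensors used in this post; the augment_batch name and the 0.01 noise level are our own choices):
def augment_batch(x, noise_std=0.01):
    # Random flips of the two spatial dimensions (the last two axes)
    if torch.rand(1).item() < 0.5:
        x = torch.flip(x, dims=[-1])
    if torch.rand(1).item() < 0.5:
        x = torch.flip(x, dims=[-2])
    # Additive Gaussian noise
    return x + noise_std * torch.randn_like(x)

# Usage inside the training loop, after moving inputs to the device:
# inputs = augment_batch(inputs)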
3. Differences and connections between depth-wise convolution and grouped convolution
Formulas and explanation:
- Depth-wise convolution: each channel is convolved independently:
  $Y_i = X_i * K_i$
  where $X_i$ and $K_i$ are the $i$-th input channel and its kernel.
- Grouped convolution: the channels are split into $g$ groups and convolution is performed within each group:
  $Y_i = \sum_{j \in G_i} (X_j * K_j)$
The connection: depth-wise convolution is the special case of grouped convolution in which the number of groups equals the number of input channels.
Code examples:
# Depth-wise convolution (groups == in_channels)
depthwise_conv = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, groups=32)
# Grouped convolution (channels split into 4 groups)
grouped_conv = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, groups=4)
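To make the difference concrete, compare the weight counts of the two layers above with an ordinary convolution of the same shape:
standard_conv = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)  # for reference
print(depthwise_conv.weight.numel())  # 32 * 1 * 3 * 3 = 288
print(grouped_conv.weight.numel())    # 64 * (32 // 4) * 3 * 3 = 4608
print(standard_conv.weight.numel())   # 64 * 32 * 3 * 3 = 18432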
4. Can SENet's attention mechanism be applied to spatial positions?
Yes.
Formulas:
- SENet's channel attention:
  $\mathbf{z} = \sigma(W_2 \delta(W_1 \mathbf{s}))$
  where $\mathbf{s}$ is the channel descriptor produced by global average pooling, $\delta$ is ReLU, and $\sigma$ is the sigmoid.
- Extension to spatial attention: compute per-position weights to focus on important regions:
  $\mathbf{M}_{spatial} = \sigma(\text{Conv2D}(X))$
Code example:
class SpatialAttention(nn.Module):
    def __init__(self):
        super(SpatialAttention, self).__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3)  # spatial attention convolution
    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)    # channel-wise average pooling
        max_out, _ = torch.max(x, dim=1, keepdim=True)  # channel-wise max pooling
        combined = torch.cat([avg_out, max_out], dim=1) # concatenate the two maps
        attention = torch.sigmoid(self.conv(combined))  # attention weights
        return x * attention
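For comparison, here is a minimal sketch of the SENet-style channel attention from the first formula (our own implementation, reusing the imports from step 2), followed by a quick shape check of both modules:
class ChannelSE(nn.Module):
    def __init__(self, channels, reduction=16):
        super(ChannelSE, self).__init__()
        self.fc1 = nn.Linear(channels, channels // reduction)
        self.fc2 = nn.Linear(channels // reduction, channels)
    def forward(self, x):
        s = x.mean(dim=(2, 3))                            # squeeze: global average pooling -> (N, C)
        z = torch.sigmoid(self.fc2(F.relu(self.fc1(s))))  # excitation: z = sigma(W2 delta(W1 s))
        return x * z.view(x.size(0), -1, 1, 1)            # rescale each channel

feat = torch.randn(2, 64, 17, 17)      # dummy feature map
print(SpatialAttention()(feat).shape)  # torch.Size([2, 64, 17, 17])
print(ChannelSE(64)(feat).shape)       # torch.Size([2, 64, 17, 17])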
5. How is channel shuffle implemented in ShuffleNet?
- Channel shuffle: permutes the channels produced by grouped convolutions so that information can flow across groups.
Formula: rearrange the channels as
X = X.reshape(N, G, C//G, H, W).permute(0, 2, 1, 3, 4).reshape(N, C, H, W)
where $G$ is the number of groups and $C$ is the number of channels.
Code example:
import torch

def channel_shuffle(x, groups):
    batch_size, channels, height, width = x.size()
    assert channels % groups == 0
    # Reshape the channels to (batch_size, groups, channels_per_group, height, width)
    x = x.view(batch_size, groups, channels // groups, height, width)
    # Swap the groups and channels_per_group dimensions
    x = x.permute(0, 2, 1, 3, 4).contiguous()
    # Restore the original shape
    x = x.view(batch_size, channels, height, width)
    return x

# Example
input_tensor = torch.randn(1, 32, 28, 28)  # example input shape
output_tensor = channel_shuffle(input_tensor, groups=4)
print(output_tensor.shape)  # should print (1, 32, 28, 28)
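To see that channels really are interleaved across groups, run the shuffle on a tensor whose channel values equal their indices:
x = torch.arange(8).float().view(1, 8, 1, 1)  # channels labeled 0..7, two groups of 4
print(channel_shuffle(x, groups=2).flatten().tolist())
# [0.0, 4.0, 1.0, 5.0, 2.0, 6.0, 3.0, 7.0] : the two groups are interleaved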
Conclusion
MobileNet (V1, V2, V3) and ShuffleNet achieve efficient, lightweight networks through techniques such as depth-wise separable convolutions, inverted residual structures, grouped convolutions, and channel shuffle, making them well suited to resource-constrained environments. SENet and CBAM use channel and spatial attention mechanisms to sharpen a model's focus on the most informative features. For hyperspectral image classification, combining HybridSN's 3D and 2D convolutions to extract multi-level features with optimizations such as regularization and attention modules can further improve accuracy. Together, these techniques illustrate the trend toward lightweight, intelligent networks and open up broad possibilities for deep learning on mobile and embedded devices.