import torch
import torch.nn as nn
# GitHub: https://github.com/zcablii/Large-Selective-Kernel-Network
# Paper: https://openaccess.thecvf.com/content/ICCV2023/papers/Li_Large_Selective_Kernel_Network_for_Remote_Sensing_Object_Detection_ICCV_2023_paper.pdf
class LSKblock(nn.Module):
    def __init__(self, dim):
        super().__init__()
        # 5x5 depthwise conv (local context)
        self.conv0 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
        # 7x7 depthwise conv with dilation 3 (spans 19x19); stacked on the 5x5,
        # it gives a 23x23 theoretical receptive field
        self.conv_spatial = nn.Conv2d(dim, dim, 7, stride=1, padding=9, groups=dim, dilation=3)
        # 1x1 convs that halve the channels of each branch
        self.conv1 = nn.Conv2d(dim, dim // 2, 1)
        self.conv2 = nn.Conv2d(dim, dim // 2, 1)
        # 7x7 conv over the 2-channel (avg, max) descriptor -> per-branch spatial gates
        self.conv_squeeze = nn.Conv2d(2, 2, 7, padding=3)
        # 1x1 conv restoring the full channel count
        self.conv = nn.Conv2d(dim // 2, dim, 1)
    def forward(self, x):
        attn1 = self.conv0(x)                              # (3, 64, 32, 32)
        attn2 = self.conv_spatial(attn1)                   # (3, 64, 32, 32)
        attn1 = self.conv1(attn1)                          # (3, 32, 32, 32)
        attn2 = self.conv2(attn2)                          # (3, 32, 32, 32)
        attn = torch.cat([attn1, attn2], dim=1)            # (3, 64, 32, 32)
        avg_attn = torch.mean(attn, dim=1, keepdim=True)   # (3, 1, 32, 32)
        max_attn, _ = torch.max(attn, dim=1, keepdim=True) # (3, 1, 32, 32)
        agg = torch.cat([avg_attn, max_attn], dim=1)       # (3, 2, 32, 32)
        sig = self.conv_squeeze(agg).sigmoid()             # (3, 2, 32, 32)
        # sig[:, 0, :, :].unsqueeze(1) has shape (3, 1, 32, 32) and broadcasts
        # against attn1/attn2 of shape (3, 32, 32, 32)
        attn = attn1 * sig[:, 0, :, :].unsqueeze(1) + \
               attn2 * sig[:, 1, :, :].unsqueeze(1)
        attn = self.conv(attn)                             # (3, 64, 32, 32)
        return x * attn
'''
Input x:                            (batch_size, dim, height, width)
conv0 -> attn1:                     (batch_size, dim, height, width)
conv_spatial -> attn2:              (batch_size, dim, height, width)
conv1 halves channels, attn1:       (batch_size, dim // 2, height, width)
conv2 halves channels, attn2:       (batch_size, dim // 2, height, width)
concatenation -> attn:              (batch_size, dim, height, width)
channel-wise mean -> avg_attn:      (batch_size, 1, height, width)
channel-wise max -> max_attn:       (batch_size, 1, height, width)
concatenation -> agg:               (batch_size, 2, height, width)
conv_squeeze + sigmoid -> sig:      (batch_size, 2, height, width)
weighted sum -> attn:               (batch_size, dim // 2, height, width)
final conv -> attn:                 (batch_size, dim, height, width)
output x * attn:                    (batch_size, dim, height, width)
'''
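# A quick trace to verify the shape walkthrough above (assumes dim=64 and the
# same 3x64x32x32 input as the demo below); illustrative only, not part of the
# original module.
_block = LSKblock(64)
_x = torch.rand(3, 64, 32, 32)
_a1 = _block.conv0(_x)
assert _a1.shape == (3, 64, 32, 32)
_a2 = _block.conv_spatial(_a1)
assert _a2.shape == (3, 64, 32, 32)
assert _block.conv1(_a1).shape == _block.conv2(_a2).shape == (3, 32, 32, 32)
assert _block(_x).shape == (3, 64, 32, 32)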
if __name__ == '__main__':
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    block = LSKblock(64).to(device)
    x = torch.rand(3, 64, 32, 32).to(device)  # input: (B, C, H, W)
    output = block(x)
    print(x.size(), output.size())
'''
Notes:
The large-kernel convolutions exist to produce attention weights, not the final
output; the final output is obtained by multiplying the input by those weights.

Original design: a 5x5 conv, then a 7x7 (dilated) conv on top of it. Possible
modifications: 1) run the 5x5 and 7x7 branches in parallel; 2) add a 9x9 branch
in parallel with the 7x7. One could also change how the weights for the
different kernels are computed: the original derives the weights from the
features; could a weight instead be initialized directly and trained as a
learnable parameter? See the sketch below.
'''
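# A minimal sketch of the modifications above, not the authors' implementation.
# `ParallelLSKblock` is a hypothetical name, the 9x9 branch's dilation of 4 is
# an assumption, and `learnable_weights=True` replaces the feature-derived
# spatial gates with directly trainable per-branch scalars, as the note suggests.
class ParallelLSKblock(nn.Module):
    def __init__(self, dim, learnable_weights=False):
        super().__init__()
        # three depthwise branches applied to the same input, in parallel
        self.conv5 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
        self.conv7 = nn.Conv2d(dim, dim, 7, padding=9, groups=dim, dilation=3)   # spans 19x19
        self.conv9 = nn.Conv2d(dim, dim, 9, padding=16, groups=dim, dilation=4)  # spans 33x33
        self.proj = nn.ModuleList(nn.Conv2d(dim, dim // 2, 1) for _ in range(3))
        self.learnable_weights = learnable_weights
        if learnable_weights:
            # directly initialized, feature-independent branch weights
            self.branch_w = nn.Parameter(torch.ones(3))
        else:
            # feature-derived gates as in LSKblock, but with 3 output channels
            self.conv_squeeze = nn.Conv2d(2, 3, 7, padding=3)
        self.conv = nn.Conv2d(dim // 2, dim, 1)

    def forward(self, x):
        feats = [p(c(x)) for p, c in zip(self.proj, (self.conv5, self.conv7, self.conv9))]
        if self.learnable_weights:
            w = torch.softmax(self.branch_w, dim=0)  # normalized trainable weights
            attn = sum(wi * f for wi, f in zip(w, feats))
        else:
            stacked = torch.cat(feats, dim=1)
            avg_attn = torch.mean(stacked, dim=1, keepdim=True)
            max_attn, _ = torch.max(stacked, dim=1, keepdim=True)
            sig = self.conv_squeeze(torch.cat([avg_attn, max_attn], dim=1)).sigmoid()
            attn = sum(f * sig[:, i:i + 1] for i, f in enumerate(feats))
        return x * self.conv(attn)

# Usage, e.g.: ParallelLSKblock(64, learnable_weights=True)(torch.rand(3, 64, 32, 32))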