The main contributions of YOLOv7 are:
1. It brings model re-parameterization into the network architecture; the idea first appeared in RepVGG.
2. Its label assignment strategy combines YOLOv5's cross-grid search with YOLOX's matching strategy (SimOTA).
3. It proposes E-ELAN, a new network architecture designed around efficiency.
4. It proposes a training method with an auxiliary head: accuracy is improved at the cost of extra training time, while inference time is unaffected, because the auxiliary head exists only during training.
1. Network Structure
- First the model is initialized: the layers are stacked by reading the configuration file directly, which defines two parts:
- backbone
- head
def parse_model(d, ch): # model_dict, input_channels(3)
logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
m = eval(m) if isinstance(m, str) else m # eval strings
for j, a in enumerate(args):
try:
args[j] = eval(a) if isinstance(a, str) else a # eval strings
except:
pass
n = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [nn.Conv2d, Conv, RobustConv, RobustConv2, DWConv, GhostConv, RepConv, RepConv_OREPA, DownC,
SPP, SPPF, SPPCSPC, GhostSPPCSPC, MixConv2d, Focus, Stem, GhostStem, CrossConv,
Bottleneck, BottleneckCSPA, BottleneckCSPB, BottleneckCSPC,
RepBottleneck, RepBottleneckCSPA, RepBottleneckCSPB, RepBottleneckCSPC,
Res, ResCSPA, ResCSPB, ResCSPC,
RepRes, RepResCSPA, RepResCSPB, RepResCSPC,
ResX, ResXCSPA, ResXCSPB, ResXCSPC,
RepResX, RepResXCSPA, RepResXCSPB, RepResXCSPC,
Ghost, GhostCSPA, GhostCSPB, GhostCSPC,
SwinTransformerBlock, STCSPA, STCSPB, STCSPC,
SwinTransformer2Block, ST2CSPA, ST2CSPB, ST2CSPC]:
c1, c2 = ch[f], args[0]
if c2 != no: # if not output
c2 = make_divisible(c2 * gw, 8)
args = [c1, c2, *args[1:]]
if m in [DownC, SPPCSPC, GhostSPPCSPC,
BottleneckCSPA, BottleneckCSPB, BottleneckCSPC,
RepBottleneckCSPA, RepBottleneckCSPB, RepBottleneckCSPC,
ResCSPA, ResCSPB, ResCSPC,
RepResCSPA, RepResCSPB, RepResCSPC,
ResXCSPA, ResXCSPB, ResXCSPC,
RepResXCSPA, RepResXCSPB, RepResXCSPC,
GhostCSPA, GhostCSPB, GhostCSPC,
STCSPA, STCSPB, STCSPC,
ST2CSPA, ST2CSPB, ST2CSPC]:
args.insert(2, n) # number of repeats
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum([ch[x] for x in f])
elif m is Chuncat:
c2 = sum([ch[x] for x in f])
elif m is Shortcut:
c2 = ch[f[0]]
elif m is Foldcut:
c2 = ch[f] // 2
elif m in [Detect, IDetect, IAuxDetect, IBin]:
args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
elif m is ReOrg:
c2 = ch[f] * 4
elif m is Contract:
c2 = ch[f] * args[0] ** 2
elif m is Expand:
c2 = ch[f] // args[0] ** 2
else:
c2 = ch[f]
m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
np = sum([x.numel() for x in m_.parameters()]) # number params
m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
if i == 0:
ch = []
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
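- parse_model is driven by the model yaml. A minimal usage sketch (the config path is illustrative, and it assumes PyYAML plus the yaml keys referenced above):

import yaml

with open('cfg/training/yolov7.yaml', encoding='utf-8') as f:  # illustrative path
    cfg = yaml.safe_load(f)  # dict with 'anchors', 'nc', 'depth_multiple', 'width_multiple', 'backbone', 'head'
model, savelist = parse_model(cfg, ch=[3])  # ch=[3]: RGB input channels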
- The forward pass then executes each constructed module in turn:
def forward_once(self, x, profile=False):
y, dt = [], [] # outputs
    for m in self.model:  # run each layer stacked from the config file
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if not hasattr(self, 'traced'):
self.traced=False
if self.traced:
if isinstance(m, Detect) or isinstance(m, IDetect) or isinstance(m, IAuxDetect):
break
if profile:
c = isinstance(m, (Detect, IDetect, IAuxDetect, IBin))
o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS
for _ in range(10):
m(x.copy() if c else x)
t = time_synchronized()
for _ in range(10):
m(x.copy() if c else x)
dt.append((time_synchronized() - t) * 100)
print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type))
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if profile:
print('%.1fms total' % sum(dt))
return x
(1) backbone
- The backbone alternates several BConv, E-ELAN and MPConv layers, halving the spatial resolution and doubling the channels while extracting features.
- A BConv layer consists of a convolution layer + a BN layer + a LeakyReLU activation (a minimal sketch follows this list).
- E-ELAN layer
- MPConv layer
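- To make the basic block concrete, here is a minimal BConv-style module (a sketch only: the name BConv, the kernel size and the LeakyReLU slope are assumptions, not copied from the repo):

import torch.nn as nn

class BConv(nn.Module):
    # Conv + BN + LeakyReLU: the elementary block of the backbone (sketch)
    def __init__(self, c1, c2, k=3, s=1):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, k // 2, bias=False)  # bias is absorbed by the BN
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))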
(2) SPPCSPC layer
class SPPCSPC(nn.Module):
    def forward(self, x):
        x1 = self.cv4(self.cv3(self.cv1(x)))  # three consecutive convolutions
        y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1)))  # max-pool x1 with three kernel sizes (5, 9, 13) and concatenate with x1 itself: four tensors in total
        y2 = self.cv2(x)  # shortcut branch
        return self.cv7(torch.cat((y1, y2), dim=1))
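- The forward above references cv1-cv7 and self.m, which are built in __init__. A sketch of that __init__ consistent with the forward (Conv denotes the repo's Conv+BN+activation block from models/common.py; treat the exact defaults as assumptions):

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)):
        super().__init__()
        c_ = int(2 * c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)  # shortcut branch
        self.cv3 = Conv(c_, c_, 3, 1)
        self.cv4 = Conv(c_, c_, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])  # the 5/9/13 max-pools
        self.cv5 = Conv(4 * c_, c_, 1, 1)  # x1 plus three pooled maps = 4 * c_ channels
        self.cv6 = Conv(c_, c_, 3, 1)
        self.cv7 = Conv(2 * c_, c2, 1, 1)  # y1 concatenated with y2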
(3) RepConv layer
- RepConv has different structures at training and deployment time. During training, a 1x1 convolution branch runs in parallel with the 3x3 convolution, and when the input and output share the same channel count and spatial size, an identity BN branch is added as well; the outputs of the three branches are summed. For deployment, the branch parameters are re-parameterized into the main branch, so only the 3x3 convolution remains (a BN-folding sketch follows the code).
class RepConv(nn.Module):
    def forward(self, inputs):
        if hasattr(self, "rbr_reparam"):
            return self.act(self.rbr_reparam(inputs))  # deployment: single fused 3x3 branch
        if self.rbr_identity is None:
            id_out = 0
        else:
            id_out = self.rbr_identity(inputs)
        return self.act(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)  # training: sum of the 3x3, 1x1 and identity-BN branches
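- Deployment-time re-parameterization folds each BN into its preceding convolution, pads the fused 1x1 kernel to 3x3, and sums the kernels and biases of all branches into one 3x3 convolution. A minimal sketch of the BN-folding step (an illustrative helper; the repo ships its own fusion routine):

import torch.nn as nn
import torch.nn.functional as F

def fuse_conv_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d):
    # Fold the BN statistics into the conv kernel and bias (the standard re-parameterization step)
    std = (bn.running_var + bn.eps).sqrt()
    t = (bn.weight / std).reshape(-1, 1, 1, 1)
    kernel = conv.weight * t
    bias = bn.bias - bn.running_mean * bn.weight / std
    return kernel, bias

# A fused 1x1 kernel is zero-padded to 3x3 before the branch sums:
# kernel3x3 = F.pad(kernel1x1, [1, 1, 1, 1])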
2. Label Assignment
(1) Coarse filtering by position and anchor box size
- To match better, raise recall and gain more positive samples, the assignment is shifted by 0.5 cells up, down, left and right to collect additional positives, and a GT is kept for an anchor only when 0.25 < GT/anchor width and height ratio < 4 (a runnable check follows the code).
def find_3_positive(self, p, targets):
    # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
    na, nt = self.na, targets.shape[0]  # number of anchors (3), number of targets
    indices, anch = [], []
    gain = torch.ones(7, device=targets.device).long()  # 7 = the 6 original label fields + 1 anchor index; normalized-to-gridspace gain
    ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # anchor index per target; all three anchor shapes are candidates, same as .repeat_interleave(nt)
    targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices as a last dimension
    g = 0.5  # bias: shift by 0.5 cells up/down/left/right to collect extra positive samples
    off = torch.tensor([[0, 0],  # the cell itself
                        [1, 0], [0, 1], [-1, 0], [0, -1],  # right, up, left, down: j,k,l,m
                        # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
                        ], device=targets.device).float() * g  # direction times magnitude gives the offsets
    for i in range(self.nl):  # handle each of the 3 output layers in turn
        anchors = self.anchors[i]  # anchors of the current output layer
        gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain, used below
        # Match targets to anchors: figure out which output layer suits each GT
        t = targets * gain  # map the normalized labels onto the feature map
        if nt:
            # Matches
            r = t[:, :, 4:6] / anchors[:, None]  # wh ratio of every GT against every anchor
            j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t']  # kept only if 0.25 < ratio < 4
            # compare
            # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
            t = t[j]  # filter
            # Offsets
            gxy = t[:, 2:4]  # grid xy: distance to the top-left corner
            gxi = gain[[2, 3]] - gxy  # inverse: distance to the bottom-right corner
            j, k = ((gxy % 1. < g) & (gxy > 1.)).T  # select cells whose GT center is close to the top-left and not on the border
            l, m = ((gxi % 1. < g) & (gxi > 1.)).T  # select cells whose GT center is close to the bottom-right and not on the border
            j = torch.stack((torch.ones_like(j), j, k, l, m))  # 5 masks; the cell the GT actually falls in is always True
            t = t.repeat((5, 1, 1))[j]  # each GT now also claims two neighbouring cells, so the targets multiply
            offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]  # the matching shift for each region, all of 0.5 cells
        else:
            t = targets[0]
            offsets = 0
        # Define
        b, c = t[:, :2].long().T  # image, class
        gxy = t[:, 2:4]  # grid xy
        gwh = t[:, 4:6]  # grid wh
        gij = (gxy - offsets).long()  # the integer part after shifting is the grid-cell index
        gi, gj = gij.T  # grid xy indices
        # Append
        a = t[:, 6].long()  # anchor index of each target
        indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))  # image, anchor, grid indices
        anch.append(anchors[a])  # anchor sizes
    return indices, anch
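- A quick self-contained check of the wh-ratio filter (the numbers are invented for illustration; anchor_t defaults to 4.0 in the repo's hyperparameter files):

import torch

anchor = torch.tensor([12., 12.])
gt_wh = torch.tensor([[40., 40.],   # ratio 3.33 in both dimensions -> kept
                      [60., 10.]])  # width ratio 5.0 -> filtered out
r = gt_wh / anchor
keep = torch.max(r, 1. / r).max(1)[0] < 4.0  # hyp['anchor_t']
print(keep)  # tensor([ True, False])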
(2) Fine filtering by IoU and class
- The IoU between each GT and all candidate positive samples is computed; per GT, only the k best candidates are kept, where k is derived from the IoU sum (at least 1), so candidates with tiny IoU are dropped. A classification loss is computed as well, and the final cost weighs the IoU and the classification quality together. When one positive sample is matched to several GTs, only the match with the smallest cost survives and the others are removed (a toy check of the dynamic-k rule follows the code).
# Fine filtering by IoU and class (pick, per GT, the candidates with the smallest combined loss)
matching_bs = [[] for pp in p]
matching_as = [[] for pp in p]
matching_gjs = [[] for pp in p]
matching_gis = [[] for pp in p]
matching_targets = [[] for pp in p]
matching_anchs = [[] for pp in p]
nl = len(p)
for batch_idx in range(p[0].shape[0]):
    b_idx = targets[:, 0] == batch_idx
    this_target = targets[b_idx]  # the GT boxes of the current image
    if this_target.shape[0] == 0:
        continue
    txywh = this_target[:, 2:6] * imgs[batch_idx].shape[1]  # recover the absolute sizes
    txyxy = xywh2xyxy(txywh)
    pxyxys = []
    p_cls = []
    p_obj = []
    from_which_layer = []
    all_b = []
    all_a = []
    all_gj = []
    all_gi = []
    all_anch = []
    for i, pi in enumerate(p):  # iterate over the 3 output layers
        b, a, gj, gi = indices[i]
        idx = (b == batch_idx)
        b, a, gj, gi = b[idx], a[idx], gj[idx], gi[idx]  # positions/index values of the candidate boxes
        all_b.append(b)
        all_a.append(a)
        all_gj.append(gj)
        all_gi.append(gi)
        all_anch.append(anch[i][idx])
        from_which_layer.append(torch.ones(size=(len(b),)) * i)  # which output layer each candidate comes from
        fg_pred = pi[b, a, gj, gi]  # predictions at the target positions
        p_obj.append(fg_pred[:, 4:5])
        p_cls.append(fg_pred[:, 5:])
        grid = torch.stack([gi, gj], dim=1)
        pxy = (fg_pred[:, :2].sigmoid() * 2. - 0.5 + grid) * self.stride[i]  # center offset within the cell, in (-0.5, 1.5), scaled back by the stride / 8.
        # pxy = (fg_pred[:, :2].sigmoid() * 3. - 1. + grid) * self.stride[i]
        pwh = (fg_pred[:, 2:4].sigmoid() * 2) ** 2 * anch[i][idx] * self.stride[i]  # wh may reach 4x the anchor; scaled back the same way / 8.
        pxywh = torch.cat([pxy, pwh], dim=-1)
        pxyxy = xywh2xyxy(pxywh)
        pxyxys.append(pxyxy)
    pxyxys = torch.cat(pxyxys, dim=0)
    if pxyxys.shape[0] == 0:
        continue
    p_obj = torch.cat(p_obj, dim=0)
    p_cls = torch.cat(p_cls, dim=0)
    from_which_layer = torch.cat(from_which_layer, dim=0)
    all_b = torch.cat(all_b, dim=0)
    all_a = torch.cat(all_a, dim=0)
    all_gj = torch.cat(all_gj, dim=0)
    all_gi = torch.cat(all_gi, dim=0)
    all_anch = torch.cat(all_anch, dim=0)
    pair_wise_iou = box_iou(txyxy, pxyxys)  # IoU between every GT and all candidate positive samples
    pair_wise_iou_loss = -torch.log(pair_wise_iou + 1e-8)  # IoU loss
    top_k, _ = torch.topk(pair_wise_iou, min(10, pair_wise_iou.shape[1]), dim=1)  # take up to 10 best IoUs per GT, fewer if fewer exist
    dynamic_ks = torch.clamp(top_k.sum(1).int(), min=1)  # sum and truncate: keep that many boxes per GT, dropping ones whose IoU is too small, at least 1
    # Classification loss
    gt_cls_per_image = (  # ground truth
        F.one_hot(this_target[:, 1].to(torch.int64), self.nc)
        .float()
        .unsqueeze(1)
        .repeat(1, pxyxys.shape[0], 1)  # one-hot labels repeated once per candidate box
    )
    num_gt = this_target.shape[0]
    cls_preds_ = (  # predicted class scores
        p_cls.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()
        * p_obj.unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()  # weighted by the objectness confidence
    )
    y = cls_preds_.sqrt_()
    pair_wise_cls_loss = F.binary_cross_entropy_with_logits(
        torch.log(y / (1 - y)), gt_cls_per_image, reduction="none"
    ).sum(-1)  # cross-entropy: the class discrepancy
    del cls_preds_
    cost = (  # the selection among candidates weighs both IoU and classification quality
        pair_wise_cls_loss
        + 3.0 * pair_wise_iou_loss  # the IoU loss counts 3x
    )
    matching_matrix = torch.zeros_like(cost)  # matching matrix, 1 where matched
    for gt_idx in range(num_gt):  # per GT, keep the k candidates with the smallest cost
        _, pos_idx = torch.topk(
            cost[gt_idx], k=dynamic_ks[gt_idx].item(), largest=False
        )
        matching_matrix[gt_idx][pos_idx] = 1.0
    del top_k, dynamic_ks
    anchor_matching_gt = matching_matrix.sum(0)  # column-wise sum
    if (anchor_matching_gt > 1).sum() > 0:  # a positive sample matched to several GTs
        _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], dim=0)  # keep the GT with the smallest cost, delete the rest
        matching_matrix[:, anchor_matching_gt > 1] *= 0.0  # clear the others
        matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1.0  # retain only the cheapest match
    fg_mask_inboxes = matching_matrix.sum(0) > 0.0  # which candidates are positive samples
    matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0)  # GT index of every positive sample
    from_which_layer = from_which_layer[fg_mask_inboxes]
    all_b = all_b[fg_mask_inboxes]  # corresponding batch indices
    all_a = all_a[fg_mask_inboxes]  # corresponding anchor indices
    all_gj = all_gj[fg_mask_inboxes]
    all_gi = all_gi[fg_mask_inboxes]
    all_anch = all_anch[fg_mask_inboxes]
    this_target = this_target[matched_gt_inds]  # the GTs matched to positive samples
    for i in range(nl):  # collect the positive samples of each layer
        layer_idx = from_which_layer == i
        matching_bs[i].append(all_b[layer_idx])
        matching_as[i].append(all_a[layer_idx])
        matching_gjs[i].append(all_gj[layer_idx])
        matching_gis[i].append(all_gi[layer_idx])
        matching_targets[i].append(this_target[layer_idx])
        matching_anchs[i].append(all_anch[layer_idx])
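- The dynamic-k rule can be checked in isolation: the sum of a GT's top-10 IoUs, truncated to an integer, is the number of candidates that GT keeps (toy numbers):

import torch

top_k = torch.tensor([[0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.05, 0.05]])  # top-10 IoUs of one GT, summing to 3.7
dynamic_ks = torch.clamp(top_k.sum(1).int(), min=1)
print(dynamic_ks)  # tensor([3], dtype=torch.int32) -> this GT keeps its 3 cheapest candidates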
3. Loss Computation
- Loss = box regression loss + objectness loss + classification loss. The CIoU loss is used for regression; it accounts for the overlap area, the center-point distance, and the aspect ratio.
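- For reference, CIoU (Zheng et al., 2020) is defined as

$$\mathrm{CIoU} = \mathrm{IoU} - \frac{\rho^2(b, b^{gt})}{c^2} - \alpha v, \qquad v = \frac{4}{\pi^2}\left(\arctan\frac{w^{gt}}{h^{gt}} - \arctan\frac{w}{h}\right)^2, \qquad \alpha = \frac{v}{(1-\mathrm{IoU}) + v}$$

where $\rho$ is the distance between the two box centers and $c$ is the diagonal of the smallest enclosing box; the regression loss is $1 - \mathrm{CIoU}$.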
def __call__(self, p, targets, imgs):  # p: predictions, targets: labels, imgs: the raw input images
    device = targets.device
    lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)  # initialize the three losses
    bs, as_, gjs, gis, targets, anchors = self.build_targets(p, targets, imgs)
    pre_gen_gains = [torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p]
    # Losses
    for i, pi in enumerate(p):  # layer index, layer predictions
        b, a, gj, gi = bs[i], as_[i], gjs[i], gis[i]  # image, anchor, gridy, gridx
        tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj
        n = b.shape[0]  # number of targets
        if n:
            ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
            # Regression: how accurate the predicted x, y, w, h are
            grid = torch.stack([gi, gj], dim=1)  # index positions on the feature map
            pxy = ps[:, :2].sigmoid() * 2. - 0.5
            # pxy = ps[:, :2].sigmoid() * 3. - 1.
            pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
            pbox = torch.cat((pxy, pwh), 1)  # predicted box
            selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i]
            selected_tbox[:, :2] -= grid  # obtain the target offset
            iou = bbox_iou(pbox.T, selected_tbox, x1y1x2y2=False, CIoU=True)  # CIoU adds overlap area, center distance and aspect ratio to iou(prediction, target)
            lbox += (1.0 - iou).mean()  # iou loss
            # Objectness loss
            tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype)  # the iou ratio is used as the objectness target
            # Classification
            selected_tcls = targets[i][:, 1].long()
            if self.nc > 1:  # cls loss (only if multiple classes)
                t = torch.full_like(ps[:, 5:], self.cn, device=device)  # targets
                t[range(n), selected_tcls] = self.cp  # one-hot encode
                lcls += self.BCEcls(ps[:, 5:], t)  # BCE
            # Append targets to text file
            # with open('targets.txt', 'a') as file:
            #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
        obji = self.BCEobj(pi[..., 4], tobj)  # most positions are background
        lobj += obji * self.balance[i]  # objectness (obj) loss
        if self.autobalance:
            self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
    if self.autobalance:
        self.balance = [x / self.balance[self.ssi] for x in self.balance]
    lbox *= self.hyp['box']  # regression loss
    lobj *= self.hyp['obj']  # objectness loss
    lcls *= self.hyp['cls']  # classification loss
    bs = tobj.shape[0]  # batch size
    loss = lbox + lobj + lcls
    return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()
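- During training this loss object is called once per batch; a hedged usage sketch (names follow the repo's train.py, but treat them as assumptions):

# compute_loss_ota = ComputeLossOTA(model)  # built once, right after the model
loss, loss_items = compute_loss_ota(pred, targets.to(device), imgs)  # already scaled by batch size
loss.backward()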
4. Prediction
def detect(save_img=False):
source, weights, view_img, save_txt, imgsz, trace = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, not opt.no_trace
save_img = not opt.nosave and not source.endswith('.txt') # save inference images
webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
('rtsp://', 'rtmp://', 'http://', 'https://'))
# Directories
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Initialize
set_logging()
device = select_device(opt.device)
half = device.type != 'cpu' # half precision only supported on CUDA
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
stride = int(model.stride.max()) # model stride
imgsz = check_img_size(imgsz, s=stride) # check img_size
if trace:
model = TracedModel(model, device, opt.img_size)
if half:
model.half() # to FP16
# Second-stage classifier
classify = False
if classify:
modelc = load_classifier(name='resnet101', n=2) # initialize
modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()
# Set Dataloader
vid_path, vid_writer = None, None
if webcam:
view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride)
else:
dataset = LoadImages(source, img_size=imgsz, stride=stride)
# Get names and colors
names = model.module.names if hasattr(model, 'module') else model.names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
# Run inference
if device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
t0 = time.time()
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
img = img.unsqueeze(0)
# Inference
t1 = time_synchronized()
pred = model(img, augment=opt.augment)[0]
# Apply NMS
pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
t2 = time_synchronized()
# Apply Classifier
if classify:
pred = apply_classifier(pred, modelc, img, im0s)
# Process detections
for i, det in enumerate(pred): # detections per image
if webcam: # batch_size >= 1
p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
else:
p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)
p = Path(p) # to Path
save_path = str(save_dir / p.name) # img.jpg
txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt
s += '%gx%g ' % img.shape[2:] # print string
gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
if len(det):
# Rescale boxes from img_size to im0 size
det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
# Write results
for *xyxy, conf, cls in reversed(det):
if save_txt: # Write to file
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format
with open(txt_path + '.txt', 'a') as f:
f.write(('%g ' * len(line)).rstrip() % line + '\n')
if save_img or view_img: # Add bbox to image
label = f'{names[int(cls)]} {conf:.2f}'
plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
# Print time (inference + NMS)
#print(f'{s}Done. ({t2 - t1:.3f}s)')
# Stream results
if view_img:
cv2.imshow(str(p), im0)
cv2.waitKey(1) # 1 millisecond
# Save results (image with detections)
if save_img:
if dataset.mode == 'image':
cv2.imwrite(save_path, im0)
print(f" The image with the result is saved in: {save_path}")
else: # 'video' or 'stream'
if vid_path != save_path: # new video
vid_path = save_path
if isinstance(vid_writer, cv2.VideoWriter):
vid_writer.release() # release previous video writer
if vid_cap: # video
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
else: # stream
fps, w, h = 30, im0.shape[1], im0.shape[0]
save_path += '.mp4'
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
vid_writer.write(im0)
if save_txt or save_img:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
#print(f"Results saved to {save_dir}{s}")
print(f'Done. ({time.time() - t0:.3f}s)')
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', nargs='+', type=str, default='yolov7.pt', help='model.pt path(s)')
parser.add_argument('--source', type=str, default='inference/images', help='source') # file/folder, 0 for webcam
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='display results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
parser.add_argument('--augment', action='store_true', help='augmented inference')
parser.add_argument('--update', action='store_true', help='update all models')
parser.add_argument('--project', default='runs/detect', help='save results to project/name')
parser.add_argument('--name', default='exp', help='save results to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--no-trace', action='store_true', help='don`t trace model')
opt = parser.parse_args()
print(opt)
#check_requirements(exclude=('pycocotools', 'thop'))
with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)
for opt.weights in ['yolov7.pt']:
detect()
strip_optimizer(opt.weights)
else:
detect()
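- Inference is launched from the command line with the flags defined above, for example:

python detect.py --weights yolov7.pt --source inference/images --conf-thres 0.25 --iou-thres 0.45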
- After training, the results are saved under the runs folder: the training parameters, per-epoch statistics, losses, and the results on the test set.
- The trained weights are then loaded to run prediction.