"""Run a YOLOv5 checkpoint on one image and decode the raw head outputs by hand.

Pipeline: load the checkpoint, letterbox the image to a square network
input, apply the Focus slicing outside the model, run the network, decode
every detection level into boxes in source-image coordinates, apply
class-aware NMS and draw the surviving boxes into ``show.jpg``.
"""
import cv2
import numpy as np
import torch
import torch.nn
import torchvision
import torchvision.transforms.functional as T

# NOTE(security): torch.load unpickles arbitrary Python objects. Only load
# checkpoints that come from a trusted source.
checkpoint = torch.load("D:/yolov5m.pt", map_location="cpu")
model = checkpoint["model"].float()
model.eval()
model.fuse()
# The decoding below treats the model output as one raw feature map per
# detection level; presumably this export flag is what makes the Detect
# head return them undecoded -- confirm against the YOLOv5 version in use.
model.model[-1].export = True

# OpenCV loads BGR; the network input is converted to RGB.
image_path = "inference/images/zidane.jpg"
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"cannot read image: {image_path}")
show = image.copy()
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# The network was trained on 640x640 inputs, so scale the long side to 640
# and center the result on a square canvas (letterbox).
train_image_size = 640
image_height, image_width = image.shape[:2]
scale = train_image_size / max(image_height, image_width)
x_offset = train_image_size * 0.5 - image_width * scale * 0.5
y_offset = train_image_size * 0.5 - image_height * scale * 0.5
M = np.array(
    [
        [scale, 0, x_offset],
        [0, scale, y_offset],
    ],
    dtype=np.float32,
)
# Kept to map predicted boxes back into source-image coordinates.
inverse_M = cv2.invertAffineTransform(M)
image = cv2.warpAffine(
    image,
    M,
    (train_image_size, train_image_size),
    borderMode=cv2.BORDER_CONSTANT,
    borderValue=(114, 114, 114),
)

# to_tensor turns an H,W,C uint8 image into a C,H,W float tensor scaled by
# 1/255; unsqueeze adds the batch dimension.
image = T.to_tensor(image).unsqueeze(dim=0)

# Per the original author's note, the Focus slicing was moved out of the
# model for the TensorRT export, so it has to be applied to the input here.
image = torch.cat(
    [
        image[..., ::2, ::2],
        image[..., 1::2, ::2],
        image[..., ::2, 1::2],
        image[..., 1::2, 1::2],
    ],
    dim=1,
)

# Pre-processing is done; run the network.
with torch.no_grad():
    predicts = model(image)

# Decode the raw head outputs into boxes.
threshold = 0.25
num_anchor = 3
detect = model.model[-1]
# Plain Python floats avoid mixing numpy scalars into tensor arithmetic.
inv_xx, inv_xy, inv_xt = (float(value) for value in inverse_M[0])
inv_yx, inv_yy, inv_yt = (float(value) for value in inverse_M[1])

image_objects = []
for predict, stride, anchor in zip(predicts, detect.stride, detect.anchors):
    # Channel layout is num_anchor * (4 box + 1 objectness + num_classes),
    # e.g. 1x255x80x80 for 80 classes according to the original note.
    num_classes = predict.shape[1] // num_anchor - 5
    predict.sigmoid_()

    for anchor_index in range(num_anchor):
        channel_begin = anchor_index * (5 + num_classes)
        # Box regression values (cx, cy, w, h).
        regression = predict[0, channel_begin + 0:channel_begin + 4]
        # Objectness score per grid cell.
        objectness = predict[0, channel_begin + 4]
        # Per-class scores per grid cell.
        classifier = predict[0, channel_begin + 5:channel_begin + 5 + num_classes]

        # torch.where(cond) returns one index tensor per dimension; for the
        # 2-D objectness map these are the row (y) and column (x) indices.
        grid_y, grid_x = torch.where(objectness >= threshold)
        if grid_y.numel() == 0:
            continue

        select_classifier = classifier[:, grid_y, grid_x]
        max_class_id = select_classifier.argmax(dim=0)
        max_class_score = select_classifier[
            max_class_id, torch.arange(len(max_class_id))
        ]
        select_objectness_score = objectness[grid_y, grid_x]

        # Final confidence = objectness * best class score (the original
        # author notes that official YOLOv5 does the same).
        select_object_confidence = select_objectness_score * max_class_score

        # Second filter on the combined confidence. torch.where returns a
        # 1-tuple here, so take element 0 before testing for emptiness.
        keep_object_index = torch.where(select_object_confidence >= threshold)[0]
        if keep_object_index.numel() == 0:
            continue

        object_confidence = select_object_confidence[keep_object_index]
        object_class = max_class_id[keep_object_index]
        grid_x = grid_x[keep_object_index]
        grid_y = grid_y[keep_object_index]

        # Center: (sigmoid * 2 - 0.5 + cell index) * stride.
        box_cx, box_cy = (
            (regression[:2, grid_y, grid_x].view(2, -1) * 2)
            - 0.5
            + torch.stack([grid_x, grid_y], dim=0)
        ) * stride
        # Size: (sigmoid * 2) ** 2 * anchor * stride.
        box_width, box_height = (
            torch.pow(regression[2:4, grid_y, grid_x] * 2, 2)
            * anchor[anchor_index].view(2, 1)
            * stride
        )
        box_left = box_cx - (box_width - 1) * 0.5
        box_right = box_cx + (box_width - 1) * 0.5
        box_top = box_cy - (box_height - 1) * 0.5
        box_bottom = box_cy + (box_height - 1) * 0.5

        # Undo the letterbox transform: map both corners through inverse_M.
        box_source_left = box_left * inv_xx + box_top * inv_xy + inv_xt
        box_source_top = box_left * inv_yx + box_top * inv_yy + inv_yt
        box_source_right = box_right * inv_xx + box_bottom * inv_xy + inv_xt
        box_source_bottom = box_right * inv_yx + box_bottom * inv_yy + inv_yt

        objs = torch.stack(
            [
                box_source_left,
                box_source_top,
                box_source_right,
                box_source_bottom,
                object_confidence,
                # argmax yields int64; cast so every stacked column shares
                # one dtype regardless of the torch version.
                object_class.to(object_confidence.dtype),
            ],
            dim=1,
        )
        image_objects.append(objs)

# Many overlapping candidates survive thresholding, so run NMS. It must only
# suppress boxes of the same class: shifting every box by
# class_id * max_image_size moves different classes so far apart that they
# can never overlap (assumes all coordinates are below max_image_size).
max_image_size = 4096
if image_objects:
    # Columns: left, top, right, bottom, confidence, class.
    image_objs = torch.cat(image_objects, dim=0)
    nms_input_box = image_objs[:, :4] + image_objs[:, 5][:, None] * max_image_size
    # nms(boxes, scores, iou_threshold) returns the indices to keep.
    keep_index = torchvision.ops.nms(nms_input_box, image_objs[:, 4], 0.5)
    image_objs = image_objs[keep_index]
else:
    # torch.cat rejects an empty list; keep an empty (0, 6) result so the
    # output image is still written when nothing was detected.
    image_objs = torch.zeros((0, 6))

for left, top, right, bottom, _confidence, _class_id in image_objs.tolist():
    cv2.rectangle(
        show,
        (int(left), int(top)),
        (int(right), int(bottom)),
        (0, 255, 0),
        2,
    )
cv2.imwrite("show.jpg", show)
认真仔细；计算出错时要认真排查。
标签:box,index,yolov5,inverse,image,torch,grid,复现,模块 From: https://www.cnblogs.com/xiaoruirui/p/16929051.html