yolov5推理模块复现

标签：box index yolov5 inverse image torch grid 复现模块
import cv2
import numpy as np
import torch.nn
import torchvision
import torchvision.transforms.functional as T

# Load the serialized YOLOv5 checkpoint onto the CPU.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. Restoring the stored model object presumably requires the
# YOLOv5 model classes to be importable -- confirm against the project layout.
checkpoint = torch.load("D:/yolov5m.pt", map_location="cpu")

# Cast the stored model to float and switch it to evaluation mode
# (both nn.Module.float() and nn.Module.eval() return the module itself).
model = checkpoint["model"].float().eval()

# Call the model's own fuse() (presumably Conv+BN fusion -- confirm), then set
# the export flag on the last sub-module, i.e. the detection head.
model.fuse()
detect_head = model.model[-1]
detect_head.export = True

# Load the test image. OpenCV decodes to BGR channel order, so an untouched
# copy is kept for drawing and the working image is converted to RGB for
# inference.
image_path = "inference/images/zidane.jpg"
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising, which would otherwise surface later as an AttributeError.
    raise FileNotFoundError(f"cannot read image: {image_path}")
show = image.copy()
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Letterbox the image onto a square canvas: per the original author's note the
# network was trained on 640x640 inputs, so the longer side is scaled to 640
# and the result is centred, with the border filled with grey (114, 114, 114).
train_image_size = 640
image_height, image_width = image.shape[:2]
scale = train_image_size / max(image_height, image_width)

# Translation that centres the scaled image on the canvas.
half_canvas = train_image_size * 0.5
x_offset = half_canvas - image_width * scale * 0.5
y_offset = half_canvas - image_height * scale * 0.5

# 2x3 affine matrix (uniform scale + translation) mapping source pixels to
# canvas pixels; its inverse is kept to map boxes back to the source image.
affine_rows = [
    [scale, 0, x_offset],
    [0, scale, y_offset],
]
M = np.array(affine_rows, dtype=np.float32)
inverse_M = cv2.invertAffineTransform(M)

canvas_size = (train_image_size, train_image_size)
image = cv2.warpAffine(
    image,
    M,
    canvas_size,
    borderMode=cv2.BORDER_CONSTANT,
    borderValue=(114, 114, 114),
)

# Convert the letterboxed H x W x C image into a 1 x C x H x W tensor.
# torchvision's to_tensor moves channels first and, for uint8 input
# (presumably what OpenCV produced here), also divides by 255.
image = T.to_tensor(image)
image = image.unsqueeze(dim=0)

# Focus-style space-to-depth slicing done outside the network: every 2x2 pixel
# neighbourhood is split into four sub-images that are stacked along the
# channel axis.
# NOTE(review): per the original author's note, the model's Focus/Detect
# modules were altered for the TensorRT export so that this slicing and the
# box decoding happen in user code -- confirm against the model definition.
focus_offsets = ((0, 0), (1, 0), (0, 1), (1, 1))  # (row_start, col_start)
image = torch.cat(
    [image[..., row_start::2, col_start::2] for row_start, col_start in focus_offsets],
    dim=1,
)

# Preprocessing is finished; run the forward pass without tracking gradients.
with torch.no_grad():
    predicts = model(image)

# Decode the raw head outputs into boxes expressed in source-image pixels.
# Every tensor appended to image_objects has one row per kept candidate:
# (left, top, right, bottom, confidence, class_id).
threshold = 0.25  # applied to objectness first, then to objectness * class score
num_anchor = 3    # anchors per detection level

image_objects = []
for level_index in range(3):
    predict = predicts[level_index]
    stride = model.model[-1].stride[level_index]
    anchor = model.model[-1].anchors[level_index]

    # Per the original author's note, predict is 1 x 255 x 80 x 80 at the first
    # level, where 255 = 3 anchors * (4 box values + 1 objectness + 80 classes).
    num_classes = predict.shape[1] // num_anchor - 5
    predict.sigmoid_()

    for anchor_index in range(num_anchor):
        channel_begin = anchor_index * (5 + num_classes)
        # Box regression values (4 channels).
        regression = predict[0, channel_begin:channel_begin + 4]
        # Objectness score (1 channel).
        objectness = predict[0, channel_begin + 4]
        # Per-class scores (num_classes channels).
        classifier = predict[0, channel_begin + 5:channel_begin + 5 + num_classes]

        # With only a condition, torch.where returns one index tensor per
        # dimension; objectness is 2-D here, so this unpacks into the row (y)
        # and column (x) indices of the cells that pass the threshold.
        grid_y, grid_x = torch.where(objectness >= threshold)
        if grid_y.numel() == 0:
            continue

        # Best class and its score for every selected cell.
        select_classifier = classifier[:, grid_y, grid_x]
        max_class_id = select_classifier.argmax(dim=0)
        max_class_score = select_classifier[max_class_id, torch.arange(len(max_class_id))]
        select_objectness_score = objectness[grid_y, grid_x]
        # Final confidence = objectness * best class score (the original
        # author notes this follows the official YOLOv5 implementation).
        select_object_confidence = select_objectness_score * max_class_score

        # Second filter on the combined confidence. torch.where returns a
        # 1-tuple for a 1-D input, so take element 0: calling len() on the
        # tuple itself is always 1 and would never detect the empty case.
        keep_object_index = torch.where(select_object_confidence >= threshold)[0]
        if keep_object_index.numel() == 0:
            continue

        object_confidence = select_object_confidence[keep_object_index]
        object_class = max_class_id[keep_object_index]
        grid_x = grid_x[keep_object_index]
        grid_y = grid_y[keep_object_index]

        # The network predicts offsets relative to the grid cell: the centre is
        # (2 * sigmoid - 0.5 + cell index) * stride and the size is
        # (2 * sigmoid) ** 2 * anchor * stride.
        grid_xy = torch.stack([grid_x, grid_y], dim=0)
        box_cx, box_cy = (regression[:2, grid_y, grid_x] * 2 - 0.5 + grid_xy) * stride
        box_width, box_height = (
            torch.pow(regression[2:4, grid_y, grid_x] * 2, 2)
            * anchor[anchor_index].view(2, 1)
            * stride
        )

        # Centre/size -> corner coordinates on the 640 x 640 canvas.
        box_left = box_cx - (box_width - 1) * 0.5
        box_right = box_cx + (box_width - 1) * 0.5
        box_top = box_cy - (box_height - 1) * 0.5
        box_bottom = box_cy + (box_height - 1) * 0.5

        # Map both corners back to source-image coordinates with the inverse
        # of the letterbox affine transform.
        box_source_left = box_left * inverse_M[0, 0] + box_top * inverse_M[0, 1] + inverse_M[0, 2]
        box_source_top = box_left * inverse_M[1, 0] + box_top * inverse_M[1, 1] + inverse_M[1, 2]
        box_source_right = box_right * inverse_M[0, 0] + box_bottom * inverse_M[0, 1] + inverse_M[0, 2]
        box_source_bottom = box_right * inverse_M[1, 0] + box_bottom * inverse_M[1, 1] + inverse_M[1, 2]

        objs = torch.stack([
            box_source_left,
            box_source_top,
            box_source_right,
            box_source_bottom,
            object_confidence,
            # Class ids are int64; cast explicitly so the stack does not
            # depend on implicit dtype promotion.
            object_class.to(object_confidence.dtype),
        ], dim=1)
        image_objects.append(objs)

# The decoder produces many overlapping candidates, so run non-maximum
# suppression. It is applied per class only: boxes of different classes must
# not suppress each other.
if image_objects:
    image_objs = torch.cat(image_objects, dim=0)
    max_image_size = 4096
    # Columns are left, top, right, bottom, confidence, class_id. Shifting
    # every box by class_id * max_image_size keeps boxes of different classes
    # from overlapping, so a single NMS call acts per class (this assumes box
    # coordinates stay below 4096 pixels).
    nms_input_box = image_objs[:, :4] + image_objs[:, 5][:, None] * max_image_size
    # nms(boxes, scores, iou_threshold) returns the indices of the kept boxes.
    keep_index = torchvision.ops.nms(nms_input_box, image_objs[:, 4], 0.5)
    image_objs = image_objs[keep_index]
else:
    # Nothing passed the confidence threshold; torch.cat would raise on an
    # empty list, so fall back to an empty result and still write the image.
    image_objs = torch.zeros((0, 6))

# Draw the surviving boxes on the untouched BGR copy and save the result.
for left, top, right, bottom, confidence, class_id in image_objs.tolist():
    cv2.rectangle(show,
                  (int(left), int(top)),
                  (int(right), int(bottom)),
                  (0, 255, 0),
                  2)
if not cv2.imwrite("show.jpg", show):
    # cv2.imwrite reports failure through its boolean return value.
    raise IOError("failed to write show.jpg")
View Code
做事要认真仔细；计算结果出错时，要耐心地逐步排查。
标签：box,index,yolov5,inverse,image,torch,grid,复现,模块
From： https://www.cnblogs.com/xiaoruirui/p/16929051.html
相关文章

赞助商

阅读排行