
Rewriting the YOLOv5 dataset


import os
import random
import sys
import cv2
import numpy as np
from PIL import Image
import sys_utils
import nn_utils
from sys_utils import _single_instance_logger as logger
import copy

import torch
import torch.nn as nn


class VOCDataSet:
    def __init__(self, augment, image_size, root):
        self.augment = augment
        self.image_size = image_size
        self.root = root
        self.label_map = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow",
                          "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train",
                          "tvmonitor"]
        self.all_labeled_annotations = []
        self.border_fill_value = 114
        cache_name = sys_utils.get_md5(root)
        self.cache_and_build(f"runs/dataset_cache/{cache_name}temp.cache")

    def cache_and_build(self, cache_file):
        if os.path.exists(cache_file):
            logger.info(f"Load form cache {cache_file}")
            self.load_labeled_information_from_cache(cache_file)
        else:
            logger.info(f"Build and cache {cache_file}")
            self.build_labeled_information_cache(cache_file)

    def load_labeled_information_from_cache(self, cache_file):
        self.all_labeled_annotations = torch.load(cache_file)

    def build_labeled_information_cache(self, cache_file):
        """
        1.实现数据检查校验和缓存
        """
        annotations_files = os.listdir(os.path.join(self.root, "Annotations"))
        # keep only the .xml files
        annotations_files = list(filter(lambda x: x.endswith(".xml"), annotations_files))

        # swap the .xml suffix for .jpg
        jpeg_files = [item[:-3] + "jpg" for item in annotations_files]
        # prepend the full directory paths
        annotations_files = list(map(lambda x: os.path.join(self.root, "Annotations", x), annotations_files))
        jpeg_files = list(map(lambda x: os.path.join(self.root, "JPEGImages", x), jpeg_files))

        for annotation_file, jpeg_file in zip(annotations_files, jpeg_files):
            # Data checks: 1) is the image corrupted (verify() raises if so);
            # 2) is the image too small. Annotations are stored normalized.
            pil_image = Image.open(jpeg_file)
            pil_image.verify()
            image_width, image_height = sys_utils.exif_size(pil_image)
            assert image_width > 9 and image_height > 9, f"Image size is too small: {image_width} x {image_height}"

            # load the annotation as pixel boxes [left, top, right, bottom, class_index]
            pixel_annotation = self.load_voc_annotation(annotation_file, self.label_map)

            # convert the annotation to normalized [cx, cy, width, height, class_index]
            normalize_annotations = self.convert_to_normalize_annotation(pixel_annotation, image_width,
                                                                         image_height)
            self.all_labeled_annotations.append([jpeg_file, normalize_annotations, [image_width, image_height]])
        sys_utils.mkparents(cache_file)
        torch.save(self.all_labeled_annotations, cache_file)

    def convert_to_normalize_annotation(self, pixel_annotations, image_width, image_height):
        """
        转换标注信息到normalize格式,除以图像宽高进行归一化
        参数:
        pixel_annotations[Nx5]:       指定为标注信息,格式是[left, top, right, bottom, class_index]
        image_width[int]:             指定为标注信息的图像宽度
        image_height[int]:            指定为标注信息的图像高度
        返回值:
        normalize_annotations[Nx5]:   返回格式是[cx, cy, width, height, class_index]
        """
        normalize_annotations = pixel_annotations.copy()
        left, top, right, bottom = [pixel_annotations[:, i] for i in range(4)]
        normalize_annotations[:, 0] = (left + right) * 0.5
        normalize_annotations[:, 1] = (top + bottom) * 0.5
        normalize_annotations[:, 2] = right - left + 1
        normalize_annotations[:, 3] = bottom - top + 1

        normalize_annotations /= [image_width, image_height, image_width, image_height, 1]
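        # (the trailing 1 in the divisor leaves the class_index column unchanged)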
        return normalize_annotations

    def __len__(self):
        return len(self.all_labeled_annotations)

    def __getitem__(self, image_index):
        # placeholder: the full training pipeline would build a sample here
        # (e.g. via load_mosaic) before returning a tensor
        return None

    def load_mosaic(self, image_index):
        """
        把指定图像和随机3个图像拼为一个2*image_size大小的大图
        :param image_index:指定图片索引
        :return:image大小为2*image_size,和指定框的normalize_annotation
        """
        # pick a random mosaic center in [0.5 * image_size, 1.5 * image_size]
        x_center = int(random.uniform(self.image_size * 0.5, self.image_size * 1.5))
        y_center = int(random.uniform(self.image_size * 0.5, self.image_size * 1.5))
        # pick three additional random image indices
        image_len = len(self.all_labeled_annotations)
        all_image_index = [image_index] + [random.randint(0, image_len - 1) for _ in range(3)]

        # each of the four images is then shifted so that one of its corners
        # lands on the random center point:
        #  img1,  img2
        #  img3,  img4
        alignment_corner_point = [
            [1, 1],  # img1: bottom-right corner, as a fraction of its width/height
            [0, 1],  # img2: bottom-left corner
            [1, 0],  # img3: top-right corner
            [0, 0]   # img4: top-left corner
        ]

        merge_mosaic_image_size = self.image_size * 2
        # np.full with a tuple fill value is noticeably slower; the image is uint8, so pass a plain int
        merge_mosaic_image = np.full((merge_mosaic_image_size, merge_mosaic_image_size, 3), self.border_fill_value,
                                     dtype=np.uint8)
        merge_mosaic_pixel_annotations = []
        for index, (image_index, (corner_point_x, corner_point_y)) in enumerate(
                zip(all_image_index, alignment_corner_point)):
            image, normalize_annotations, (image_width, image_height) = self.load_image_with_uniform_scale(image_index)
            # draw the boxes before merging, to help track down bugs
            nn_utils.draw_norm_bboxes(image, normalize_annotations, color=(0, 0, 255), thickness=3)
            # if index == 0:
            #     normalize_annotations = np.zeros((0, 5))
            corner_point_x = image_width * corner_point_x
            corner_point_y = image_height * corner_point_y
            x_offset = x_center - corner_point_x
            y_offset = y_center - corner_point_y

            # the affine matrix must be float32
            M = np.array([
                [1, 0, x_offset],
                [0, 1, y_offset],
            ], dtype=np.float32)
            # paste image into merge_mosaic_image through M; BORDER_TRANSPARENT leaves the 114 fill outside the source untouched, with nearest-neighbor interpolation
            cv2.warpAffine(image, M, (merge_mosaic_image_size, merge_mosaic_image_size),
                           dst=merge_mosaic_image,
                           borderMode=cv2.BORDER_TRANSPARENT,
                           flags=cv2.INTER_NEAREST)
            # convert the boxes to pixel format [left, top, right, bottom]
            pixel_annotations = self.convert_to_pixel_annotation(normalize_annotations, image_width, image_height)

            # the image was translated, so the boxes must be shifted by the same offset
            pixel_annotations = pixel_annotations + [x_offset, y_offset, x_offset, y_offset, 0]
            merge_mosaic_pixel_annotations.append(pixel_annotations)
        # concatenate all boxes into a single N x 5 array
        merge_mosaic_pixel_annotations = np.concatenate(merge_mosaic_pixel_annotations, axis=0)
        np.clip(merge_mosaic_pixel_annotations[:, :4], a_min=0, a_max=merge_mosaic_image_size - 1,
                out=merge_mosaic_pixel_annotations[:, :4])
        # random affine: scale the mosaic, then map it back to image_size
        scale = random.uniform(0.5, 1.5)
        # map the 1280 mosaic onto a 640 output, keeping the centers aligned:
        # x' = x * scale + x_offset
        # at the mosaic center: 640 * scale + x_offset = 320
        # => x_offset = 320 - 640 * scale
        # => x_offset = (0.5 - scale) * self.image_size
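        # sanity check (assuming image_size = 640): with scale = 1.0,
        # x_offset = (0.5 - 1.0) * 640 = -320, so the mosaic center (640, 640)
        # lands exactly on the output center (320, 320)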
        M = np.array([
            [scale, 0, (0.5 - scale) * self.image_size],
            [0, scale, (0.5 - scale) * self.image_size]
        ], dtype=np.float32)
        merge_mosaic_image = cv2.warpAffine(merge_mosaic_image, M, (self.image_size, self.image_size),
                                            borderMode=cv2.BORDER_CONSTANT,
                                            borderValue=self.border_fill_value, flags=cv2.INTER_LINEAR)

        # transform the boxes with the same M matrix
        num_targets = len(merge_mosaic_pixel_annotations)
        output_normalize_annotations = np.zeros((0, 5))
        if num_targets > 0:
            # project the pixel annotations into the output image using M
            targets_temp = np.ones((num_targets * 2, 3))
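            # homogeneous coordinates: the column of ones picks up the translation part of M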

            # memory layout note:
            # N x 5
            # N x 4 -> left, top, right, bottom, left, top, right, bottom, ...
            #       -> reshape(N * 2, 2)
            #       -> left, top
            #       -> right, bottom
            #       -> left, top
            #       -> right, bottom
            # i.e. turn the box annotations into one point per row
            targets_temp[:, :2] = merge_mosaic_pixel_annotations[:, :4].reshape(num_targets * 2, 2)

            # targets_temp ->  2N x 3
            # M -> 2 x 3
            # output: 2N x 2,
            merge_projection_pixel_annotations = merge_mosaic_pixel_annotations.copy()
            merge_projection_pixel_annotations[:, :4] = (targets_temp @ M.T).reshape(num_targets, 4)

            # post-process the boxes:
            # 1. clip them to the image bounds
            # 2. filter out degenerate boxes
            np.clip(merge_projection_pixel_annotations[:, :4], a_min=0, a_max=self.image_size - 1,
                    out=merge_projection_pixel_annotations[:, :4])

            # filter out invalid boxes
            projection_box_width = merge_projection_pixel_annotations[:, 2] - merge_projection_pixel_annotations[:, 0] + 1
            projection_box_height = merge_projection_pixel_annotations[:, 3] - merge_projection_pixel_annotations[:, 1] + 1
            original_box_width = merge_mosaic_pixel_annotations[:, 2] - merge_mosaic_pixel_annotations[:, 0] + 1
            original_box_height = merge_mosaic_pixel_annotations[:, 3] - merge_mosaic_pixel_annotations[:, 1] + 1

            area_projection = projection_box_width * projection_box_height
            area_original = original_box_width * original_box_height

            aspect_ratio = np.maximum(projection_box_width / (projection_box_height + 1e-6),
                                      projection_box_height / (projection_box_width + 1e-6))

            # a box is kept only if:
            # 1. its projected width is greater than 2
            # 2. its projected height is greater than 2
            # 3. clipped area / (original area * scale) > 0.2
            # 4. max(width/height, height/width) < 20
            keep_indices = (projection_box_width > 2) & \
                           (projection_box_height > 2) & \
                           (area_projection / (area_original * scale + 1e-6) > 0.2) & \
                           (aspect_ratio < 20)
            merge_projection_pixel_annotations = merge_projection_pixel_annotations[keep_indices]
            output_normalize_annotations = nn_utils.convert_to_normalize_annotation(merge_projection_pixel_annotations,
                                                                                    self.image_size, self.image_size)
        return merge_mosaic_image, output_normalize_annotations

    def convert_to_pixel_annotation(self, normalize_annotations, image_width, image_height):
        '''
        Convert annotations from normalized format back to pixels.
        Args:
            normalize_annotations[Nx5]:  boxes as [cx, cy, width, height, class_index]
            image_width[int]:            width of the annotated image
            image_height[int]:           height of the annotated image
        Returns:
            pixel_annotations[Nx5]:  boxes as [left, top, right, bottom, class_index]
        '''
        pixel_annotations = normalize_annotations.copy()
        cx, cy, width, height = [normalize_annotations[:, i] for i in range(4)]
        pixel_annotations[:, 0] = cx * image_width - (width * image_width - 1) * 0.5  # left
        pixel_annotations[:, 1] = cy * image_height - (height * image_height - 1) * 0.5  # top
        pixel_annotations[:, 2] = cx * image_width + (width * image_width - 1) * 0.5  # right
        pixel_annotations[:, 3] = cy * image_height + (height * image_height - 1) * 0.5  # bottom
        return pixel_annotations

    def load_image_with_uniform_scale(self, image_index):
        """
        加载图像,并且长边等比缩放到self.size大小
        :param image_index:图片索引
        :return:image,normalize_annotations,(image_resize-width,image_resize-height)
        """
        jpeg_file, normalize_annotations, (image_width, image_height) = self.all_labeled_annotations[image_index]
        image = cv2.imread(jpeg_file)
        scale = self.image_size / max(image.shape[:2])
        if scale < 1 and self.augment:
            interp = cv2.INTER_AREA
        else:
            interp = cv2.INTER_LINEAR
        image = cv2.resize(image, (0, 0), fx=scale, fy=scale, interpolation=interp)
        image_resize_height, image_resize_width = image.shape[:2]
        return image, normalize_annotations, (image_resize_width, image_resize_height)

    def load_voc_annotation(self, annotation_file, label_map):
        '''
        Load a VOC annotation XML file and read its bboxes.
        Args:
            annotation_file[str]:  path to the xml file
            label_map[list]:       list of class names
        Returns:
            np.array([(xmin, ymin, xmax, ymax, class_index), ...])
        '''
        with open(annotation_file, "r") as f:
            annotation_data = f.read()

        def middle(s, begin, end, pos_begin=0):
            p = s.find(begin, pos_begin)
            if p == -1:
                return None, None

            p += len(begin)
            e = s.find(end, p)
            if e == -1:
                return None, None

            return s[p:e], e + len(end)

        obj_bboxes = []
        object_, pos_ = middle(annotation_data, "<object>", "</object>")
        while object_ is not None:
            xmin = int(middle(object_, "<xmin>", "</xmin>")[0])
            ymin = int(middle(object_, "<ymin>", "</ymin>")[0])
            xmax = int(middle(object_, "<xmax>", "</xmax>")[0])
            ymax = int(middle(object_, "<ymax>", "</ymax>")[0])
            name = middle(object_, "<name>", "</name>")[0]
            object_, pos_ = middle(annotation_data, "<object>", "</object>", pos_)
            obj_bboxes.append((xmin, ymin, xmax, ymax, label_map.index(name)))

        # return a 0 x 5 ndarray so downstream code never has to special-case the no-box case and shapes always match
        return_ndarray_bboxes = np.zeros((0, 5), dtype=np.float32)
        if len(obj_bboxes) > 0:
            return_ndarray_bboxes = np.array(obj_bboxes, dtype=np.float32)
        return return_ndarray_bboxes


if __name__ == '__main__':
    # fix the random seed for reproducibility
    nn_utils.setup_seed(13)
    dataset = VOCDataSet(True, 640, r"E:\VOC2007\VOCdevkit\VOC2007")
    # print(len(dataset.all_labeled_annotations))
    # image, normalize_annotations, (w, h) = dataset.load_image_with_uniform_scale(0)
    # print(image.shape, w, h)
    # print(normalize_annotations)
    # pixel_annotations = np.array([
    #     [101.55, 50, 200, 150, 1]
    # ])
    #
    # normalize_annotations = dataset.convert_to_normalize_annotation(pixel_annotations, 640, 640)
    # print(normalize_annotations)
    # result_pixel_annotations = dataset.convert_to_pixel_annotation(normalize_annotations, 640, 640)
    # print(result_pixel_annotations)
    image, normalize_annotations = dataset.load_mosaic(3)
    nn_utils.draw_norm_bboxes(image, normalize_annotations, thickness=3)
    cv2.imwrite("image.jpg", image)

nn_utils.py

import cv2
import torch
import torch.nn as nn
import math
import random
import numpy as np

from copy import deepcopy

class BBox:
    def __init__(self, x, y, r, b, landmark):
        
        self.x = x
        self.y = y
        self.r = r
        self.b = b
        self.landmark = landmark

    def __repr__(self):
        landmark_info = "HasLandmark" if self.landmark else "NoLandmark"
        return f"{{Face {self.x}, {self.y}, {self.r}, {self.b}, {landmark_info} }}"
    
    @property
    def left_top_i(self):
        return int(self.x), int(self.y)
    
    @property
    def right_bottom_i(self):
        return int(self.r), int(self.b)
    
    @property
    def center_i(self):
        return int((self.x + self.r) * 0.5), int((self.y + self.b) * 0.5)
    
    @property
    def center(self):
        return (self.x + self.r) * 0.5, (self.y + self.b) * 0.5
    
    @property
    def width(self):
        return self.r - self.x + 1
    
    @property
    def height(self):
        return self.b - self.y + 1

    @property
    def location(self):
        return self.x, self.y, self.r, self.b

    @property
    def landmark_union(self):
        union = ()
        for point in self.landmark:
            union = union + tuple(point)
        return union
        
class ImageObject:
    def __init__(self, file):
        self.file = file
        self.bboxes = []

    def add(self, annotation):
        x, y, w, h = annotation[:4]
        r = x + w - 1
        b = y + h - 1
        landmark = None
        
        if len(annotation) == 20:
            # x, y, w, h, then five landmark (x, y, z) triplets, plus one unknown trailing value
            landmark = []
            for i in range(5):
                px = annotation[i * 3 + 0 + 4]
                py = annotation[i * 3 + 1 + 4]
                pz = annotation[i * 3 + 2 + 4]
                
                if pz == -1:
                    landmark = None
                    break
                    
                landmark.append([px, py])
        self.bboxes.append(BBox(x, y, r, b, landmark))
        

def load_widerface_annotation(ann_file):
    with open(ann_file, "r") as f:
        lines = f.readlines()

    imageObject = None
    file = None
    images = []
    for line in lines:
        line = line.replace("\n", "")

        if line[0] == "#":
            file = line[2:]
            imageObject = ImageObject(file)
            images.append(imageObject)
        else:
            imageObject.add([float(item) for item in line.split(" ")])
    return images


def draw_gauss_np(heatmap, x, y, box_size):

    if not isinstance(box_size, tuple):
        box_size = (box_size, box_size)

    box_width, box_height = box_size
    diameter = min(box_width, box_height)

    x, y = int(x), int(y)  # the slicing below requires ints (mirrors draw_gauss_torch)
    height, width = heatmap.shape[:2]
    sigma = diameter / 6
    radius = max(1, int(diameter * 0.5))
    s = 2 * sigma * sigma
    ky, kx = np.ogrid[-radius:+radius+1, -radius:+radius+1]
    kernel = np.exp(-(kx * kx + ky * ky) / s)
        
    dleft, dtop = -min(x, radius), -min(y, radius)
    dright, dbottom = +min(width - x, radius+1), +min(height - y, radius+1)
    select_heatmap = heatmap[y+dtop:y+dbottom, x+dleft:x+dright]
    select_kernel = kernel[radius+dtop:radius+dbottom, radius+dleft:radius+dright]
    if min(select_heatmap.shape) > 0:
        np.maximum(select_heatmap, select_kernel, out=select_heatmap)
    return heatmap

def draw_gauss_torch(heatmap, x, y, box_size):
    if not isinstance(box_size, tuple):
        box_size = (box_size, box_size)

    box_width, box_height = box_size
    diameter = min(box_width, box_height)
    device = heatmap.device
    dtype = heatmap.dtype

    x = int(x)
    y = int(y)
    height, width = heatmap.shape[:2]
    sigma = diameter / 6
    radius = max(1, int(diameter * 0.5))
    s = 2 * sigma * sigma
    ky = torch.arange(-radius, +radius+1, device=device, dtype=dtype).view(-1, 1)
    kx = torch.arange(-radius, +radius+1, device=device, dtype=dtype).view(1, -1)
    kernel = torch.exp(-(kx * kx + ky * ky) / s)
    
    dleft, dtop = -min(x, radius), -min(y, radius)
    dright, dbottom = +min(width - x, radius+1), +min(height - y, radius+1)
    select_heatmap = heatmap[y+dtop:y+dbottom, x+dleft:x+dright]
    select_kernel = kernel[radius+dtop:radius+dbottom, radius+dleft:radius+dright]
    if min(select_heatmap.shape) > 0:
        torch.max(select_heatmap, select_kernel, out=select_heatmap)
    return heatmap

def pad_image(image, stride):
    height, width = image.shape[:2]
    pad_x = stride - (width % stride) if width % stride != 0 else 0
    pad_y = stride - (height % stride) if height % stride != 0 else 0
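    # e.g., a 100 x 150 (h x w) image with stride 32 is padded to 128 x 160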
    image = cv2.copyMakeBorder(image, 0, pad_y, 0, pad_x, cv2.BORDER_CONSTANT, value=(0, 0, 0))
    return image

def iou(a, b):
    ax, ay, ar, ab = a
    bx, by, br, bb = b

    cross_x = max(ax, bx)
    cross_y = max(ay, by)
    cross_r = min(ar, br)
    cross_b = min(ab, bb)
    cross_w = max(0, (cross_r - cross_x) + 1)
    cross_h = max(0, (cross_b - cross_y) + 1)
    cross_area = cross_w * cross_h
    union = (ar - ax + 1) * (ab - ay + 1) + (br - bx + 1) * (bb - by + 1) - cross_area
    return cross_area / union

def nms(bboxes, threshold, confidence_index=-1):
    bboxes.sort(key=lambda x: x[confidence_index], reverse=True)
    flags = [True] * len(bboxes)
    keep = []
    for i in range(len(bboxes)):
        if not flags[i]: continue
        keep.append(bboxes[i])

        for j in range(i+1, len(bboxes)):
            if iou(bboxes[i][:4], bboxes[j][:4]) > threshold:
                flags[j] = False
    return keep
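
# Example (assumed data): each box is [left, top, right, bottom, confidence]
#   nms([[0, 0, 10, 10, 0.9], [1, 1, 11, 11, 0.8]], threshold=0.5)
#   keeps only the higher-confidence box: their IoU is 100 / 142 ~ 0.70 > 0.5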

def nmsAsClass(bboxes, threshold, class_index=-1, confidence_index=-2):

    boxasclass = {}
    for box in bboxes:
        classes = box[class_index]
        if classes not in boxasclass:
            boxasclass[classes] = []
        boxasclass[classes].append(box)

    output = []
    for key in boxasclass:
        result = nms(boxasclass[key], threshold, confidence_index)
        output.extend(result)
    return output


def iou_batch(a, b):
    # left, top, right, bottom
    a_xmin, a_xmax = a[..., 0], a[..., 2]
    a_ymin, a_ymax = a[..., 1], a[..., 3]
    b_xmin, b_xmax = b[..., 0], b[..., 2]
    b_ymin, b_ymax = b[..., 1], b[..., 3]
    inter_xmin = torch.max(a_xmin, b_xmin)
    inter_xmax = torch.min(a_xmax, b_xmax)
    inter_ymin = torch.max(a_ymin, b_ymin)
    inter_ymax = torch.min(a_ymax, b_ymax)
    inter_width = (inter_xmax - inter_xmin + 1).clamp(0)
    inter_height = (inter_ymax - inter_ymin + 1).clamp(0)
    inter_area = inter_width * inter_height

    a_width, a_height = (a_xmax - a_xmin + 1), (a_ymax - a_ymin + 1)
    b_width, b_height = (b_xmax - b_xmin + 1), (b_ymax - b_ymin + 1)
    union = (a_width * a_height) + (b_width * b_height) - inter_area
    return inter_area / union


def draw_bbox(image, left, top, right, bottom, confidence, classes, color=(0, 255, 0), thickness=1):

    left = int(left + 0.5)
    top = int(top + 0.5)
    right = int(right + 0.5)
    bottom = int(bottom + 0.5)
    cv2.rectangle(image, (left, top), (right, bottom), color, thickness)
    
    if classes == -1:
        text = f"{confidence:.2f}"
    else:
        text = f"[{classes}]{confidence:.2f}"
    cv2.putText(image, text, (left + 3, top - 5), 0, 0.5, (0, 0, 255), 1, 16)


def draw_norm_bboxes(image, bboxes, color=(0, 255, 0), thickness=1):
    '''
    Draw normalized bounding boxes.
    Args:
        image[ndarray]:        image to draw on
        bboxes[Nx4/Nx5/Nx6]:   boxes with 4, 5, or 6 columns in the order
                               [cx, cy, width, height, confidence, classes], normalized by the image size
    '''

    image_height, image_width = image.shape[:2]
    for obj in bboxes:
        cx, cy, width, height = obj[:4] * [image_width, image_height, image_width, image_height]
        left = cx - (width - 1) * 0.5
        top = cy - (height - 1) * 0.5
        right = cx + (width - 1) * 0.5
        bottom = cy + (height - 1) * 0.5

        confidence = 0
        if len(obj) > 4:
            confidence = obj[4]

        classes = -1
        if len(obj) > 5:
            classes = obj[5]

        draw_bbox(image, left, top, right, bottom, confidence, classes, color, thickness)


def draw_pixel_bboxes(image, bboxes, color=(0, 255, 0), thickness=1):
    '''
    Draw bounding boxes given as pixel [left, top, right, bottom] coordinates.
    '''
    for obj in bboxes:
        left, top, right, bottom = [int(item) for item in obj[:4]]

        confidence = 0
        if len(obj) > 4:
            confidence = obj[4]

        classes = -1
        if len(obj) > 5:
            classes = obj[5]

        draw_bbox(image, left, top, right, bottom, confidence, classes, color, thickness)


def get_center_affine_transform(src_width, src_height, dst_width, dst_height):
    s = min(dst_width / src_width, dst_height / src_height)
    new_width = s * src_width
    new_height = s * src_height
    dcx = dst_width * 0.5
    dcy = dst_height * 0.5

    dst_points = np.array([
        [dcx - new_width * 0.5, dcy - new_height * 0.5],
        [dcx + new_width * 0.5, dcy - new_height * 0.5],
        [dcx + new_width * 0.5, dcy + new_height * 0.5],
    ], dtype=np.float32)

    src_points = np.array([
        [0, 0],
        [src_width, 0],
        [src_width, src_height]
    ], dtype=np.float32)
    return cv2.getAffineTransform(src_points, dst_points)

def center_affine(image, width, height):
    src_height, src_width = image.shape[:2]
    M = get_center_affine_transform(src_width, src_height, width, height)
    return cv2.warpAffine(image, M, (width, height))

def inverse_center_affine_bboxes(image_width, image_height, net_width, net_height, bboxes):
    num_bboxes = len(bboxes)
    if num_bboxes == 0:
        return bboxes
    
    M = get_center_affine_transform(image_width, image_height, net_width, net_height)
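    # lift the 2x3 affine to a 3x3 matrix, invert it, then keep the top two rows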
    M = np.matrix(np.vstack([M, np.array([0, 0, 1])])).I
    M = M[:2]
    
    bboxes = np.array(bboxes)  # N x 6: left, top, right, bottom, confidence, classes
    left_top = bboxes[:, :2]
    right_bottom = bboxes[:, 2:4]

    left_top_project = (M @ np.hstack([left_top, np.ones([num_bboxes, 1])]).T).T
    right_bottom_project = (M @ np.hstack([right_bottom, np.ones([num_bboxes, 1])]).T).T
    new_box = np.hstack([left_top_project, right_bottom_project, bboxes[:, 4:]])
    return new_box.tolist()

def is_parallel(model):
    # True if the model is wrapped in DataParallel or DistributedDataParallel
    return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)

def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# return GFlops, MParams
def compute_flops(model, input):
    try:
        from thop import profile

        # thop.profile is intrusive and renames the model's modules, so profile a deep copy
        # (deepcopy is already imported at module level)
        flops, params = profile(deepcopy(model).eval(), inputs=input, verbose=False)
        # units: GFLOPs and MParams
        return flops / 1E9, params / 1E6
    except Exception as e:
        pass

    return -1, -1


class ModelEMA:
    """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
    Keep a moving average of everything in the model state_dict (parameters and buffers).
    This is intended to allow functionality like
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    A smoothed version of the weights is necessary for some training schemes to perform well.
    This class is sensitive where it is initialized in the sequence of model init,
    GPU assignment and distributed training wrappers.
    """

    def __init__(self, model, decay=0.9999, updates=0):
        # Create EMA
        self.ema = deepcopy(model.module if is_parallel(model) else model).eval()  # FP32 EMA
        # if next(model.parameters()).device.type != 'cpu':
        #     self.ema.half()  # FP16 EMA
        self.updates = updates  # number of EMA updates
        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay exponential ramp (to help early epochs)
        for p in self.ema.parameters():
            p.requires_grad_(False)

    def update(self, model):
        # Update EMA parameters
        with torch.no_grad():
            self.updates += 1
            d = self.decay(self.updates)

            msd = model.module.state_dict() if is_parallel(model) else model.state_dict()  # model state_dict
            for k, v in self.ema.state_dict().items():
                if v.dtype.is_floating_point:
                    v *= d
                    v += (1. - d) * msd[k].detach()
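
# Typical ModelEMA usage (a sketch, not from the original post):
#   ema = ModelEMA(model)
#   for batch in loader:
#       ... one optimizer step on model ...
#       ema.update(model)
#   validate(ema.ema)  # evaluate with the smoothed weights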



def convert_to_pixel_annotation(normalize_annotations, image_width, image_height):
    '''
    Convert annotations from normalized format to pixels.
    Args:
        normalize_annotations[Nx5]:  boxes as [cx, cy, width, height, class_index]
        image_width[int]:            width of the annotated image
        image_height[int]:           height of the annotated image
    Returns:
        pixel_annotations[Nx5]:  boxes as [left, top, right, bottom, class_index]
    '''
    
    pixel_annotations = normalize_annotations.copy()
    cx, cy, width, height, class_index = [normalize_annotations[:, i] for i in range(5)]
    pixel_annotations[:, 0] = cx * image_width - (width * image_width - 1) * 0.5         # left
    pixel_annotations[:, 1] = cy * image_height - (height * image_height - 1) * 0.5      # top
    pixel_annotations[:, 2] = cx * image_width + (width * image_width - 1) * 0.5         # right
    pixel_annotations[:, 3] = cy * image_height + (height * image_height - 1) * 0.5      # bottom
    return pixel_annotations


def convert_to_normalize_annotation(pixel_annotations, image_width, image_height):
    '''
    Convert annotations to normalized format by dividing by the image size.
    Args:
        pixel_annotations[Nx5]:      boxes as [left, top, right, bottom, class_index]
        image_width[int]:            width of the annotated image
        image_height[int]:           height of the annotated image
    Returns:
        normalize_annotations[Nx5]:  boxes as [cx, cy, width, height, class_index]
    '''
    
    normalize_annotations = pixel_annotations.copy()
    left, top, right, bottom, class_index = [pixel_annotations[:, i] for i in range(5)]
    normalize_annotations[:, 0] = (left + right) * 0.5 / image_width  # cx
    normalize_annotations[:, 1] = (top + bottom) * 0.5 / image_height  # cy
    normalize_annotations[:, 2] = (right - left + 1) / image_width      # width
    normalize_annotations[:, 3] = (bottom - top + 1) / image_height      # height
    return normalize_annotations


def load_voc_annotation(annotation_file, label_map):
    '''
    Load a VOC annotation XML file and read its bboxes.
    Args:
        annotation_file[str]:  path to the xml file
        label_map[list]:       list of class names
    Returns:
        np.array([(xmin, ymin, xmax, ymax, class_index), ...])
    '''
    with open(annotation_file, "r") as f:
        annotation_data = f.read()

    def middle(s, begin, end, pos_begin = 0):
        p = s.find(begin, pos_begin)
        if p == -1:
            return None, None

        p += len(begin)
        e = s.find(end, p)
        if e == -1:
            return None, None

        return s[p:e], e + len(end)

    obj_bboxes = []
    object_, pos_ = middle(annotation_data, "<object>", "</object>")
    while object_ is not None:
        xmin = int(middle(object_, "<xmin>", "</xmin>")[0])
        ymin = int(middle(object_, "<ymin>", "</ymin>")[0])
        xmax = int(middle(object_, "<xmax>", "</xmax>")[0])
        ymax = int(middle(object_, "<ymax>", "</ymax>")[0])
        name = middle(object_, "<name>", "</name>")[0]
        object_, pos_ = middle(annotation_data, "<object>", "</object>", pos_)
        obj_bboxes.append((xmin, ymin, xmax, ymax, label_map.index(name)))
    
    # return a 0 x 5 ndarray so downstream code never has to special-case the no-box case and shapes always match
    return_ndarray_bboxes = np.zeros((0, 5), dtype=np.float32)
    if len(obj_bboxes) > 0:
        return_ndarray_bboxes = np.array(obj_bboxes, dtype=np.float32)
    return return_ndarray_bboxes
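
The Gaussian heatmap helpers are easiest to see in a tiny standalone run. Below is a minimal sketch (the center and box size are invented for illustration), assuming the listing above is saved as nn_utils.py:

import numpy as np
import nn_utils

heatmap = np.zeros((128, 128), dtype=np.float32)
# splat one Gaussian for a 24-pixel box centered at (64, 64); sigma = 24 / 6 = 4
nn_utils.draw_gauss_np(heatmap, x=64, y=64, box_size=24)
print(heatmap[64, 64])  # 1.0 at the peak
print(heatmap[64, 70])  # ~0.32 six pixels off-center: exp(-36 / 32)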

sys_utils.py

import logging
import datetime
import os
import shutil
import hashlib

from pathlib import Path
from PIL import Image, ExifTags
from logging.handlers import TimedRotatingFileHandler


# find the numeric id of the EXIF 'Orientation' tag once at import time
for orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[orientation] == 'Orientation':
        break

def get_md5(data):
    return hashlib.md5(data.encode(encoding='UTF-8')).hexdigest()

def exif_size(img):
    '''
    Returns exif-corrected PIL size
    '''
    width, height = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
        if rotation == 6 or rotation == 8:  # rotation 270  or  rotation 90
            # exchange
            width, height = height, width

    except Exception as e:
        pass
    return width, height

def mkdirs(directory):
    try:
        os.makedirs(directory)
    except Exception as e:
        ...

def mkparents(path):
    parent = Path(path).parent
    if not os.path.exists(parent):
        mkdirs(parent)


def build_logger(path):
    logger = logging.getLogger("NewLogger")
    logger.setLevel(logging.INFO)
    mkparents(path)

    rf_handler = TimedRotatingFileHandler(path, when='midnight', interval=1, backupCount=7, atTime=datetime.time(0, 0, 0, 0))
    formatter = logging.Formatter('[%(levelname)s][%(filename)s:%(lineno)d][%(asctime)s]: %(message)s')
    rf_handler.setFormatter(formatter)
    logger.addHandler(rf_handler)

    sh_handler = logging.StreamHandler()
    sh_handler.setFormatter(formatter)
    logger.addHandler(sh_handler)
    return logger


def build_default_logger():
    logger = logging.getLogger("DefaultLogger")
    logger.setLevel(logging.INFO)

    formatter = logging.Formatter('[%(levelname)s][%(filename)s:%(lineno)d][%(asctime)s]: %(message)s')
    sh_handler = logging.StreamHandler()
    sh_handler.setFormatter(formatter)
    logger.addHandler(sh_handler)
    return logger


def copy_code_to(src, dst):
    if len(dst) == 0 or dst == ".":
        print("invalid operate, copy to current directory")
        return

    for file in os.listdir(src):
        if file.endswith(".py"):
            source = f"{src}/{file}"
            dest = f"{dst}/{file}"
            mkparents(dest)
            shutil.copy(source, dest)


# singleton-style wrapper that delegates attribute access to the wrapped logger
class SingleInstanceLogger:
    def __init__(self):
        self.logger = build_default_logger()

    def __getattr__(self, name):
        return getattr(self.logger, name)


def setup_single_instance_logger(path):
    global _single_instance_logger
    _single_instance_logger.logger = build_logger(path)

_single_instance_logger = SingleInstanceLogger()


# class B:
#     def __init__(self):
#         self.name = "xiaozhang"

#     def echo(self, message):
#         print(f"Echo message: {message}")


# class A:
#     def __init__(self):
#         self.b = B()

#     def __getattr__(self, name):
#         print("get attr", name)
#         return getattr(self.b, name)

# a = A()
# a.echo("hello")
# print(a.name)
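
The singleton logger is meant to be configured once at startup and then imported everywhere else. A minimal usage sketch (the log path is just an example), assuming the listing above is saved as sys_utils.py:

import sys_utils
from sys_utils import _single_instance_logger as logger

sys_utils.setup_single_instance_logger("runs/logs/train.log")  # example path
logger.info("dataset build started")  # delegated to the underlying logging.Logger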

The dataset used throughout is VOC2007.

Running the script writes the mosaic visualization to image.jpg. [result image omitted]

From: https://www.cnblogs.com/xiaoruirui/p/16893345.html
